/**
 * Copyright 2022 by XGBoost Contributors
 *
 * \brief Utilities for estimating initial score.
 */
#include "fit_stump.h"

#include <cstddef>  // std::size_t
#include <cstdint>  // std::int32_t

#include "../collective/communicator-inl.h"
#include "../common/common.h"              // AssertGPUSupport
#include "../common/numeric.h"             // cpu_impl::Reduce
#include "../common/threading_utils.h"     // ParallelFor
#include "../common/transform_iterator.h"  // MakeIndexTransformIter
#include "xgboost/base.h"                  // bst_target_t, GradientPairPrecise
#include "xgboost/context.h"               // Context
#include "xgboost/linalg.h"                // TensorView, Tensor, Constant
#include "xgboost/logging.h"               // CHECK_EQ

namespace xgboost {
namespace tree {
namespace cpu_impl {
/**
 * \brief CPU implementation: reduce gradient pairs per target and turn each sum into an
 *        unregularized stump weight.
 *
 * \param ctx   Runtime context, provides the thread count for the parallel reduction.
 * \param info  Meta info; its data split mode gates the federated allreduce below.
 * \param gpair Gradient pairs with shape (n_samples, n_targets).
 * \param out   Output vector holding one weight per target.
 */
void FitStump(Context const* ctx, MetaInfo const& info,
              linalg::TensorView<GradientPair const, 2> gpair, linalg::VectorView<float> out) {
  auto n_targets = out.Size();
  CHECK_EQ(n_targets, gpair.Shape(1));
  // One row of partial sums per thread so the parallel loop below writes without contention.
  linalg::Tensor<GradientPairPrecise, 2> sum_tloc =
      linalg::Constant(ctx, GradientPairPrecise{}, ctx->Threads(), n_targets);
  auto h_sum_tloc = sum_tloc.HostView();
  // first dim for gpair is samples, second dim is target.
  // Reduce by column, parallel by samples
  common::ParallelFor(gpair.Shape(0), ctx->Threads(), [&](auto i) {
    for (bst_target_t t = 0; t < n_targets; ++t) {
      h_sum_tloc(omp_get_thread_num(), t) += GradientPairPrecise{gpair(i, t)};
    }
  });
  // Aggregate to the first row.
  auto h_sum = h_sum_tloc.Slice(0, linalg::All());
  for (std::int32_t i = 1; i < ctx->Threads(); ++i) {
    for (bst_target_t j = 0; j < n_targets; ++j) {
      h_sum(j) += h_sum_tloc(i, j);
    }
  }
  CHECK(h_sum.CContiguous());

  // In vertical federated learning, only worker 0 needs to call this, no need to do an allreduce.
  if (!collective::IsFederated() || info.data_split_mode != DataSplitMode::kCol) {
    // Each GradientPairPrecise holds two doubles (grad, hess), hence Size() * 2 values.
    collective::Allreduce<collective::Operation::kSum>(
        reinterpret_cast<double*>(h_sum.Values().data()), h_sum.Size() * 2);
  }

  for (std::size_t i = 0; i < h_sum.Size(); ++i) {
    out(i) = static_cast<float>(CalcUnregularizedWeight(h_sum(i).GetGrad(), h_sum(i).GetHess()));
  }
}
}  // namespace cpu_impl

namespace cuda_impl {
// Defined in the CUDA translation unit when GPU support is compiled in.
void FitStump(Context const* ctx, linalg::TensorView<GradientPair const, 2> gpair,
              linalg::VectorView<float> out);

#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
// CPU-only build: reaching this stub means a GPU context was requested without GPU support.
inline void FitStump(Context const*, linalg::TensorView<GradientPair const, 2>,
                     linalg::VectorView<float>) {
  common::AssertGPUSupport();
}
#endif  // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_HIP)
}  // namespace cuda_impl

/**
 * \brief Estimate the initial score (stump weight) for each target, dispatching to the
 *        CPU or CUDA implementation based on the context device.
 *
 * \param ctx       Runtime context; selects the CPU or GPU path.
 * \param info      Meta info forwarded to the CPU implementation.
 * \param gpair     Flattened gradient pairs of size n_samples * n_targets.
 * \param n_targets Number of output targets.
 * \param out       Resulting scores; reshaped here to n_targets.
 */
void FitStump(Context const* ctx, MetaInfo const& info, HostDeviceVector<GradientPair> const& gpair,
              bst_target_t n_targets, linalg::Vector<float>* out) {
  out->SetDevice(ctx->gpu_id);
  out->Reshape(n_targets);
  auto n_samples = gpair.Size() / n_targets;

  gpair.SetDevice(ctx->gpu_id);
  auto gpair_t = linalg::MakeTensorView(ctx, &gpair, n_samples, n_targets);
  ctx->IsCPU() ? cpu_impl::FitStump(ctx, info, gpair_t, out->HostView())
               : cuda_impl::FitStump(ctx, gpair_t, out->View(ctx->gpu_id));
}
}  // namespace tree
}  // namespace xgboost