Implement fit stump. (#8607)

This commit is contained in:
Jiaming Yuan
2023-01-04 04:14:51 +08:00
committed by GitHub
parent 20e6087579
commit 8d545ab2a2
23 changed files with 421 additions and 60 deletions

82
src/tree/fit_stump.cc Normal file
View File

@@ -0,0 +1,82 @@
/**
* Copyright 2022 by XGBoost Contributors
*
* \brief Utilities for estimating initial score.
*/
#include "fit_stump.h"
#include <cinttypes> // std::int32_t
#include <cstddef> // std::size_t
#include "../collective/communicator-inl.h"
#include "../common/common.h" // AssertGPUSupport
#include "../common/numeric.h" // cpu_impl::Reduce
#include "../common/threading_utils.h" // ParallelFor
#include "../common/transform_iterator.h" // MakeIndexTransformIter
#include "xgboost/base.h" // bst_target_t, GradientPairPrecise
#include "xgboost/context.h" // Context
#include "xgboost/linalg.h" // TensorView, Tensor, Constant
#include "xgboost/logging.h" // CHECK_EQ
namespace xgboost {
namespace tree {
namespace cpu_impl {
/**
 * \brief CPU implementation: compute one stump weight per target from the gradient sums.
 *
 * \param ctx   Context supplying the number of threads used for the reduction.
 * \param gpair Gradient pairs indexed as (sample, target).
 * \param out   Output vector; receives one weight per target.
 */
void FitStump(Context const* ctx, linalg::TensorView<GradientPair const, 2> gpair,
              linalg::VectorView<float> out) {
  auto n_targets = out.Size();
  CHECK_EQ(n_targets, gpair.Shape(1));

  std::int32_t n_threads = ctx->Threads();
  // One accumulator row per thread, so no cell is written by two threads in the parallel loop.
  linalg::Tensor<GradientPairPrecise, 2> partial_sums =
      linalg::Constant(ctx, GradientPairPrecise{}, n_threads, n_targets);
  auto h_partial = partial_sums.HostView();

  // Rows of gpair are samples, columns are targets: parallelise over samples and let each
  // thread accumulate the column sums into its own row.
  common::ParallelFor(gpair.Shape(0), ctx->Threads(), [&](auto sample) {
    auto tid = omp_get_thread_num();
    for (bst_target_t target = 0; target < n_targets; ++target) {
      h_partial(tid, target) += GradientPairPrecise{gpair(sample, target)};
    }
  });

  // Fold the remaining rows into row 0, which then holds the local totals.
  auto h_sum = h_partial.Slice(0, linalg::All());
  for (std::int32_t tid = 1; tid < n_threads; ++tid) {
    for (bst_target_t target = 0; target < n_targets; ++target) {
      h_sum(target) += h_partial(tid, target);
    }
  }

  // The totals are handed to the sum-allreduce as a flat buffer of doubles (two per
  // gradient pair), which requires the row to be contiguous.
  CHECK(h_sum.CContiguous());
  auto* flat_sums = reinterpret_cast<double*>(h_sum.Values().data());
  collective::Allreduce<collective::Operation::kSum>(flat_sums, h_sum.Size() * 2);

  for (std::size_t target = 0; target < h_sum.Size(); ++target) {
    auto const& total = h_sum(target);
    out(target) = static_cast<float>(CalcUnregularizedWeight(total.GetGrad(), total.GetHess()));
  }
}
} // namespace cpu_impl
namespace cuda_impl {
#if defined(XGBOOST_USE_CUDA)
// Declaration only; the definition lives in the CUDA source (fit_stump.cu).
void FitStump(Context const* ctx, linalg::TensorView<GradientPair const, 2> gpair,
              linalg::VectorView<float> out);
#else
// Stand-in used when XGBOOST_USE_CUDA is not defined: it ignores its arguments and only
// calls common::AssertGPUSupport().
inline void FitStump(Context const*, linalg::TensorView<GradientPair const, 2>,
                     linalg::VectorView<float>) {
  common::AssertGPUSupport();
}
#endif  // defined(XGBOOST_USE_CUDA)
}  // namespace cuda_impl
/**
 * \brief Fit a stump for every target and write the resulting weights to `out`.
 *
 * The flat gradient vector is viewed as a (n_samples, n_targets) matrix with
 * n_samples = gpair.Size() / n_targets, then handed to the CPU or CUDA implementation
 * depending on ctx->IsCPU().
 *
 * \param ctx       Context selecting the device (ctx->gpu_id) and the implementation.
 * \param gpair     Gradient pairs for all samples and targets; moved to ctx->gpu_id.
 * \param n_targets Number of targets; must be greater than zero.
 * \param out       Output vector; placed on ctx->gpu_id and reshaped to n_targets.
 */
void FitStump(Context const* ctx, HostDeviceVector<GradientPair> const& gpair,
              bst_target_t n_targets, linalg::Vector<float>* out) {
  // n_targets is the divisor used to derive n_samples below; fail with a message instead
  // of performing an integer division by zero.
  CHECK_GT(n_targets, 0u) << "The number of targets must be greater than 0.";

  out->SetDevice(ctx->gpu_id);
  out->Reshape(n_targets);
  auto n_samples = gpair.Size() / n_targets;

  gpair.SetDevice(ctx->gpu_id);
  linalg::TensorView<GradientPair const, 2> gpair_t{
      ctx->IsCPU() ? gpair.ConstHostSpan() : gpair.ConstDeviceSpan(),
      {n_samples, n_targets},
      ctx->gpu_id};

  if (ctx->IsCPU()) {
    cpu_impl::FitStump(ctx, gpair_t, out->HostView());
  } else {
    cuda_impl::FitStump(ctx, gpair_t, out->View(ctx->gpu_id));
  }
}
} // namespace tree
} // namespace xgboost

63
src/tree/fit_stump.cu Normal file
View File

@@ -0,0 +1,63 @@
/**
* Copyright 2022 by XGBoost Contributors
*
* \brief Utilities for estimating initial score.
*/
#if !defined(NOMINMAX) && defined(_WIN32)
#define NOMINMAX
#endif // !defined(NOMINMAX)
#include <thrust/execution_policy.h> // cuda::par
#include <thrust/iterator/counting_iterator.h> // thrust::make_counting_iterator
#include <cstddef> // std::size_t
#include "../collective/device_communicator.cuh" // DeviceCommunicator
#include "../common/device_helpers.cuh" // dh::MakeTransformIterator
#include "fit_stump.h"
#include "xgboost/base.h" // GradientPairPrecise, GradientPair, XGBOOST_DEVICE
#include "xgboost/context.h" // Context
#include "xgboost/linalg.h" // TensorView, Tensor, Constant
#include "xgboost/logging.h" // CHECK_EQ
#include "xgboost/span.h" // span
namespace xgboost {
namespace tree {
namespace cuda_impl {
/**
 * \brief CUDA implementation: compute one stump weight per target from the gradient sums.
 *
 * \param ctx   Context supplying the device ordinal (ctx->gpu_id).
 * \param gpair Gradient pairs indexed as (sample, target).
 * \param out   Output vector; receives one weight per target. Must be contiguous.
 */
void FitStump(Context const* ctx, linalg::TensorView<GradientPair const, 2> gpair,
              linalg::VectorView<float> out) {
  auto n_targets = out.Size();
  CHECK_EQ(n_targets, gpair.Shape(1));
  linalg::Vector<GradientPairPrecise> sum = linalg::Constant(ctx, GradientPairPrecise{}, n_targets);
  CHECK(out.Contiguous());

  auto n_samples = gpair.Shape(0);
  // Count with std::size_t instead of `0ul`: unsigned long is only 32 bits wide on LLP64
  // platforms (e.g. 64-bit Windows), which would wrap the flat index for large inputs and
  // disagrees with the std::size_t parameter of the functors below.
  auto idx_begin = thrust::make_counting_iterator(std::size_t{0});

  // Reduce by column. The flat index enumerates the matrix target by target, so every
  // target forms one run of n_samples consecutive equal keys for reduce_by_key.
  auto key_it = dh::MakeTransformIterator<bst_target_t>(
      idx_begin, [=] XGBOOST_DEVICE(std::size_t i) -> bst_target_t { return i / n_samples; });
  auto grad_it = dh::MakeTransformIterator<GradientPairPrecise>(
      idx_begin, [=] XGBOOST_DEVICE(std::size_t i) -> GradientPairPrecise {
        auto target = i / n_samples;
        auto sample = i % n_samples;
        return GradientPairPrecise{gpair(sample, target)};
      });

  auto d_sum = sum.View(ctx->gpu_id);
  CHECK(d_sum.CContiguous());

  dh::XGBCachingDeviceAllocator<char> alloc;
  auto policy = thrust::cuda::par(alloc);
  // Only the reduced values are needed; the output keys are discarded.
  thrust::reduce_by_key(policy, key_it, key_it + gpair.Size(), grad_it,
                        thrust::make_discard_iterator(), dh::tbegin(d_sum.Values()));

  // The sums are passed to the device communicator as a flat buffer of doubles, two per
  // gradient pair.
  collective::DeviceCommunicator* communicator = collective::Communicator::GetDevice(ctx->gpu_id);
  communicator->AllReduceSum(reinterpret_cast<double*>(d_sum.Values().data()), d_sum.Size() * 2);

  // `mutable` because the functor writes through its by-value copy of the `out` view.
  thrust::for_each_n(policy, thrust::make_counting_iterator(std::size_t{0}), n_targets,
                     [=] XGBOOST_DEVICE(std::size_t i) mutable {
                       out(i) = static_cast<float>(
                           CalcUnregularizedWeight(d_sum(i).GetGrad(), d_sum(i).GetHess()));
                     });
}
} // namespace cuda_impl
} // namespace tree
} // namespace xgboost

37
src/tree/fit_stump.h Normal file
View File

@@ -0,0 +1,37 @@
/**
* Copyright 2022 by XGBoost Contributors
*
* \brief Utilities for estimating initial score.
*/
#ifndef XGBOOST_TREE_FIT_STUMP_H_
#define XGBOOST_TREE_FIT_STUMP_H_
#if !defined(NOMINMAX) && defined(_WIN32)
#define NOMINMAX
#endif // !defined(NOMINMAX)
#include <algorithm> // std::max
#include "../common/common.h" // AssertGPUSupport
#include "xgboost/base.h" // GradientPair
#include "xgboost/context.h" // Context
#include "xgboost/host_device_vector.h" // HostDeviceVector
#include "xgboost/linalg.h" // TensorView
namespace xgboost {
namespace tree {
/**
 * @brief Weight computed from gradient statistics without any regularization term:
 *        -sum_grad / max(sum_hess, kRtEps).
 *
 * Both sums are converted to double before use. Without the explicit conversion the
 * mixed-type call std::max(T, double) only deduces for T = double, so the template could
 * not be instantiated for any other type. Results for T = double are unchanged.
 *
 * @param sum_grad Sum of gradients.
 * @param sum_hess Sum of hessians; values below kRtEps are replaced by kRtEps in the
 *                 denominator.
 *
 * @return The weight as a double.
 */
template <typename T>
XGBOOST_DEVICE inline double CalcUnregularizedWeight(T sum_grad, T sum_hess) {
  auto const grad = static_cast<double>(sum_grad);
  auto const hess = static_cast<double>(sum_hess);
  return -grad / std::max(hess, static_cast<double>(kRtEps));
}
/**
 * @brief Fit a tree stump as an estimation of base_score.
 *
 * For every target the gradients and hessians of all samples are summed, the sums are
 * combined with a sum-allreduce, and the weight is computed as
 * -sum_grad / max(sum_hess, kRtEps) (see CalcUnregularizedWeight).
 *
 * @param ctx       Context; ctx->IsCPU() selects the CPU or CUDA implementation and
 *                  ctx->gpu_id is the device both `gpair` and `out` are set to.
 * @param gpair     Gradient pairs, viewed as a matrix of shape
 *                  (gpair.Size() / n_targets, n_targets).
 * @param n_targets Number of targets; used as the divisor when deriving the sample count.
 * @param out       Output vector, reshaped to n_targets; receives one weight per target.
 */
void FitStump(Context const* ctx, HostDeviceVector<GradientPair> const& gpair,
bst_target_t n_targets, linalg::Vector<float>* out);
} // namespace tree
} // namespace xgboost
#endif // XGBOOST_TREE_FIT_STUMP_H_