Use matrix for gradient. (#9508)
- Use `linalg::Matrix` for storing gradients. - New API for the custom objective. - Custom objectives for multi-class/multi-target are now required to return gradients with the correct shape. - Custom objectives in Python can now accept arrays with any strides (row-major or column-major).
This commit is contained in:
@@ -29,7 +29,6 @@
|
||||
#include "../common/error_msg.h"
|
||||
|
||||
namespace xgboost::gbm {
|
||||
|
||||
DMLC_REGISTRY_FILE_TAG(gblinear);
|
||||
|
||||
// training parameters
|
||||
@@ -142,7 +141,7 @@ class GBLinear : public GradientBooster {
|
||||
this->updater_->SaveConfig(&j_updater);
|
||||
}
|
||||
|
||||
void DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair, PredictionCacheEntry*,
|
||||
void DoBoost(DMatrix* p_fmat, linalg::Matrix<GradientPair>* in_gpair, PredictionCacheEntry*,
|
||||
ObjFunction const*) override {
|
||||
monitor_.Start("DoBoost");
|
||||
|
||||
@@ -232,9 +231,8 @@ class GBLinear : public GradientBooster {
|
||||
std::fill(contribs.begin(), contribs.end(), 0);
|
||||
}
|
||||
|
||||
std::vector<std::string> DumpModel(const FeatureMap& fmap,
|
||||
bool with_stats,
|
||||
std::string format) const override {
|
||||
[[nodiscard]] std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,
|
||||
std::string format) const override {
|
||||
return model_.DumpModel(fmap, with_stats, format);
|
||||
}
|
||||
|
||||
@@ -263,7 +261,7 @@ class GBLinear : public GradientBooster {
|
||||
}
|
||||
}
|
||||
|
||||
bool UseGPU() const override {
|
||||
[[nodiscard]] bool UseGPU() const override {
|
||||
if (param_.updater == "gpu_coord_descent") {
|
||||
return true;
|
||||
} else {
|
||||
|
||||
@@ -167,8 +167,8 @@ void GBTree::Configure(Args const& cfg) {
|
||||
}
|
||||
}
|
||||
|
||||
void GPUCopyGradient(HostDeviceVector<GradientPair> const*, bst_group_t, bst_group_t,
|
||||
HostDeviceVector<GradientPair>*)
|
||||
void GPUCopyGradient(Context const*, linalg::Matrix<GradientPair> const*, bst_group_t,
|
||||
linalg::Matrix<GradientPair>*)
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
; // NOLINT
|
||||
#else
|
||||
@@ -177,16 +177,19 @@ void GPUCopyGradient(HostDeviceVector<GradientPair> const*, bst_group_t, bst_gro
|
||||
}
|
||||
#endif
|
||||
|
||||
void CopyGradient(HostDeviceVector<GradientPair> const* in_gpair, int32_t n_threads,
|
||||
bst_group_t n_groups, bst_group_t group_id,
|
||||
HostDeviceVector<GradientPair>* out_gpair) {
|
||||
if (in_gpair->DeviceIdx() != Context::kCpuId) {
|
||||
GPUCopyGradient(in_gpair, n_groups, group_id, out_gpair);
|
||||
void CopyGradient(Context const* ctx, linalg::Matrix<GradientPair> const* in_gpair,
|
||||
bst_group_t group_id, linalg::Matrix<GradientPair>* out_gpair) {
|
||||
out_gpair->SetDevice(ctx->Device());
|
||||
out_gpair->Reshape(in_gpair->Shape(0), 1);
|
||||
if (ctx->IsCUDA()) {
|
||||
GPUCopyGradient(ctx, in_gpair, group_id, out_gpair);
|
||||
} else {
|
||||
std::vector<GradientPair> &tmp_h = out_gpair->HostVector();
|
||||
const auto& gpair_h = in_gpair->ConstHostVector();
|
||||
common::ParallelFor(out_gpair->Size(), n_threads,
|
||||
[&](auto i) { tmp_h[i] = gpair_h[i * n_groups + group_id]; });
|
||||
auto const& in = *in_gpair;
|
||||
auto target_gpair = in.Slice(linalg::All(), group_id);
|
||||
auto h_tmp = out_gpair->HostView();
|
||||
auto h_in = in.HostView().Slice(linalg::All(), group_id);
|
||||
CHECK_EQ(h_tmp.Size(), h_in.Size());
|
||||
common::ParallelFor(h_in.Size(), ctx->Threads(), [&](auto i) { h_tmp(i) = h_in(i); });
|
||||
}
|
||||
}
|
||||
|
||||
@@ -215,7 +218,7 @@ void GBTree::UpdateTreeLeaf(DMatrix const* p_fmat, HostDeviceVector<float> const
|
||||
}
|
||||
}
|
||||
|
||||
void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
|
||||
void GBTree::DoBoost(DMatrix* p_fmat, linalg::Matrix<GradientPair>* in_gpair,
|
||||
PredictionCacheEntry* predt, ObjFunction const* obj) {
|
||||
if (model_.learner_model_param->IsVectorLeaf()) {
|
||||
CHECK(tparam_.tree_method == TreeMethod::kHist || tparam_.tree_method == TreeMethod::kAuto)
|
||||
@@ -263,12 +266,12 @@ void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
|
||||
}
|
||||
} else {
|
||||
CHECK_EQ(in_gpair->Size() % n_groups, 0U) << "must have exactly ngroup * nrow gpairs";
|
||||
HostDeviceVector<GradientPair> tmp(in_gpair->Size() / n_groups, GradientPair(),
|
||||
in_gpair->DeviceIdx());
|
||||
linalg::Matrix<GradientPair> tmp{{in_gpair->Shape(0), static_cast<std::size_t>(1ul)},
|
||||
ctx_->Ordinal()};
|
||||
bool update_predict = true;
|
||||
for (bst_target_t gid = 0; gid < n_groups; ++gid) {
|
||||
node_position.clear();
|
||||
CopyGradient(in_gpair, ctx_->Threads(), n_groups, gid, &tmp);
|
||||
CopyGradient(ctx_, in_gpair, gid, &tmp);
|
||||
TreesOneGroup ret;
|
||||
BoostNewTrees(&tmp, p_fmat, gid, &node_position, &ret);
|
||||
UpdateTreeLeaf(p_fmat, predt->predictions, obj, gid, node_position, &ret);
|
||||
@@ -289,7 +292,7 @@ void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
|
||||
this->CommitModel(std::move(new_trees));
|
||||
}
|
||||
|
||||
void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat, int bst_group,
|
||||
void GBTree::BoostNewTrees(linalg::Matrix<GradientPair>* gpair, DMatrix* p_fmat, int bst_group,
|
||||
std::vector<HostDeviceVector<bst_node_t>>* out_position,
|
||||
TreesOneGroup* ret) {
|
||||
std::vector<RegTree*> new_trees;
|
||||
|
||||
@@ -1,22 +1,24 @@
|
||||
/**
|
||||
* Copyright 2021-2023, XGBoost Contributors
|
||||
*/
|
||||
#include "../common/device_helpers.cuh"
|
||||
#include "xgboost/linalg.h"
|
||||
#include "xgboost/span.h"
|
||||
#include <thrust/iterator/counting_iterator.h> // for make_counting_iterator
|
||||
|
||||
#include "../common/cuda_context.cuh"
|
||||
#include "../common/device_helpers.cuh" // for MakeTransformIterator
|
||||
#include "xgboost/base.h" // for GradientPair
|
||||
#include "xgboost/linalg.h" // for Matrix
|
||||
|
||||
namespace xgboost::gbm {
|
||||
void GPUCopyGradient(HostDeviceVector<GradientPair> const *in_gpair,
|
||||
bst_group_t n_groups, bst_group_t group_id,
|
||||
HostDeviceVector<GradientPair> *out_gpair) {
|
||||
auto mat = linalg::TensorView<GradientPair const, 2>(
|
||||
in_gpair->ConstDeviceSpan(),
|
||||
{in_gpair->Size() / n_groups, static_cast<size_t>(n_groups)},
|
||||
in_gpair->DeviceIdx());
|
||||
auto v_in = mat.Slice(linalg::All(), group_id);
|
||||
out_gpair->Resize(v_in.Size());
|
||||
auto d_out = out_gpair->DeviceSpan();
|
||||
dh::LaunchN(v_in.Size(), [=] __device__(size_t i) { d_out[i] = v_in(i); });
|
||||
void GPUCopyGradient(Context const *ctx, linalg::Matrix<GradientPair> const *in_gpair,
|
||||
bst_group_t group_id, linalg::Matrix<GradientPair> *out_gpair) {
|
||||
auto v_in = in_gpair->View(ctx->Device()).Slice(linalg::All(), group_id);
|
||||
out_gpair->SetDevice(ctx->Device());
|
||||
out_gpair->Reshape(v_in.Size(), 1);
|
||||
auto d_out = out_gpair->View(ctx->Device());
|
||||
auto cuctx = ctx->CUDACtx();
|
||||
auto it = dh::MakeTransformIterator<GradientPair>(
|
||||
thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) { return v_in(i); });
|
||||
thrust::copy(cuctx->CTP(), it, it + v_in.Size(), d_out.Values().data());
|
||||
}
|
||||
|
||||
void GPUDartPredictInc(common::Span<float> out_predts,
|
||||
|
||||
@@ -183,8 +183,8 @@ class GBTree : public GradientBooster {
|
||||
/**
|
||||
* @brief Carry out one iteration of boosting.
|
||||
*/
|
||||
void DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
|
||||
PredictionCacheEntry* predt, ObjFunction const* obj) override;
|
||||
void DoBoost(DMatrix* p_fmat, linalg::Matrix<GradientPair>* in_gpair, PredictionCacheEntry* predt,
|
||||
ObjFunction const* obj) override;
|
||||
|
||||
[[nodiscard]] bool UseGPU() const override { return tparam_.tree_method == TreeMethod::kGPUHist; }
|
||||
|
||||
@@ -326,7 +326,7 @@ class GBTree : public GradientBooster {
|
||||
}
|
||||
|
||||
protected:
|
||||
void BoostNewTrees(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat, int bst_group,
|
||||
void BoostNewTrees(linalg::Matrix<GradientPair>* gpair, DMatrix* p_fmat, int bst_group,
|
||||
std::vector<HostDeviceVector<bst_node_t>>* out_position,
|
||||
std::vector<std::unique_ptr<RegTree>>* ret);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user