Use matrix for gradient. (#9508)

- Use the `linalg::Matrix` for storing gradients.
- New API for the custom objective.
- Custom objective for multi-class/multi-target is now required to return the correct shape.
- Custom objectives for Python can accept arrays with any strides (row-major or column-major).
This commit is contained in:
Jiaming Yuan
2023-08-24 05:29:52 +08:00
committed by GitHub
parent 6103dca0bb
commit 972730cde0
77 changed files with 1052 additions and 651 deletions

View File

@@ -29,7 +29,6 @@
#include "../common/error_msg.h"
namespace xgboost::gbm {
DMLC_REGISTRY_FILE_TAG(gblinear);
// training parameters
@@ -142,7 +141,7 @@ class GBLinear : public GradientBooster {
this->updater_->SaveConfig(&j_updater);
}
void DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair, PredictionCacheEntry*,
void DoBoost(DMatrix* p_fmat, linalg::Matrix<GradientPair>* in_gpair, PredictionCacheEntry*,
ObjFunction const*) override {
monitor_.Start("DoBoost");
@@ -232,9 +231,8 @@ class GBLinear : public GradientBooster {
std::fill(contribs.begin(), contribs.end(), 0);
}
std::vector<std::string> DumpModel(const FeatureMap& fmap,
bool with_stats,
std::string format) const override {
[[nodiscard]] std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,
std::string format) const override {
return model_.DumpModel(fmap, with_stats, format);
}
@@ -263,7 +261,7 @@ class GBLinear : public GradientBooster {
}
}
bool UseGPU() const override {
[[nodiscard]] bool UseGPU() const override {
if (param_.updater == "gpu_coord_descent") {
return true;
} else {

View File

@@ -167,8 +167,8 @@ void GBTree::Configure(Args const& cfg) {
}
}
void GPUCopyGradient(HostDeviceVector<GradientPair> const*, bst_group_t, bst_group_t,
HostDeviceVector<GradientPair>*)
void GPUCopyGradient(Context const*, linalg::Matrix<GradientPair> const*, bst_group_t,
linalg::Matrix<GradientPair>*)
#if defined(XGBOOST_USE_CUDA)
; // NOLINT
#else
@@ -177,16 +177,19 @@ void GPUCopyGradient(HostDeviceVector<GradientPair> const*, bst_group_t, bst_gro
}
#endif
void CopyGradient(HostDeviceVector<GradientPair> const* in_gpair, int32_t n_threads,
bst_group_t n_groups, bst_group_t group_id,
HostDeviceVector<GradientPair>* out_gpair) {
if (in_gpair->DeviceIdx() != Context::kCpuId) {
GPUCopyGradient(in_gpair, n_groups, group_id, out_gpair);
void CopyGradient(Context const* ctx, linalg::Matrix<GradientPair> const* in_gpair,
bst_group_t group_id, linalg::Matrix<GradientPair>* out_gpair) {
out_gpair->SetDevice(ctx->Device());
out_gpair->Reshape(in_gpair->Shape(0), 1);
if (ctx->IsCUDA()) {
GPUCopyGradient(ctx, in_gpair, group_id, out_gpair);
} else {
std::vector<GradientPair> &tmp_h = out_gpair->HostVector();
const auto& gpair_h = in_gpair->ConstHostVector();
common::ParallelFor(out_gpair->Size(), n_threads,
[&](auto i) { tmp_h[i] = gpair_h[i * n_groups + group_id]; });
auto const& in = *in_gpair;
auto target_gpair = in.Slice(linalg::All(), group_id);
auto h_tmp = out_gpair->HostView();
auto h_in = in.HostView().Slice(linalg::All(), group_id);
CHECK_EQ(h_tmp.Size(), h_in.Size());
common::ParallelFor(h_in.Size(), ctx->Threads(), [&](auto i) { h_tmp(i) = h_in(i); });
}
}
@@ -215,7 +218,7 @@ void GBTree::UpdateTreeLeaf(DMatrix const* p_fmat, HostDeviceVector<float> const
}
}
void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
void GBTree::DoBoost(DMatrix* p_fmat, linalg::Matrix<GradientPair>* in_gpair,
PredictionCacheEntry* predt, ObjFunction const* obj) {
if (model_.learner_model_param->IsVectorLeaf()) {
CHECK(tparam_.tree_method == TreeMethod::kHist || tparam_.tree_method == TreeMethod::kAuto)
@@ -263,12 +266,12 @@ void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
}
} else {
CHECK_EQ(in_gpair->Size() % n_groups, 0U) << "must have exactly ngroup * nrow gpairs";
HostDeviceVector<GradientPair> tmp(in_gpair->Size() / n_groups, GradientPair(),
in_gpair->DeviceIdx());
linalg::Matrix<GradientPair> tmp{{in_gpair->Shape(0), static_cast<std::size_t>(1ul)},
ctx_->Ordinal()};
bool update_predict = true;
for (bst_target_t gid = 0; gid < n_groups; ++gid) {
node_position.clear();
CopyGradient(in_gpair, ctx_->Threads(), n_groups, gid, &tmp);
CopyGradient(ctx_, in_gpair, gid, &tmp);
TreesOneGroup ret;
BoostNewTrees(&tmp, p_fmat, gid, &node_position, &ret);
UpdateTreeLeaf(p_fmat, predt->predictions, obj, gid, node_position, &ret);
@@ -289,7 +292,7 @@ void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
this->CommitModel(std::move(new_trees));
}
void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat, int bst_group,
void GBTree::BoostNewTrees(linalg::Matrix<GradientPair>* gpair, DMatrix* p_fmat, int bst_group,
std::vector<HostDeviceVector<bst_node_t>>* out_position,
TreesOneGroup* ret) {
std::vector<RegTree*> new_trees;

View File

@@ -1,22 +1,24 @@
/**
* Copyright 2021-2023, XGBoost Contributors
*/
#include "../common/device_helpers.cuh"
#include "xgboost/linalg.h"
#include "xgboost/span.h"
#include <thrust/iterator/counting_iterator.h> // for make_counting_iterator
#include "../common/cuda_context.cuh"
#include "../common/device_helpers.cuh" // for MakeTransformIterator
#include "xgboost/base.h" // for GradientPair
#include "xgboost/linalg.h" // for Matrix
namespace xgboost::gbm {
void GPUCopyGradient(HostDeviceVector<GradientPair> const *in_gpair,
bst_group_t n_groups, bst_group_t group_id,
HostDeviceVector<GradientPair> *out_gpair) {
auto mat = linalg::TensorView<GradientPair const, 2>(
in_gpair->ConstDeviceSpan(),
{in_gpair->Size() / n_groups, static_cast<size_t>(n_groups)},
in_gpair->DeviceIdx());
auto v_in = mat.Slice(linalg::All(), group_id);
out_gpair->Resize(v_in.Size());
auto d_out = out_gpair->DeviceSpan();
dh::LaunchN(v_in.Size(), [=] __device__(size_t i) { d_out[i] = v_in(i); });
// Extract the gradient column for one output group from the multi-target gradient
// matrix and copy it into `out_gpair` on the device.
//
// `in_gpair`  : full gradient matrix, one column per group/target.
// `group_id`  : which column to extract.
// `out_gpair` : destination; reshaped here to an (n_rows, 1) column matrix on
//               `ctx`'s device, so callers need not pre-size it.
void GPUCopyGradient(Context const *ctx, linalg::Matrix<GradientPair> const *in_gpair,
bst_group_t group_id, linalg::Matrix<GradientPair> *out_gpair) {
// Non-owning device view of column `group_id`; Slice keeps strides, so v_in
// indexes the original (possibly non-contiguous) storage.
auto v_in = in_gpair->View(ctx->Device()).Slice(linalg::All(), group_id);
out_gpair->SetDevice(ctx->Device());
out_gpair->Reshape(v_in.Size(), 1);
auto d_out = out_gpair->View(ctx->Device());
auto cuctx = ctx->CUDACtx();
// Transform iterator gathers v_in(i) element-by-element, hiding the column
// stride from thrust::copy, which requires contiguous-looking input.
auto it = dh::MakeTransformIterator<GradientPair>(
thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) { return v_in(i); });
// NOTE(review): CTP() is presumably a thrust execution policy bound to the
// context's CUDA stream — confirm; the copy is then stream-ordered, not synchronous.
thrust::copy(cuctx->CTP(), it, it + v_in.Size(), d_out.Values().data());
}
void GPUDartPredictInc(common::Span<float> out_predts,

View File

@@ -183,8 +183,8 @@ class GBTree : public GradientBooster {
/**
* @brief Carry out one iteration of boosting.
*/
void DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
PredictionCacheEntry* predt, ObjFunction const* obj) override;
void DoBoost(DMatrix* p_fmat, linalg::Matrix<GradientPair>* in_gpair, PredictionCacheEntry* predt,
ObjFunction const* obj) override;
[[nodiscard]] bool UseGPU() const override { return tparam_.tree_method == TreeMethod::kGPUHist; }
@@ -326,7 +326,7 @@ class GBTree : public GradientBooster {
}
protected:
void BoostNewTrees(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat, int bst_group,
void BoostNewTrees(linalg::Matrix<GradientPair>* gpair, DMatrix* p_fmat, int bst_group,
std::vector<HostDeviceVector<bst_node_t>>* out_position,
std::vector<std::unique_ptr<RegTree>>* ret);