Use matrix for gradient. (#9508)

- Use `linalg::Matrix` for storing gradients.
- New API for custom objectives.
- Custom objectives for multi-class/multi-target are now required to return gradients of the correct shape (see the sketch below).
- The Python interface now accepts arrays with any strides (row-major or column-major) from custom objectives.
Author: Jiaming Yuan
Date: 2023-08-24 05:29:52 +08:00 (committed via GitHub)
Parent: 6103dca0bb
Commit: 972730cde0
77 changed files with 1052 additions and 651 deletions
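To illustrate the shape requirement above, here is a minimal sketch of a conforming multi-class custom objective after this change, modeled on the `multi:softprob` style demos. The function name, synthetic data, and the `2 * p * (1 - p)` diagonal Hessian approximation are illustrative, not part of this patch:

```python
import numpy as np
import xgboost as xgb


def softprob_obj(predt: np.ndarray, dtrain: xgb.DMatrix):
    """Multi-class objective returning grad/hess of shape (n_samples, n_classes)."""
    labels = dtrain.get_label().astype(np.int64)
    predt = predt.reshape(labels.shape[0], -1)  # one column per class
    # Row-wise softmax probabilities.
    e = np.exp(predt - predt.max(axis=1, keepdims=True))
    prob = e / e.sum(axis=1, keepdims=True)
    # Gradient of cross-entropy w.r.t. raw scores: p - one_hot(label).
    grad = prob.copy()
    grad[np.arange(labels.shape[0]), labels] -= 1.0
    # Diagonal Hessian approximation, as in the softprob demos.
    hess = np.maximum(2.0 * prob * (1.0 - prob), 1e-6)
    # New contract: return the 2-D (n_samples, n_classes) arrays directly;
    # flattening to 1-D is no longer expected.
    return grad, hess


rng = np.random.default_rng(0)
X = rng.normal(size=(128, 4))
y = rng.integers(0, 3, size=128)
dtrain = xgb.DMatrix(X, label=y)
booster = xgb.train(
    {"num_class": 3, "disable_default_eval_metric": True},
    dtrain,
    num_boost_round=4,
    obj=softprob_obj,
)
```

Because the Python side now accepts arbitrary strides, the returned arrays may be either C- or Fortran-ordered; e.g. returning `np.asfortranarray(grad)` works as well.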

@@ -45,30 +45,31 @@ class CoordinateUpdater : public LinearUpdater {
     out["coordinate_param"] = ToJson(cparam_);
   }
-  void Update(HostDeviceVector<GradientPair> *in_gpair, DMatrix *p_fmat,
-              gbm::GBLinearModel *model, double sum_instance_weight) override {
+  void Update(linalg::Matrix<GradientPair> *in_gpair, DMatrix *p_fmat, gbm::GBLinearModel *model,
+              double sum_instance_weight) override {
+    auto gpair = in_gpair->Data();
     tparam_.DenormalizePenalties(sum_instance_weight);
     const int ngroup = model->learner_model_param->num_output_group;
     // update bias
     for (int group_idx = 0; group_idx < ngroup; ++group_idx) {
-      auto grad = GetBiasGradientParallel(group_idx, ngroup, in_gpair->ConstHostVector(), p_fmat,
+      auto grad = GetBiasGradientParallel(group_idx, ngroup, gpair->ConstHostVector(), p_fmat,
                                           ctx_->Threads());
       auto dbias = static_cast<float>(tparam_.learning_rate *
                                       CoordinateDeltaBias(grad.first, grad.second));
       model->Bias()[group_idx] += dbias;
-      UpdateBiasResidualParallel(ctx_, group_idx, ngroup, dbias, &in_gpair->HostVector(), p_fmat);
+      UpdateBiasResidualParallel(ctx_, group_idx, ngroup, dbias, &gpair->HostVector(), p_fmat);
     }
     // prepare for updating the weights
-    selector_->Setup(ctx_, *model, in_gpair->ConstHostVector(), p_fmat, tparam_.reg_alpha_denorm,
+    selector_->Setup(ctx_, *model, gpair->ConstHostVector(), p_fmat, tparam_.reg_alpha_denorm,
                      tparam_.reg_lambda_denorm, cparam_.top_k);
     // update weights
     for (int group_idx = 0; group_idx < ngroup; ++group_idx) {
       for (unsigned i = 0U; i < model->learner_model_param->num_feature; i++) {
         int fidx =
-            selector_->NextFeature(ctx_, i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
+            selector_->NextFeature(ctx_, i, *model, group_idx, gpair->ConstHostVector(), p_fmat,
                                    tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
         if (fidx < 0) break;
-        this->UpdateFeature(fidx, group_idx, &in_gpair->HostVector(), p_fmat, model);
+        this->UpdateFeature(fidx, group_idx, &gpair->HostVector(), p_fmat, model);
       }
     }
     monitor_.Stop("UpdateFeature");

@@ -93,17 +93,18 @@ class GPUCoordinateUpdater : public LinearUpdater {  // NOLINT
     }
   }
-  void Update(HostDeviceVector<GradientPair> *in_gpair, DMatrix *p_fmat,
-              gbm::GBLinearModel *model, double sum_instance_weight) override {
+  void Update(linalg::Matrix<GradientPair> *in_gpair, DMatrix *p_fmat, gbm::GBLinearModel *model,
+              double sum_instance_weight) override {
     tparam_.DenormalizePenalties(sum_instance_weight);
     monitor_.Start("LazyInitDevice");
     this->LazyInitDevice(p_fmat, *(model->learner_model_param));
     monitor_.Stop("LazyInitDevice");
     monitor_.Start("UpdateGpair");
     // Update gpair
-    if (ctx_->gpu_id >= 0) {
-      this->UpdateGpair(in_gpair->ConstHostVector());
+    if (ctx_->IsCUDA()) {
+      this->UpdateGpair(in_gpair->Data()->ConstHostVector());
     }
     monitor_.Stop("UpdateGpair");
@@ -111,15 +112,15 @@ class GPUCoordinateUpdater : public LinearUpdater {  // NOLINT
     this->UpdateBias(model);
     monitor_.Stop("UpdateBias");
     // prepare for updating the weights
-    selector_->Setup(ctx_, *model, in_gpair->ConstHostVector(), p_fmat, tparam_.reg_alpha_denorm,
-                     tparam_.reg_lambda_denorm, coord_param_.top_k);
+    selector_->Setup(ctx_, *model, in_gpair->Data()->ConstHostVector(), p_fmat,
+                     tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm, coord_param_.top_k);
     monitor_.Start("UpdateFeature");
     for (uint32_t group_idx = 0; group_idx < model->learner_model_param->num_output_group;
          ++group_idx) {
       for (auto i = 0U; i < model->learner_model_param->num_feature; i++) {
         auto fidx =
-            selector_->NextFeature(ctx_, i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
-                                   tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
+            selector_->NextFeature(ctx_, i, *model, group_idx, in_gpair->Data()->ConstHostVector(),
+                                   p_fmat, tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
         if (fidx < 0) break;
         this->UpdateFeature(fidx, group_idx, model);
       }

@@ -6,8 +6,7 @@
 #include <xgboost/linear_updater.h>
 #include "coordinate_common.h"
-namespace xgboost {
-namespace linear {
+namespace xgboost::linear {
 DMLC_REGISTRY_FILE_TAG(updater_shotgun);
@@ -32,30 +31,31 @@ class ShotgunUpdater : public LinearUpdater {
     out["linear_train_param"] = ToJson(param_);
   }
-  void Update(HostDeviceVector<GradientPair> *in_gpair, DMatrix *p_fmat,
-              gbm::GBLinearModel *model, double sum_instance_weight) override {
-    auto &gpair = in_gpair->HostVector();
+  void Update(linalg::Matrix<GradientPair> *in_gpair, DMatrix *p_fmat, gbm::GBLinearModel *model,
+              double sum_instance_weight) override {
+    auto gpair = in_gpair->Data();
     param_.DenormalizePenalties(sum_instance_weight);
     const int ngroup = model->learner_model_param->num_output_group;
     // update bias
     for (int gid = 0; gid < ngroup; ++gid) {
-      auto grad = GetBiasGradientParallel(gid, ngroup, in_gpair->ConstHostVector(), p_fmat,
+      auto grad = GetBiasGradientParallel(gid, ngroup, gpair->ConstHostVector(), p_fmat,
                                           ctx_->Threads());
       auto dbias = static_cast<bst_float>(param_.learning_rate *
                                           CoordinateDeltaBias(grad.first, grad.second));
       model->Bias()[gid] += dbias;
-      UpdateBiasResidualParallel(ctx_, gid, ngroup, dbias, &in_gpair->HostVector(), p_fmat);
+      UpdateBiasResidualParallel(ctx_, gid, ngroup, dbias, &gpair->HostVector(), p_fmat);
     }
     // lock-free parallel updates of weights
-    selector_->Setup(ctx_, *model, in_gpair->ConstHostVector(), p_fmat, param_.reg_alpha_denorm,
+    selector_->Setup(ctx_, *model, gpair->ConstHostVector(), p_fmat, param_.reg_alpha_denorm,
                      param_.reg_lambda_denorm, 0);
+    auto &h_gpair = gpair->HostVector();
     for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx_)) {
       auto page = batch.GetView();
       const auto nfeat = static_cast<bst_omp_uint>(batch.Size());
       common::ParallelFor(nfeat, ctx_->Threads(), [&](auto i) {
-        int ii = selector_->NextFeature(ctx_, i, *model, 0, in_gpair->ConstHostVector(), p_fmat,
+        int ii = selector_->NextFeature(ctx_, i, *model, 0, gpair->ConstHostVector(), p_fmat,
                                         param_.reg_alpha_denorm, param_.reg_lambda_denorm);
         if (ii < 0) return;
         const bst_uint fid = ii;
@@ -63,7 +63,7 @@ class ShotgunUpdater : public LinearUpdater {
         for (int gid = 0; gid < ngroup; ++gid) {
           double sum_grad = 0.0, sum_hess = 0.0;
           for (auto &c : col) {
-            const GradientPair &p = gpair[c.index * ngroup + gid];
+            const GradientPair &p = h_gpair[c.index * ngroup + gid];
             if (p.GetHess() < 0.0f) continue;
             const bst_float v = c.fvalue;
             sum_grad += p.GetGrad() * v;
@@ -77,7 +77,7 @@ class ShotgunUpdater : public LinearUpdater {
          w += dw;
          // update grad values
          for (auto &c : col) {
-           GradientPair &p = gpair[c.index * ngroup + gid];
+           GradientPair &p = h_gpair[c.index * ngroup + gid];
            if (p.GetHess() < 0.0f) continue;
            p += GradientPair(p.GetHess() * c.fvalue * dw, 0);
          }
@@ -98,5 +98,4 @@ XGBOOST_REGISTER_LINEAR_UPDATER(ShotgunUpdater, "shotgun")
     "Update linear model according to shotgun coordinate descent "
     "algorithm.")
     .set_body([]() { return new ShotgunUpdater(); });
-}  // namespace linear
-}  // namespace xgboost
+}  // namespace xgboost::linear