Use matrix for gradient. (#9508)
- Use `linalg::Matrix` for storing gradients.
- New API for the custom objective.
- Custom objectives for multi-class/multi-target are now required to return gradients with the correct shape.
- The Python custom objective can accept arrays with any strides (row-major or column-major).
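For downstream code, the shape requirement is the most visible change: a multi-class/multi-target objective now produces one gradient pair per sample and per target, laid out as an n_samples x n_targets matrix rather than an opaque flat vector. Below is a minimal standalone sketch of that contract; GradMatrix and ToyObjective are made-up stand-ins for illustration, not XGBoost API, and only the row-major `i * n_targets + gid` addressing (the same `c.index * ngroup + gid` indexing the updaters in this diff use) is taken from the source.

// Standalone sketch of the new shape contract. GradMatrix is a mock stand-in
// for xgboost::linalg::Matrix<GradientPair>; it is not the real class.
#include <cassert>
#include <cstddef>
#include <vector>

struct GradientPair {
  float grad{0.0f};
  float hess{0.0f};
};

struct GradMatrix {
  std::size_t n_samples{0};
  std::size_t n_targets{0};
  std::vector<GradientPair> data;  // contiguous, row-major: sample-major, target-minor

  GradMatrix(std::size_t n, std::size_t t) : n_samples{n}, n_targets{t}, data(n * t) {}

  // Same addressing the updaters use below: h_gpair[c.index * ngroup + gid].
  GradientPair &At(std::size_t i, std::size_t gid) { return data[i * n_targets + gid]; }
};

// A toy multi-class "objective": it must fill the full n_samples x n_classes grid.
GradMatrix ToyObjective(std::size_t n_samples, std::size_t n_classes) {
  GradMatrix gpair{n_samples, n_classes};
  for (std::size_t i = 0; i < n_samples; ++i) {
    for (std::size_t c = 0; c < n_classes; ++c) {
      gpair.At(i, c) = GradientPair{0.0f, 1.0f};  // placeholder grad/hess values
    }
  }
  return gpair;
}

int main() {
  auto gpair = ToyObjective(8, 3);
  assert(gpair.data.size() == 8 * 3);  // the "correct shape" requirement
  return 0;
}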
@@ -45,30 +45,31 @@ class CoordinateUpdater : public LinearUpdater {
     out["coordinate_param"] = ToJson(cparam_);
   }
 
-  void Update(HostDeviceVector<GradientPair> *in_gpair, DMatrix *p_fmat,
-              gbm::GBLinearModel *model, double sum_instance_weight) override {
+  void Update(linalg::Matrix<GradientPair> *in_gpair, DMatrix *p_fmat, gbm::GBLinearModel *model,
+              double sum_instance_weight) override {
+    auto gpair = in_gpair->Data();
     tparam_.DenormalizePenalties(sum_instance_weight);
     const int ngroup = model->learner_model_param->num_output_group;
     // update bias
     for (int group_idx = 0; group_idx < ngroup; ++group_idx) {
-      auto grad = GetBiasGradientParallel(group_idx, ngroup, in_gpair->ConstHostVector(), p_fmat,
+      auto grad = GetBiasGradientParallel(group_idx, ngroup, gpair->ConstHostVector(), p_fmat,
                                           ctx_->Threads());
       auto dbias = static_cast<float>(tparam_.learning_rate *
                                       CoordinateDeltaBias(grad.first, grad.second));
       model->Bias()[group_idx] += dbias;
-      UpdateBiasResidualParallel(ctx_, group_idx, ngroup, dbias, &in_gpair->HostVector(), p_fmat);
+      UpdateBiasResidualParallel(ctx_, group_idx, ngroup, dbias, &gpair->HostVector(), p_fmat);
     }
     // prepare for updating the weights
-    selector_->Setup(ctx_, *model, in_gpair->ConstHostVector(), p_fmat, tparam_.reg_alpha_denorm,
+    selector_->Setup(ctx_, *model, gpair->ConstHostVector(), p_fmat, tparam_.reg_alpha_denorm,
                      tparam_.reg_lambda_denorm, cparam_.top_k);
     // update weights
     for (int group_idx = 0; group_idx < ngroup; ++group_idx) {
       for (unsigned i = 0U; i < model->learner_model_param->num_feature; i++) {
         int fidx =
-            selector_->NextFeature(ctx_, i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
+            selector_->NextFeature(ctx_, i, *model, group_idx, gpair->ConstHostVector(), p_fmat,
                                    tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
         if (fidx < 0) break;
-        this->UpdateFeature(fidx, group_idx, &in_gpair->HostVector(), p_fmat, model);
+        this->UpdateFeature(fidx, group_idx, &gpair->HostVector(), p_fmat, model);
       }
     }
     monitor_.Stop("UpdateFeature");
@@ -93,17 +93,18 @@ class GPUCoordinateUpdater : public LinearUpdater {  // NOLINT
     }
   }
 
-  void Update(HostDeviceVector<GradientPair> *in_gpair, DMatrix *p_fmat,
-              gbm::GBLinearModel *model, double sum_instance_weight) override {
+  void Update(linalg::Matrix<GradientPair> *in_gpair, DMatrix *p_fmat, gbm::GBLinearModel *model,
+              double sum_instance_weight) override {
     tparam_.DenormalizePenalties(sum_instance_weight);
     monitor_.Start("LazyInitDevice");
     this->LazyInitDevice(p_fmat, *(model->learner_model_param));
     monitor_.Stop("LazyInitDevice");
 
     monitor_.Start("UpdateGpair");
     // Update gpair
-    if (ctx_->gpu_id >= 0) {
-      this->UpdateGpair(in_gpair->ConstHostVector());
+    if (ctx_->IsCUDA()) {
+      this->UpdateGpair(in_gpair->Data()->ConstHostVector());
     }
     monitor_.Stop("UpdateGpair");
@@ -111,15 +112,15 @@ class GPUCoordinateUpdater : public LinearUpdater {  // NOLINT
     this->UpdateBias(model);
     monitor_.Stop("UpdateBias");
     // prepare for updating the weights
-    selector_->Setup(ctx_, *model, in_gpair->ConstHostVector(), p_fmat, tparam_.reg_alpha_denorm,
-                     tparam_.reg_lambda_denorm, coord_param_.top_k);
+    selector_->Setup(ctx_, *model, in_gpair->Data()->ConstHostVector(), p_fmat,
+                     tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm, coord_param_.top_k);
     monitor_.Start("UpdateFeature");
     for (uint32_t group_idx = 0; group_idx < model->learner_model_param->num_output_group;
          ++group_idx) {
       for (auto i = 0U; i < model->learner_model_param->num_feature; i++) {
         auto fidx =
-            selector_->NextFeature(ctx_, i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
-                                   tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
+            selector_->NextFeature(ctx_, i, *model, group_idx, in_gpair->Data()->ConstHostVector(),
+                                   p_fmat, tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
         if (fidx < 0) break;
         this->UpdateFeature(fidx, group_idx, model);
       }
@@ -6,8 +6,7 @@
 #include <xgboost/linear_updater.h>
 #include "coordinate_common.h"
 
-namespace xgboost {
-namespace linear {
+namespace xgboost::linear {
 
 DMLC_REGISTRY_FILE_TAG(updater_shotgun);
@@ -32,30 +31,31 @@ class ShotgunUpdater : public LinearUpdater {
     out["linear_train_param"] = ToJson(param_);
   }
 
-  void Update(HostDeviceVector<GradientPair> *in_gpair, DMatrix *p_fmat,
-              gbm::GBLinearModel *model, double sum_instance_weight) override {
-    auto &gpair = in_gpair->HostVector();
+  void Update(linalg::Matrix<GradientPair> *in_gpair, DMatrix *p_fmat, gbm::GBLinearModel *model,
+              double sum_instance_weight) override {
+    auto gpair = in_gpair->Data();
     param_.DenormalizePenalties(sum_instance_weight);
     const int ngroup = model->learner_model_param->num_output_group;
 
     // update bias
     for (int gid = 0; gid < ngroup; ++gid) {
-      auto grad = GetBiasGradientParallel(gid, ngroup, in_gpair->ConstHostVector(), p_fmat,
+      auto grad = GetBiasGradientParallel(gid, ngroup, gpair->ConstHostVector(), p_fmat,
                                           ctx_->Threads());
       auto dbias = static_cast<bst_float>(param_.learning_rate *
                                           CoordinateDeltaBias(grad.first, grad.second));
       model->Bias()[gid] += dbias;
-      UpdateBiasResidualParallel(ctx_, gid, ngroup, dbias, &in_gpair->HostVector(), p_fmat);
+      UpdateBiasResidualParallel(ctx_, gid, ngroup, dbias, &gpair->HostVector(), p_fmat);
     }
 
     // lock-free parallel updates of weights
-    selector_->Setup(ctx_, *model, in_gpair->ConstHostVector(), p_fmat, param_.reg_alpha_denorm,
+    selector_->Setup(ctx_, *model, gpair->ConstHostVector(), p_fmat, param_.reg_alpha_denorm,
                      param_.reg_lambda_denorm, 0);
+    auto &h_gpair = gpair->HostVector();
     for (const auto &batch : p_fmat->GetBatches<CSCPage>(ctx_)) {
       auto page = batch.GetView();
       const auto nfeat = static_cast<bst_omp_uint>(batch.Size());
       common::ParallelFor(nfeat, ctx_->Threads(), [&](auto i) {
-        int ii = selector_->NextFeature(ctx_, i, *model, 0, in_gpair->ConstHostVector(), p_fmat,
+        int ii = selector_->NextFeature(ctx_, i, *model, 0, gpair->ConstHostVector(), p_fmat,
                                         param_.reg_alpha_denorm, param_.reg_lambda_denorm);
         if (ii < 0) return;
         const bst_uint fid = ii;
@@ -63,7 +63,7 @@ class ShotgunUpdater : public LinearUpdater {
       for (int gid = 0; gid < ngroup; ++gid) {
         double sum_grad = 0.0, sum_hess = 0.0;
         for (auto &c : col) {
-          const GradientPair &p = gpair[c.index * ngroup + gid];
+          const GradientPair &p = h_gpair[c.index * ngroup + gid];
           if (p.GetHess() < 0.0f) continue;
           const bst_float v = c.fvalue;
           sum_grad += p.GetGrad() * v;
@@ -77,7 +77,7 @@ class ShotgunUpdater : public LinearUpdater {
         w += dw;
         // update grad values
         for (auto &c : col) {
-          GradientPair &p = gpair[c.index * ngroup + gid];
+          GradientPair &p = h_gpair[c.index * ngroup + gid];
           if (p.GetHess() < 0.0f) continue;
           p += GradientPair(p.GetHess() * c.fvalue * dw, 0);
         }
@@ -98,5 +98,4 @@ XGBOOST_REGISTER_LINEAR_UPDATER(ShotgunUpdater, "shotgun")
     "Update linear model according to shotgun coordinate descent "
     "algorithm.")
     .set_body([]() { return new ShotgunUpdater(); });
-}  // namespace linear
-}  // namespace xgboost
+}  // namespace xgboost::linear
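One pattern recurs through all three updaters above: the gradient matrix is unwrapped once with `Data()` into its backing `HostDeviceVector`, and the shotgun updater additionally binds the flat host view a single time (`auto &h_gpair = gpair->HostVector();`) before entering `common::ParallelFor`, rather than calling the accessor inside the hot loop. A mock sketch of the motivation follows; MockHostDeviceVector is a stand-in, not XGBoost's real class, and its counter models the assumption (consistent with `HostDeviceVector`'s lazy host/device mirroring) that every `HostVector()` call must at least check whether a device-to-host copy is needed.

// Mock illustration of the hoisting in ShotgunUpdater::Update above.
// MockHostDeviceVector is NOT xgboost's HostDeviceVector; the access counter
// stands in for the lazy device-to-host synchronization check.
#include <cstddef>
#include <iostream>
#include <vector>

template <typename T>
class MockHostDeviceVector {
 public:
  explicit MockHostDeviceVector(std::size_t n) : data_(n) {}

  // In the real class, each call may check for (or trigger) a device-to-host sync.
  std::vector<T> &HostVector() {
    ++host_accesses_;
    return data_;
  }

  int host_accesses_{0};

 private:
  std::vector<T> data_;
};

int main() {
  MockHostDeviceVector<float> gpair(1024);
  auto &h_gpair = gpair.HostVector();  // bound once, as in the diff
  for (std::size_t i = 0; i < h_gpair.size(); ++i) {
    h_gpair[i] *= 0.5f;  // the hot loop touches the reference, not the accessor
  }
  std::cout << "host accesses: " << gpair.host_accesses_ << "\n";  // prints 1
  return 0;
}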