Calculate base_score from the input labels for MAE (mean absolute error). (#8107)
Fit an intercept to use as the base score for the absolute-error loss.
This commit is contained in:
@@ -161,9 +161,10 @@ class GBLinear : public GradientBooster {
|
||||
uint32_t layer_begin, uint32_t) override {
|
||||
LinearCheckLayer(layer_begin);
|
||||
const int ngroup = model_.learner_model_param->num_output_group;
|
||||
|
||||
auto base_score = learner_model_param_->BaseScore(ctx_);
|
||||
for (int gid = 0; gid < ngroup; ++gid) {
|
||||
this->Pred(inst, dmlc::BeginPtr(*out_preds), gid,
|
||||
learner_model_param_->base_score);
|
||||
this->Pred(inst, dmlc::BeginPtr(*out_preds), gid, base_score(0));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -184,6 +185,7 @@ class GBLinear : public GradientBooster {
|
||||
contribs.resize(p_fmat->Info().num_row_ * ncolumns * ngroup);
|
||||
// make sure contributions is zeroed, we could be reusing a previously allocated one
|
||||
std::fill(contribs.begin(), contribs.end(), 0);
|
||||
auto base_score = learner_model_param_->BaseScore(ctx_);
|
||||
// start collecting the contributions
|
||||
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
|
||||
// parallel over local batch
|
||||
@@ -202,8 +204,8 @@ class GBLinear : public GradientBooster {
|
||||
}
|
||||
// add base margin to BIAS
|
||||
p_contribs[ncolumns - 1] =
|
||||
model_.Bias()[gid] + ((base_margin.Size() != 0) ? base_margin(row_idx, gid)
|
||||
: learner_model_param_->base_score);
|
||||
model_.Bias()[gid] +
|
||||
((base_margin.Size() != 0) ? base_margin(row_idx, gid) : base_score(0));
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -268,10 +270,12 @@ class GBLinear : public GradientBooster {
|
||||
monitor_.Start("PredictBatchInternal");
|
||||
model_.LazyInitModel();
|
||||
std::vector<bst_float> &preds = *out_preds;
|
||||
auto base_margin = p_fmat->Info().base_margin_.View(GenericParameter::kCpuId);
|
||||
auto base_margin = p_fmat->Info().base_margin_.View(Context::kCpuId);
|
||||
// start collecting the prediction
|
||||
const int ngroup = model_.learner_model_param->num_output_group;
|
||||
preds.resize(p_fmat->Info().num_row_ * ngroup);
|
||||
|
||||
auto base_score = learner_model_param_->BaseScore(Context::kCpuId);
|
||||
for (const auto &page : p_fmat->GetBatches<SparsePage>()) {
|
||||
auto const& batch = page.GetView();
|
||||
// output convention: nrow * k, where nrow is number of rows
|
||||
@@ -285,8 +289,7 @@ class GBLinear : public GradientBooster {
|
||||
const size_t ridx = page.base_rowid + i;
|
||||
// loop over output groups
|
||||
for (int gid = 0; gid < ngroup; ++gid) {
|
||||
float margin =
|
||||
(base_margin.Size() != 0) ? base_margin(ridx, gid) : learner_model_param_->base_score;
|
||||
float margin = (base_margin.Size() != 0) ? base_margin(ridx, gid) : base_score(0);
|
||||
this->Pred(batch[i], &preds[ridx * ngroup], gid, margin);
|
||||
}
|
||||
});
|
||||
|
||||
@@ -638,13 +638,12 @@ void GPUDartPredictInc(common::Span<float> out_predts,
|
||||
}
|
||||
#endif
|
||||
|
||||
void GPUDartInplacePredictInc(common::Span<float> out_predts,
|
||||
common::Span<float> predts, float tree_w,
|
||||
size_t n_rows, float base_score,
|
||||
bst_group_t n_groups,
|
||||
bst_group_t group)
|
||||
void GPUDartInplacePredictInc(common::Span<float> /*out_predts*/, common::Span<float> /*predts*/,
|
||||
float /*tree_w*/, size_t /*n_rows*/,
|
||||
linalg::TensorView<float const, 1> /*base_score*/,
|
||||
bst_group_t /*n_groups*/, bst_group_t /*group*/)
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
; // NOLINT
|
||||
; // NOLINT
|
||||
#else
|
||||
{
|
||||
common::AssertGPUSupport();
|
||||
@@ -850,15 +849,17 @@ class Dart : public GBTree {
|
||||
size_t n_rows = p_fmat->Info().num_row_;
|
||||
if (predts.predictions.DeviceIdx() != Context::kCpuId) {
|
||||
p_out_preds->predictions.SetDevice(predts.predictions.DeviceIdx());
|
||||
auto base_score = model_.learner_model_param->BaseScore(predts.predictions.DeviceIdx());
|
||||
GPUDartInplacePredictInc(p_out_preds->predictions.DeviceSpan(),
|
||||
predts.predictions.DeviceSpan(), w, n_rows,
|
||||
model_.learner_model_param->base_score, n_groups, group);
|
||||
predts.predictions.DeviceSpan(), w, n_rows, base_score, n_groups,
|
||||
group);
|
||||
} else {
|
||||
auto base_score = model_.learner_model_param->BaseScore(Context::kCpuId);
|
||||
auto& h_predts = predts.predictions.HostVector();
|
||||
auto& h_out_predts = p_out_preds->predictions.HostVector();
|
||||
common::ParallelFor(n_rows, ctx_->Threads(), [&](auto ridx) {
|
||||
const size_t offset = ridx * n_groups + group;
|
||||
h_out_predts[offset] += (h_predts[offset] - model_.learner_model_param->base_score) * w;
|
||||
h_out_predts[offset] += (h_predts[offset] - base_score(0)) * w;
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -31,13 +31,14 @@ void GPUDartPredictInc(common::Span<float> out_predts,
|
||||
});
|
||||
}
|
||||
|
||||
void GPUDartInplacePredictInc(common::Span<float> out_predts,
|
||||
common::Span<float> predts, float tree_w,
|
||||
size_t n_rows, float base_score,
|
||||
bst_group_t n_groups, bst_group_t group) {
|
||||
void GPUDartInplacePredictInc(common::Span<float> out_predts, common::Span<float> predts,
|
||||
float tree_w, size_t n_rows,
|
||||
linalg::TensorView<float const, 1> base_score, bst_group_t n_groups,
|
||||
bst_group_t group) {
|
||||
CHECK_EQ(base_score.Size(), 1);
|
||||
dh::LaunchN(n_rows, [=] XGBOOST_DEVICE(size_t ridx) {
|
||||
const size_t offset = ridx * n_groups + group;
|
||||
out_predts[offset] += (predts[offset] - base_score) * tree_w;
|
||||
out_predts[offset] += (predts[offset] - base_score(0)) * tree_w;
|
||||
});
|
||||
}
|
||||
} // namespace gbm
|
||||
|
||||
Reference in New Issue
Block a user