Calculate base_score based on input labels for mae. (#8107)

Fit an intercept and use it as the base score for the absolute-error (MAE) loss.
2022-09-20 20:53:54 +08:00
parent 4f42aa5f12
commit fffb1fca52
42 changed files with 999 additions and 343 deletions
--- a/src/gbm/gblinear.cc
+++ b/src/gbm/gblinear.cc
@@ -161,9 +161,10 @@ class GBLinear : public GradientBooster {
                       uint32_t layer_begin, uint32_t) override {
    LinearCheckLayer(layer_begin);
    const int ngroup = model_.learner_model_param->num_output_group;
+
+    auto base_score = learner_model_param_->BaseScore(ctx_);
    for (int gid = 0; gid < ngroup; ++gid) {
-      this->Pred(inst, dmlc::BeginPtr(*out_preds), gid,
-                 learner_model_param_->base_score);
+      this->Pred(inst, dmlc::BeginPtr(*out_preds), gid, base_score(0));
    }
  }

@@ -184,6 +185,7 @@ class GBLinear : public GradientBooster {
    contribs.resize(p_fmat->Info().num_row_ * ncolumns * ngroup);
    // make sure contributions is zeroed, we could be reusing a previously allocated one
    std::fill(contribs.begin(), contribs.end(), 0);
+    auto base_score = learner_model_param_->BaseScore(ctx_);
    // start collecting the contributions
    for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
      // parallel over local batch
@@ -202,8 +204,8 @@ class GBLinear : public GradientBooster {
          }
          // add base margin to BIAS
          p_contribs[ncolumns - 1] =
-              model_.Bias()[gid] + ((base_margin.Size() != 0) ? base_margin(row_idx, gid)
-                                                              : learner_model_param_->base_score);
+              model_.Bias()[gid] +
+              ((base_margin.Size() != 0) ? base_margin(row_idx, gid) : base_score(0));
        }
      });
    }
@@ -268,10 +270,12 @@ class GBLinear : public GradientBooster {
    monitor_.Start("PredictBatchInternal");
    model_.LazyInitModel();
    std::vector<bst_float> &preds = *out_preds;
-    auto base_margin = p_fmat->Info().base_margin_.View(GenericParameter::kCpuId);
+    auto base_margin = p_fmat->Info().base_margin_.View(Context::kCpuId);
    // start collecting the prediction
    const int ngroup = model_.learner_model_param->num_output_group;
    preds.resize(p_fmat->Info().num_row_ * ngroup);
+
+    auto base_score = learner_model_param_->BaseScore(Context::kCpuId);
    for (const auto &page : p_fmat->GetBatches<SparsePage>()) {
      auto const& batch = page.GetView();
      // output convention: nrow * k, where nrow is number of rows
@@ -285,8 +289,7 @@ class GBLinear : public GradientBooster {
        const size_t ridx = page.base_rowid + i;
        // loop over output groups
        for (int gid = 0; gid < ngroup; ++gid) {
-          float margin =
-              (base_margin.Size() != 0) ? base_margin(ridx, gid) : learner_model_param_->base_score;
+          float margin = (base_margin.Size() != 0) ? base_margin(ridx, gid) : base_score(0);
          this->Pred(batch[i], &preds[ridx * ngroup], gid, margin);
        }
      });
--- a/src/gbm/gbtree.cc
+++ b/src/gbm/gbtree.cc
@@ -638,13 +638,12 @@ void GPUDartPredictInc(common::Span<float> out_predts,
 }
 #endif

-void GPUDartInplacePredictInc(common::Span<float> out_predts,
-                              common::Span<float> predts, float tree_w,
-                              size_t n_rows, float base_score,
-                              bst_group_t n_groups,
-                              bst_group_t group)
+void GPUDartInplacePredictInc(common::Span<float> /*out_predts*/, common::Span<float> /*predts*/,
+                              float /*tree_w*/, size_t /*n_rows*/,
+                              linalg::TensorView<float const, 1> /*base_score*/,
+                              bst_group_t /*n_groups*/, bst_group_t /*group*/)
 #if defined(XGBOOST_USE_CUDA)
-;  // NOLINT
+    ;  // NOLINT
 #else
 {
  common::AssertGPUSupport();
@@ -850,15 +849,17 @@ class Dart : public GBTree {
      size_t n_rows = p_fmat->Info().num_row_;
      if (predts.predictions.DeviceIdx() != Context::kCpuId) {
        p_out_preds->predictions.SetDevice(predts.predictions.DeviceIdx());
+        auto base_score = model_.learner_model_param->BaseScore(predts.predictions.DeviceIdx());
        GPUDartInplacePredictInc(p_out_preds->predictions.DeviceSpan(),
-                                 predts.predictions.DeviceSpan(), w, n_rows,
-                                 model_.learner_model_param->base_score, n_groups, group);
+                                 predts.predictions.DeviceSpan(), w, n_rows, base_score, n_groups,
+                                 group);
      } else {
+        auto base_score = model_.learner_model_param->BaseScore(Context::kCpuId);
        auto& h_predts = predts.predictions.HostVector();
        auto& h_out_predts = p_out_preds->predictions.HostVector();
        common::ParallelFor(n_rows, ctx_->Threads(), [&](auto ridx) {
          const size_t offset = ridx * n_groups + group;
-          h_out_predts[offset] += (h_predts[offset] - model_.learner_model_param->base_score) * w;
+          h_out_predts[offset] += (h_predts[offset] - base_score(0)) * w;
        });
      }
    }
--- a/src/gbm/gbtree.cu
+++ b/src/gbm/gbtree.cu
@@ -31,13 +31,14 @@ void GPUDartPredictInc(common::Span<float> out_predts,
  });
 }

-void GPUDartInplacePredictInc(common::Span<float> out_predts,
-                              common::Span<float> predts, float tree_w,
-                              size_t n_rows, float base_score,
-                              bst_group_t n_groups, bst_group_t group) {
+void GPUDartInplacePredictInc(common::Span<float> out_predts, common::Span<float> predts,
+                              float tree_w, size_t n_rows,
+                              linalg::TensorView<float const, 1> base_score, bst_group_t n_groups,
+                              bst_group_t group) {
+  CHECK_EQ(base_score.Size(), 1);
  dh::LaunchN(n_rows, [=] XGBOOST_DEVICE(size_t ridx) {
    const size_t offset = ridx * n_groups + group;
-    out_predts[offset] += (predts[offset] - base_score) * tree_w;
+    out_predts[offset] += (predts[offset] - base_score(0)) * tree_w;
  });
 }
 }  // namespace gbm