Calculate base_score based on input labels for mae. (#8107)
Fit an intercept as base score for abs loss.
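Background for the change, as a rough sketch rather than the code in this commit: for absolute error the constant prediction minimizing the training loss is a median of the labels, so the learner can fit a data-dependent intercept instead of always starting from the fixed 0.5 default. A minimal, self-contained C++ illustration assuming plain unweighted labels; the actual implementation goes through ObjFunction::InitEstimation (see the diff below), and the MedianBaseScore helper here is hypothetical:

#include <algorithm>
#include <vector>

// A minimizer of sum_i |y_i - b| over a constant b is a median of y.
// Returns the upper median for even-sized inputs, which is fine for a sketch.
float MedianBaseScore(std::vector<float> labels) {
  auto mid = labels.begin() + labels.size() / 2;
  std::nth_element(labels.begin(), mid, labels.end());  // O(n) on average
  return *mid;
}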
src/learner.cc | 344
@@ -4,47 +4,48 @@
  * \brief Implementation of learning algorithm.
  * \author Tianqi Chen
  */
 #include "xgboost/learner.h"

-#include <dmlc/any.h>
 #include <dmlc/io.h>
 #include <dmlc/parameter.h>
 #include <dmlc/thread_local.h>

-#include <atomic>
-#include <mutex>
 #include <algorithm>
+#include <atomic>
 #include <iomanip>
-#include <limits>
+#include <limits>  // std::numeric_limits
 #include <memory>
+#include <mutex>
 #include <sstream>
-#include <string>
+#include <stack>
+#include <string>
 #include <utility>
 #include <vector>

-#include "dmlc/any.h"
+#include "common/charconv.h"
+#include "common/common.h"
+#include "common/io.h"
+#include "common/linalg_op.h"
+#include "common/observer.h"
+#include "common/random.h"
+#include "common/threading_utils.h"
+#include "common/timer.h"
+#include "common/version.h"
 #include "xgboost/base.h"
 #include "xgboost/c_api.h"
 #include "xgboost/data.h"
-#include "xgboost/model.h"
-#include "xgboost/predictor.h"
 #include "xgboost/feature_map.h"
 #include "xgboost/gbm.h"
 #include "xgboost/generic_parameters.h"
 #include "xgboost/host_device_vector.h"
 #include "xgboost/json.h"
 #include "xgboost/learner.h"
 #include "xgboost/logging.h"
 #include "xgboost/metric.h"
+#include "xgboost/model.h"
 #include "xgboost/objective.h"
 #include "xgboost/parameter.h"
-
-#include "common/common.h"
-#include "common/io.h"
-#include "common/observer.h"
-#include "common/random.h"
-#include "common/timer.h"
-#include "common/charconv.h"
-#include "common/version.h"
-#include "common/threading_utils.h"
+#include "xgboost/predictor.h"

 namespace {
@@ -85,26 +86,29 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
   uint32_t minor_version;

   uint32_t num_target{1};

+  int32_t base_score_estimated{0};
   /*! \brief reserved field */
-  int reserved[26];
+  int reserved[25];
   /*! \brief constructor */
   LearnerModelParamLegacy() {
     std::memset(this, 0, sizeof(LearnerModelParamLegacy));
-    base_score = 0.5f;
+    base_score = ObjFunction::DefaultBaseScore();
     num_target = 1;
     major_version = std::get<0>(Version::Self());
     minor_version = std::get<1>(Version::Self());
+    base_score_estimated = 0;
     static_assert(sizeof(LearnerModelParamLegacy) == 136,
                   "Do not change the size of this struct, as it will break binary IO.");
   }
   // Skip other legacy fields.
   Json ToJson() const {
     Object obj;
     char floats[NumericLimits<float>::kToCharsSize];
     auto ret = to_chars(floats, floats + NumericLimits<float>::kToCharsSize, base_score);
-    CHECK(ret.ec == std::errc());
-    obj["base_score"] =
-        std::string{floats, static_cast<size_t>(std::distance(floats, ret.ptr))};
+    CHECK(ret.ec == std::errc{});
+    obj["base_score"] = std::string{floats, static_cast<size_t>(std::distance(floats, ret.ptr))};

     char integers[NumericLimits<int64_t>::kToCharsSize];
     ret = to_chars(integers, integers + NumericLimits<int64_t>::kToCharsSize,
@@ -136,10 +140,14 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
     }

     this->Init(m);

     std::string str = get<String const>(j_param.at("base_score"));
     from_chars(str.c_str(), str.c_str() + str.size(), base_score);
+    // It can only be estimated during the first training, we consider it estimated afterward
+    base_score_estimated = 1;
   }
-  inline LearnerModelParamLegacy ByteSwap() const {
+
+  LearnerModelParamLegacy ByteSwap() const {
     LearnerModelParamLegacy x = *this;
     dmlc::ByteSwap(&x.base_score, sizeof(x.base_score), 1);
     dmlc::ByteSwap(&x.num_feature, sizeof(x.num_feature), 1);
@@ -149,14 +157,30 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
     dmlc::ByteSwap(&x.major_version, sizeof(x.major_version), 1);
     dmlc::ByteSwap(&x.minor_version, sizeof(x.minor_version), 1);
     dmlc::ByteSwap(&x.num_target, sizeof(x.num_target), 1);
+    dmlc::ByteSwap(&x.base_score_estimated, sizeof(x.base_score_estimated), 1);
     dmlc::ByteSwap(x.reserved, sizeof(x.reserved[0]), sizeof(x.reserved) / sizeof(x.reserved[0]));
     return x;
   }

+  template <typename Container>
+  Args UpdateAllowUnknown(Container const& kwargs) {
+    // Detect whether user has made their own base score.
+    if (std::find_if(kwargs.cbegin(), kwargs.cend(),
+                     [](auto const& kv) { return kv.first == "base_score"; }) != kwargs.cend()) {
+      base_score_estimated = true;
+    }
+    if (std::find_if(kwargs.cbegin(), kwargs.cend(), [](auto const& kv) {
+          return kv.first == "base_score_estimated";
+        }) != kwargs.cend()) {
+      LOG(FATAL) << "`base_score_estimated` cannot be specified as hyper-parameter.";
+    }
+    return dmlc::Parameter<LearnerModelParamLegacy>::UpdateAllowUnknown(kwargs);
+  }
+
   // declare parameters
   DMLC_DECLARE_PARAMETER(LearnerModelParamLegacy) {
     DMLC_DECLARE_FIELD(base_score)
-        .set_default(0.5f)
+        .set_default(ObjFunction::DefaultBaseScore())
         .describe("Global bias of the model.");
     DMLC_DECLARE_FIELD(num_feature)
         .set_default(0)
@@ -170,12 +194,12 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
         .set_default(1)
         .set_lower_bound(1)
         .describe("Number of target for multi-target regression.");
+    DMLC_DECLARE_FIELD(base_score_estimated).set_default(0);
   }
 };

-LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param, float base_margin,
-                                     ObjInfo t)
-    : base_score{base_margin}, num_feature{user_param.num_feature}, task{t} {
+LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t)
+    : num_feature{user_param.num_feature}, task{t} {
   auto n_classes = std::max(static_cast<uint32_t>(user_param.num_class), 1u);
   auto n_targets = user_param.num_target;
   num_output_group = std::max(n_classes, n_targets);
@@ -185,6 +209,53 @@ LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param,
       << ", n_targets:" << n_targets;
 }

+LearnerModelParam::LearnerModelParam(Context const* ctx, LearnerModelParamLegacy const& user_param,
+                                     linalg::Tensor<float, 1> base_margin, ObjInfo t)
+    : LearnerModelParam{user_param, t} {
+  std::swap(base_score_, base_margin);
+  // Make sure read access everywhere for thread-safe prediction.
+  common::AsConst(base_score_).HostView();
+  if (!ctx->IsCPU()) {
+    common::AsConst(base_score_).View(ctx->gpu_id);
+  }
+  CHECK(common::AsConst(base_score_).Data()->HostCanRead());
+}
+
+linalg::TensorView<float const, 1> LearnerModelParam::BaseScore(int32_t device) const {
+  // multi-class is not yet supported.
+  CHECK_EQ(base_score_.Size(), 1);
+  if (device == Context::kCpuId) {
+    // Make sure that we won't run into race condition.
+    CHECK(base_score_.Data()->HostCanRead());
+    return base_score_.HostView();
+  }
+  // Make sure that we won't run into race condition.
+  CHECK(base_score_.Data()->DeviceCanRead());
+  auto v = base_score_.View(device);
+  CHECK(base_score_.Data()->HostCanRead());  // make sure read access is not removed.
+  return v;
+}
+
+linalg::TensorView<float const, 1> LearnerModelParam::BaseScore(Context const* ctx) const {
+  return this->BaseScore(ctx->gpu_id);
+}
+
+void LearnerModelParam::Copy(LearnerModelParam const& that) {
+  base_score_.Reshape(that.base_score_.Shape());
+  base_score_.Data()->SetDevice(that.base_score_.DeviceIdx());
+  base_score_.Data()->Copy(*that.base_score_.Data());
+  common::AsConst(base_score_).HostView();
+  if (that.base_score_.DeviceIdx() != Context::kCpuId) {
+    common::AsConst(base_score_).View(that.base_score_.DeviceIdx());
+  }
+  CHECK_EQ(base_score_.Data()->DeviceCanRead(), that.base_score_.Data()->DeviceCanRead());
+  CHECK(base_score_.Data()->HostCanRead());
+
+  num_feature = that.num_feature;
+  num_output_group = that.num_output_group;
+  task = that.task;
+}
+
 struct LearnerTrainParam : public XGBoostParameter<LearnerTrainParam> {
   // data split mode, can be row, col, or none.
   DataSplitMode dsplit {DataSplitMode::kAuto};
@@ -308,8 +379,61 @@ class LearnerConfiguration : public Learner {
   LearnerModelParamLegacy mparam_;
   LearnerModelParam learner_model_param_;
   LearnerTrainParam tparam_;
+  // Initial prediction.
   std::vector<std::string> metric_names_;

+  /**
+   * \brief Calculate the `base_score` based on input data.
+   *
+   * \param p_fmat The training DMatrix used to estimate the base score.
+   */
+  void InitBaseScore(DMatrix const* p_fmat) {
+    // Before 1.0.0, we save `base_score` into binary as a transformed value by objective.
+    // After 1.0.0 we save the value provided by user and keep it immutable instead. To
+    // keep the stability, we initialize it in binary LoadModel instead of configuration.
+    // Under what condition should we omit the transformation:
+    //
+    // - base_score is loaded from old binary model.
+    //
+    // What are the other possible conditions:
+    //
+    // - model loaded from new binary or JSON.
+    // - model is created from scratch.
+    // - model is configured second time due to change of parameter
+    CHECK(obj_);
+    if (!mparam_.base_score_estimated) {
+      if (p_fmat) {
+        // We estimate it from input data.
+        linalg::Tensor<float, 1> base_score;
+        obj_->InitEstimation(p_fmat->Info(), &base_score);
+        mparam_.base_score = base_score(0);
+        CHECK(!std::isnan(mparam_.base_score));
+      } else {
+        mparam_.base_score = ObjFunction::DefaultBaseScore();
+      }
+      mparam_.base_score_estimated = true;
+      // Update the shared model parameter
+      this->ConfigureModelParam();
+    }
+  }
+
+  // Convert mparam to learner_model_param
+  void ConfigureModelParam() {
+    this->ConfigureTargets();
+
+    CHECK(obj_);
+    auto task = obj_->Task();
+    linalg::Tensor<float, 1> base_score({1}, Ctx()->gpu_id);
+    auto h_base_score = base_score.HostView();
+
+    // transform to margin
+    h_base_score(0) = obj_->ProbToMargin(mparam_.base_score);
+    // move it to model param, which is shared with all other components.
+    learner_model_param_ = LearnerModelParam(Ctx(), mparam_, std::move(base_score), task);
+    CHECK(learner_model_param_.Initialized());
+    CHECK_NE(learner_model_param_.BaseScore(Ctx()).Size(), 0);
+  }
+
  public:
   explicit LearnerConfiguration(std::vector<std::shared_ptr<DMatrix> > cache)
       : need_configuration_{true} {
@@ -329,22 +453,24 @@ class LearnerConfiguration : public Learner {
   // Configuration before data is known.
   void Configure() override {
     // Varient of double checked lock
-    if (!this->need_configuration_) { return; }
+    if (!this->need_configuration_) {
+      return;
+    }
     std::lock_guard<std::mutex> guard(config_lock_);
-    if (!this->need_configuration_) { return; }
+    if (!this->need_configuration_) {
+      return;
+    }

     monitor_.Start("Configure");
     auto old_tparam = tparam_;
     Args args = {cfg_.cbegin(), cfg_.cend()};

     tparam_.UpdateAllowUnknown(args);
-    auto mparam_backup = mparam_;
     mparam_.UpdateAllowUnknown(args);
-    auto initialized = generic_parameters_.GetInitialised();
-    auto old_seed = generic_parameters_.seed;
-    generic_parameters_.UpdateAllowUnknown(args);
+
+    auto initialized = ctx_.GetInitialised();
+    auto old_seed = ctx_.seed;
+    ctx_.UpdateAllowUnknown(args);

     ConsoleLogger::Configure(args);
@@ -355,8 +481,8 @@ class LearnerConfiguration : public Learner {
     }

     // set seed only before the model is initialized
-    if (!initialized || generic_parameters_.seed != old_seed) {
-      common::GlobalRandom().seed(generic_parameters_.seed);
+    if (!initialized || ctx_.seed != old_seed) {
+      common::GlobalRandom().seed(ctx_.seed);
     }

     // must precede configure gbm since num_features is required for gbm
@@ -364,31 +490,15 @@ class LearnerConfiguration : public Learner {
     args = {cfg_.cbegin(), cfg_.cend()};  // renew
     this->ConfigureObjective(old_tparam, &args);

-    auto task = this->ConfigureTargets();
-
-    // Before 1.0.0, we save `base_score` into binary as a transformed value by objective.
-    // After 1.0.0 we save the value provided by user and keep it immutable instead. To
-    // keep the stability, we initialize it in binary LoadModel instead of configuration.
-    // Under what condition should we omit the transformation:
-    //
-    // - base_score is loaded from old binary model.
-    //
-    // What are the other possible conditions:
-    //
-    // - model loaded from new binary or JSON.
-    // - model is created from scratch.
-    // - model is configured second time due to change of parameter
-    if (!learner_model_param_.Initialized() || mparam_.base_score != mparam_backup.base_score) {
-      learner_model_param_ =
-          LearnerModelParam(mparam_, obj_->ProbToMargin(mparam_.base_score), task);
-    }
-
+    learner_model_param_.task = obj_->Task();  // required by gbm configuration.
     this->ConfigureGBM(old_tparam, args);
-    generic_parameters_.ConfigureGpuId(this->gbm_->UseGPU());
+    ctx_.ConfigureGpuId(this->gbm_->UseGPU());
+    this->ConfigureModelParam();

     this->ConfigureMetrics(args);

     this->need_configuration_ = false;
-    if (generic_parameters_.validate_parameters) {
+    if (ctx_.validate_parameters) {
       this->ValidateParameters();
     }
@@ -396,6 +506,11 @@ class LearnerConfiguration : public Learner {
     monitor_.Stop("Configure");
   }

+  void CheckModelInitialized() const {
+    CHECK(learner_model_param_.Initialized()) << "Model not yet initialized.";
+    CHECK_NE(learner_model_param_.BaseScore(this->Ctx()).Size(), 0);
+  }
+
   virtual PredictionContainer* GetPredictionCache() const {
     return &((*ThreadLocalPredictionCache::Get())[this]);
   }
@@ -417,7 +532,7 @@ class LearnerConfiguration : public Learner {

     auto const& objective_fn = learner_parameters.at("objective");
     if (!obj_) {
-      obj_.reset(ObjFunction::Create(tparam_.objective, &generic_parameters_));
+      obj_.reset(ObjFunction::Create(tparam_.objective, &ctx_));
     }
     obj_->LoadConfig(objective_fn);
     learner_model_param_.task = obj_->Task();
@@ -425,7 +540,7 @@ class LearnerConfiguration : public Learner {
     tparam_.booster = get<String>(gradient_booster["name"]);
     if (!gbm_) {
       gbm_.reset(GradientBooster::Create(tparam_.booster,
-                                         &generic_parameters_, &learner_model_param_));
+                                         &ctx_, &learner_model_param_));
     }
     gbm_->LoadConfig(gradient_booster);
@@ -441,15 +556,15 @@ class LearnerConfiguration : public Learner {
       } else {
         metric_names_[i] = get<String>(j_metrics[i]["name"]);
       }
-      metrics_[i] = std::unique_ptr<Metric>(Metric::Create(metric_names_[i], &generic_parameters_));
+      metrics_[i] = std::unique_ptr<Metric>(Metric::Create(metric_names_[i], &ctx_));
       if (!old_serialization) {
         metrics_[i]->LoadConfig(j_metrics[i]);
       }
     }

-    FromJson(learner_parameters.at("generic_param"), &generic_parameters_);
+    FromJson(learner_parameters.at("generic_param"), &ctx_);
     // make sure the GPU ID is valid in new environment before start running configure.
-    generic_parameters_.ConfigureGpuId(false);
+    ctx_.ConfigureGpuId(false);

     this->need_configuration_ = true;
   }
@@ -478,7 +593,7 @@ class LearnerConfiguration : public Learner {
     }
     learner_parameters["metrics"] = Array(std::move(metrics));

-    learner_parameters["generic_param"] = ToJson(generic_parameters_);
+    learner_parameters["generic_param"] = ToJson(ctx_);
   }

   void SetParam(const std::string& key, const std::string& value) override {
@@ -551,7 +666,7 @@ class LearnerConfiguration : public Learner {
     return cfg_;
   }

-  GenericParameter const* Ctx() const override { return &generic_parameters_; }
+  Context const* Ctx() const override { return &ctx_; }

  private:
   void ValidateParameters() {
@@ -654,7 +769,7 @@ class LearnerConfiguration : public Learner {

   void ConfigureGBM(LearnerTrainParam const& old, Args const& args) {
     if (gbm_ == nullptr || old.booster != tparam_.booster) {
-      gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_parameters_,
+      gbm_.reset(GradientBooster::Create(tparam_.booster, &ctx_,
                                          &learner_model_param_));
     }
     gbm_->Configure(args);
@@ -678,7 +793,7 @@ class LearnerConfiguration : public Learner {
       cfg_["max_delta_step"] = kMaxDeltaStepDefaultValue;
     }
     if (obj_ == nullptr || tparam_.objective != old.objective) {
-      obj_.reset(ObjFunction::Create(tparam_.objective, &generic_parameters_));
+      obj_.reset(ObjFunction::Create(tparam_.objective, &ctx_));
     }
     auto& args = *p_args;
     args = {cfg_.cbegin(), cfg_.cend()};  // renew
@@ -691,7 +806,7 @@ class LearnerConfiguration : public Learner {
       return m->Name() != name;
     };
     if (std::all_of(metrics_.begin(), metrics_.end(), DupCheck)) {
-      metrics_.emplace_back(std::unique_ptr<Metric>(Metric::Create(name, &generic_parameters_)));
+      metrics_.emplace_back(std::unique_ptr<Metric>(Metric::Create(name, &ctx_)));
       mparam_.contain_eval_metrics = 1;
     }
   }
@@ -703,7 +818,7 @@ class LearnerConfiguration : public Learner {
   /**
    * Get number of targets from objective function.
    */
-  ObjInfo ConfigureTargets() {
+  void ConfigureTargets() {
     CHECK(this->obj_);
     auto const& cache = this->GetPredictionCache()->Container();
     size_t n_targets = 1;
@@ -722,7 +837,6 @@ class LearnerConfiguration : public Learner {
     } else {
       mparam_.num_target = n_targets;
     }
-    return this->obj_->Task();
   }
 };

@@ -754,14 +868,14 @@ class LearnerIO : public LearnerConfiguration {

     std::string name = get<String>(objective_fn["name"]);
     tparam_.UpdateAllowUnknown(Args{{"objective", name}});
-    obj_.reset(ObjFunction::Create(name, &generic_parameters_));
+    obj_.reset(ObjFunction::Create(name, &ctx_));
     obj_->LoadConfig(objective_fn);

     auto const& gradient_booster = learner.at("gradient_booster");
     name = get<String>(gradient_booster["name"]);
     tparam_.UpdateAllowUnknown(Args{{"booster", name}});
     gbm_.reset(
-        GradientBooster::Create(tparam_.booster, &generic_parameters_, &learner_model_param_));
+        GradientBooster::Create(tparam_.booster, &ctx_, &learner_model_param_));
     gbm_->LoadModel(gradient_booster);

     auto const& j_attributes = get<Object const>(learner.at("attributes"));
@@ -791,6 +905,7 @@ class LearnerIO : public LearnerConfiguration {

   void SaveModel(Json* p_out) const override {
     CHECK(!this->need_configuration_) << "Call Configure before saving model.";
+    this->CheckModelInitialized();

     Version::Save(p_out);
     Json& out { *p_out };
@@ -826,7 +941,7 @@ class LearnerIO : public LearnerConfiguration {

   // About to be deprecated by JSON format
   void LoadModel(dmlc::Stream* fi) override {
-    generic_parameters_.UpdateAllowUnknown(Args{});
+    ctx_.UpdateAllowUnknown(Args{});
     tparam_.Init(std::vector<std::pair<std::string, std::string>>{});
     // TODO(tqchen) mark deprecation of old format.
     common::PeekableInStream fp(fi);
@@ -881,8 +996,8 @@ class LearnerIO : public LearnerConfiguration {
     CHECK(fi->Read(&tparam_.objective)) << "BoostLearner: wrong model format";
     CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format";

-    obj_.reset(ObjFunction::Create(tparam_.objective, &generic_parameters_));
-    gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_parameters_,
+    obj_.reset(ObjFunction::Create(tparam_.objective, &ctx_));
+    gbm_.reset(GradientBooster::Create(tparam_.booster, &ctx_,
                                        &learner_model_param_));
     gbm_->Load(fi);
     if (mparam_.contain_extra_attrs != 0) {
@@ -925,7 +1040,14 @@ class LearnerIO : public LearnerConfiguration {
     }

     learner_model_param_ =
-        LearnerModelParam(mparam_, obj_->ProbToMargin(mparam_.base_score), obj_->Task());
+        LearnerModelParam(&ctx_, mparam_,
+                          linalg::Tensor<float, 1>{{std::isnan(mparam_.base_score)
+                                                        ? std::numeric_limits<float>::quiet_NaN()
+                                                        : obj_->ProbToMargin(mparam_.base_score)},
+                                                   {1},
+                                                   Context::kCpuId},
+                          obj_->Task());

     if (attributes_.find("objective") != attributes_.cend()) {
       auto obj_str = attributes_.at("objective");
       auto j_obj = Json::Load({obj_str.c_str(), obj_str.size()});
@@ -969,6 +1091,8 @@ class LearnerIO : public LearnerConfiguration {
   // Save model into binary format. The code is about to be deprecated by more robust
   // JSON serialization format.
   void SaveModel(dmlc::Stream* fo) const override {
+    this->CheckModelInitialized();
+
     LearnerModelParamLegacy mparam = mparam_;  // make a copy to potentially modify
     std::vector<std::pair<std::string, std::string> > extra_attr;
     mparam.contain_extra_attrs = 1;
@@ -1000,6 +1124,7 @@ class LearnerIO : public LearnerConfiguration {
       }
       extra_attr.emplace_back("metrics", os.str());
     }
+
     std::string header {"binf"};
     fo->Write(header.data(), 4);
     if (DMLC_IO_NO_ENDIAN_SWAP) {
@@ -1022,6 +1147,8 @@ class LearnerIO : public LearnerConfiguration {
   }

   void Save(dmlc::Stream* fo) const override {
+    this->CheckModelInitialized();
+
     Json memory_snapshot{Object()};
     memory_snapshot["Model"] = Object();
     auto& model = memory_snapshot["Model"];
@@ -1108,28 +1235,30 @@ class LearnerImpl : public LearnerIO {
       }
     }

-  std::vector<std::string> DumpModel(const FeatureMap& fmap,
-                                     bool with_stats,
+  std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,
                                      std::string format) override {
     this->Configure();
+    this->CheckModelInitialized();
+
     return gbm_->DumpModel(fmap, with_stats, format);
   }

-  Learner *Slice(int32_t begin_layer, int32_t end_layer, int32_t step,
-                 bool *out_of_bound) override {
+  Learner* Slice(int32_t begin_layer, int32_t end_layer, int32_t step,
+                 bool* out_of_bound) override {
     this->Configure();
+    this->CheckModelInitialized();
+
     CHECK_NE(this->learner_model_param_.num_feature, 0);
     CHECK_GE(begin_layer, 0);
-    auto *out_impl = new LearnerImpl({});
-    out_impl->learner_model_param_ = this->learner_model_param_;
-    out_impl->generic_parameters_ = this->generic_parameters_;
+    auto* out_impl = new LearnerImpl({});
+    out_impl->learner_model_param_.Copy(this->learner_model_param_);
+    out_impl->ctx_ = this->ctx_;
     auto gbm = std::unique_ptr<GradientBooster>(GradientBooster::Create(
-        this->tparam_.booster, &out_impl->generic_parameters_,
-        &out_impl->learner_model_param_));
+        this->tparam_.booster, &out_impl->ctx_, &out_impl->learner_model_param_));
     this->gbm_->Slice(begin_layer, end_layer, step, gbm.get(), out_of_bound);
     out_impl->gbm_ = std::move(gbm);

-    Json config { Object() };
+    Json config{Object()};
     this->SaveConfig(&config);
     out_impl->mparam_ = this->mparam_;
     out_impl->attributes_ = this->attributes_;
@@ -1156,15 +1285,17 @@ class LearnerImpl : public LearnerIO {
     monitor_.Start("UpdateOneIter");
     TrainingObserver::Instance().Update(iter);
     this->Configure();
-    if (generic_parameters_.seed_per_iteration) {
-      common::GlobalRandom().seed(generic_parameters_.seed * kRandSeedMagic + iter);
+    this->InitBaseScore(train.get());
+
+    if (ctx_.seed_per_iteration) {
+      common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter);
     }

     this->CheckDataSplitMode();
     this->ValidateDMatrix(train.get(), true);

     auto local_cache = this->GetPredictionCache();
-    auto& predt = local_cache->Cache(train, generic_parameters_.gpu_id);
+    auto& predt = local_cache->Cache(train, ctx_.gpu_id);

     monitor_.Start("PredictRaw");
     this->PredictRaw(train.get(), &predt, true, 0, 0);
@@ -1184,14 +1315,18 @@ class LearnerImpl : public LearnerIO {
                     HostDeviceVector<GradientPair>* in_gpair) override {
     monitor_.Start("BoostOneIter");
     this->Configure();
-    if (generic_parameters_.seed_per_iteration) {
-      common::GlobalRandom().seed(generic_parameters_.seed * kRandSeedMagic + iter);
+    // Should have been set to default in the first prediction.
+    CHECK(mparam_.base_score_estimated);
+
+    if (ctx_.seed_per_iteration) {
+      common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter);
     }

     this->CheckDataSplitMode();
     this->ValidateDMatrix(train.get(), true);

     auto local_cache = this->GetPredictionCache();
-    local_cache->Cache(train, generic_parameters_.gpu_id);
+    local_cache->Cache(train, ctx_.gpu_id);

     gbm_->DoBoost(train.get(), in_gpair, &local_cache->Entry(train.get()), obj_.get());
     monitor_.Stop("BoostOneIter");
@@ -1202,23 +1337,24 @@ class LearnerImpl : public LearnerIO {
                        const std::vector<std::string>& data_names) override {
     monitor_.Start("EvalOneIter");
     this->Configure();
+    this->CheckModelInitialized();

     std::ostringstream os;
     os.precision(std::numeric_limits<double>::max_digits10);
     os << '[' << iter << ']' << std::setiosflags(std::ios::fixed);
     if (metrics_.size() == 0 && tparam_.disable_default_eval_metric <= 0) {
-      metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric(), &generic_parameters_));
+      metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric(), &ctx_));
       metrics_.back()->Configure({cfg_.begin(), cfg_.end()});
     }

     auto local_cache = this->GetPredictionCache();
     for (size_t i = 0; i < data_sets.size(); ++i) {
       std::shared_ptr<DMatrix> m = data_sets[i];
-      auto &predt = local_cache->Cache(m, generic_parameters_.gpu_id);
+      auto &predt = local_cache->Cache(m, ctx_.gpu_id);
       this->ValidateDMatrix(m.get(), false);
       this->PredictRaw(m.get(), &predt, false, 0, 0);

-      auto &out = output_predictions_.Cache(m, generic_parameters_.gpu_id).predictions;
+      auto &out = output_predictions_.Cache(m, ctx_.gpu_id).predictions;
       out.Resize(predt.predictions.Size());
       out.Copy(predt.predictions);
@@ -1241,6 +1377,9 @@ class LearnerImpl : public LearnerIO {
                                   static_cast<int>(pred_interactions) +
                                   static_cast<int>(pred_contribs);
     this->Configure();
+    this->InitBaseScore(nullptr);
+    this->CheckModelInitialized();
+
     CHECK_LE(multiple_predictions, 1) << "Perform one kind of prediction at a time.";
     if (pred_contribs) {
       gbm_->PredictContribution(data.get(), out_preds, layer_begin, layer_end, approx_contribs);
@@ -1251,10 +1390,10 @@ class LearnerImpl : public LearnerIO {
       gbm_->PredictLeaf(data.get(), out_preds, layer_begin, layer_end);
     } else {
       auto local_cache = this->GetPredictionCache();
-      auto& prediction = local_cache->Cache(data, generic_parameters_.gpu_id);
+      auto& prediction = local_cache->Cache(data, ctx_.gpu_id);
       this->PredictRaw(data.get(), &prediction, training, layer_begin, layer_end);
       // Copy the prediction cache to output prediction. out_preds comes from C API
-      out_preds->SetDevice(generic_parameters_.gpu_id);
+      out_preds->SetDevice(ctx_.gpu_id);
       out_preds->Resize(prediction.predictions.Size());
       out_preds->Copy(prediction.predictions);
       if (!output_margin) {
@@ -1268,8 +1407,10 @@ class LearnerImpl : public LearnerIO {
     CHECK(!this->need_configuration_);
     return this->gbm_->BoostedRounds();
   }
+
   uint32_t Groups() const override {
     CHECK(!this->need_configuration_);
+    this->CheckModelInitialized();
     return this->learner_model_param_.num_output_group;
   }
@@ -1281,6 +1422,9 @@ class LearnerImpl : public LearnerIO {
                       HostDeviceVector<bst_float>** out_preds, uint32_t iteration_begin,
                       uint32_t iteration_end) override {
     this->Configure();
+    this->InitBaseScore(nullptr);
+    this->CheckModelInitialized();
+
     auto& out_predictions = this->GetThreadLocal().prediction_entry;
     this->gbm_->InplacePredict(p_m, missing, &out_predictions, iteration_begin, iteration_end);
     if (type == PredictionType::kValue) {
@@ -1296,6 +1440,8 @@ class LearnerImpl : public LearnerIO {
   void CalcFeatureScore(std::string const& importance_type, common::Span<int32_t const> trees,
                         std::vector<bst_feature_t>* features, std::vector<float>* scores) override {
     this->Configure();
+    this->CheckModelInitialized();
+
     gbm_->FeatureScore(importance_type, trees, features, scores);
   }
@@ -1315,17 +1461,17 @@ class LearnerImpl : public LearnerIO {
   void PredictRaw(DMatrix *data, PredictionCacheEntry *out_preds, bool training,
                   unsigned layer_begin, unsigned layer_end) const {
     CHECK(gbm_ != nullptr) << "Predict must happen after Load or configuration";
+    this->CheckModelInitialized();
     this->ValidateDMatrix(data, false);
     gbm_->PredictBatch(data, out_preds, training, layer_begin, layer_end);
   }

   void ValidateDMatrix(DMatrix* p_fmat, bool is_training) const {
     MetaInfo const& info = p_fmat->Info();
-    info.Validate(generic_parameters_.gpu_id);
+    info.Validate(ctx_.gpu_id);

     auto const row_based_split = [this]() {
-      return tparam_.dsplit == DataSplitMode::kRow ||
-             tparam_.dsplit == DataSplitMode::kAuto;
+      return tparam_.dsplit == DataSplitMode::kRow || tparam_.dsplit == DataSplitMode::kAuto;
     };
     if (row_based_split()) {
       if (is_training) {