Pass pointer to model parameters. (#5101)

* Pass pointer to model parameters.

This PR de-duplicates most of the model parameters except the one in
`tree_model.h`.  One difficulty is that `base_score` is a model property but can
be changed at runtime by the objective function.  Hence when performing model
IO, we need to save the value provided by users, instead of the one transformed
by the objective.  Here we create an immutable version of `LearnerModelParam`
that represents the value of the model parameters after configuration.
This commit is contained in:
Jiaming Yuan
2019-12-10 12:11:22 +08:00
committed by GitHub
parent 979f74d51a
commit e089e16e3d
33 changed files with 623 additions and 404 deletions

View File

@@ -7,15 +7,18 @@
*/
#include <dmlc/omp.h>
#include <dmlc/parameter.h>
#include <xgboost/gbm.h>
#include <xgboost/logging.h>
#include <xgboost/linear_updater.h>
#include <vector>
#include <string>
#include <sstream>
#include <algorithm>
#include "xgboost/gbm.h"
#include "xgboost/json.h"
#include "xgboost/linear_updater.h"
#include "xgboost/logging.h"
#include "xgboost/learner.h"
#include "gblinear_model.h"
#include "../common/timer.h"
@@ -48,8 +51,10 @@ struct GBLinearTrainParam : public XGBoostParameter<GBLinearTrainParam> {
class GBLinear : public GradientBooster {
public:
explicit GBLinear(const std::vector<std::shared_ptr<DMatrix> > &cache,
bst_float base_margin)
: base_margin_(base_margin),
LearnerModelParam const* learner_model_param)
: learner_model_param_{learner_model_param},
model_{learner_model_param_},
previous_model_{learner_model_param_},
sum_instance_weight_(0),
sum_weight_complete_(false),
is_converged_(false) {
@@ -62,7 +67,7 @@ class GBLinear : public GradientBooster {
}
void Configure(const Args& cfg) override {
if (model_.weight.size() == 0) {
model_.param.InitAllowUnknown(cfg);
model_.Configure(cfg);
}
param_.UpdateAllowUnknown(cfg);
updater_.reset(LinearUpdater::Create(param_.updater, generic_param_));
@@ -116,11 +121,12 @@ class GBLinear : public GradientBooster {
}
// add base margin
void PredictInstance(const SparsePage::Inst &inst,
std::vector<bst_float> *out_preds,
unsigned ntree_limit) override {
const int ngroup = model_.param.num_output_group;
std::vector<bst_float> *out_preds,
unsigned ntree_limit) override {
const int ngroup = model_.learner_model_param_->num_output_group;
for (int gid = 0; gid < ngroup; ++gid) {
this->Pred(inst, dmlc::BeginPtr(*out_preds), gid, base_margin_);
this->Pred(inst, dmlc::BeginPtr(*out_preds), gid,
learner_model_param_->base_score);
}
}
@@ -138,8 +144,8 @@ class GBLinear : public GradientBooster {
CHECK_EQ(ntree_limit, 0U)
<< "GBLinear::PredictContribution: ntrees is only valid for gbtree predictor";
const auto& base_margin = p_fmat->Info().base_margin_.ConstHostVector();
const int ngroup = model_.param.num_output_group;
const size_t ncolumns = model_.param.num_feature + 1;
const int ngroup = model_.learner_model_param_->num_output_group;
const size_t ncolumns = model_.learner_model_param_->num_feature + 1;
// allocate space for (#features + bias) times #groups times #rows
std::vector<bst_float>& contribs = *out_contribs;
contribs.resize(p_fmat->Info().num_row_ * ncolumns * ngroup);
@@ -149,35 +155,38 @@ class GBLinear : public GradientBooster {
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
// parallel over local batch
const auto nsize = static_cast<bst_omp_uint>(batch.Size());
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nsize; ++i) {
auto inst = batch[i];
auto inst = batch[i];
auto row_idx = static_cast<size_t>(batch.base_rowid + i);
// loop over output groups
for (int gid = 0; gid < ngroup; ++gid) {
bst_float *p_contribs = &contribs[(row_idx * ngroup + gid) * ncolumns];
// calculate linear terms' contributions
for (auto& ins : inst) {
if (ins.index >= model_.param.num_feature) continue;
if (ins.index >= model_.learner_model_param_->num_feature) continue;
p_contribs[ins.index] = ins.fvalue * model_[ins.index][gid];
}
// add base margin to BIAS
p_contribs[ncolumns - 1] = model_.bias()[gid] +
((base_margin.size() != 0) ? base_margin[row_idx * ngroup + gid] : base_margin_);
((base_margin.size() != 0) ? base_margin[row_idx * ngroup + gid] :
learner_model_param_->base_score);
}
}
}
}
void PredictInteractionContributions(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate) override {
std::vector<bst_float>& contribs = *out_contribs;
std::vector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate) override {
std::vector<bst_float>& contribs = *out_contribs;
// linear models have no interaction effects
const size_t nelements = model_.param.num_feature*model_.param.num_feature;
contribs.resize(p_fmat->Info().num_row_ * nelements * model_.param.num_output_group);
std::fill(contribs.begin(), contribs.end(), 0);
// linear models have no interaction effects
const size_t nelements = model_.learner_model_param_->num_feature *
model_.learner_model_param_->num_feature;
contribs.resize(p_fmat->Info().num_row_ * nelements *
model_.learner_model_param_->num_output_group);
std::fill(contribs.begin(), contribs.end(), 0);
}
std::vector<std::string> DumpModel(const FeatureMap& fmap,
@@ -196,26 +205,26 @@ class GBLinear : public GradientBooster {
protected:
void PredictBatchInternal(DMatrix *p_fmat,
std::vector<bst_float> *out_preds) {
std::vector<bst_float> *out_preds) {
monitor_.Start("PredictBatchInternal");
model_.LazyInitModel();
std::vector<bst_float> &preds = *out_preds;
const auto& base_margin = p_fmat->Info().base_margin_.ConstHostVector();
// start collecting the prediction
const int ngroup = model_.param.num_output_group;
const int ngroup = model_.learner_model_param_->num_output_group;
preds.resize(p_fmat->Info().num_row_ * ngroup);
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
// output convention: nrow * k, where nrow is number of rows
// k is number of group
// parallel over local batch
const auto nsize = static_cast<omp_ulong>(batch.Size());
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for (omp_ulong i = 0; i < nsize; ++i) {
const size_t ridx = batch.base_rowid + i;
// loop over output groups
for (int gid = 0; gid < ngroup; ++gid) {
bst_float margin = (base_margin.size() != 0) ?
base_margin[ridx * ngroup + gid] : base_margin_;
base_margin[ridx * ngroup + gid] : learner_model_param_->base_score;
this->Pred(batch[i], &preds[ridx * ngroup], gid, margin);
}
}
@@ -227,7 +236,7 @@ class GBLinear : public GradientBooster {
for (auto &kv : cache_) {
PredictionCacheEntry &e = kv.second;
if (e.predictions.size() == 0) {
size_t n = model_.param.num_output_group * e.data->Info().num_row_;
size_t n = model_.learner_model_param_->num_output_group * e.data->Info().num_row_;
e.predictions.resize(n);
}
this->PredictBatchInternal(e.data.get(), &e.predictions);
@@ -262,18 +271,18 @@ class GBLinear : public GradientBooster {
}
}
inline void Pred(const SparsePage::Inst &inst, bst_float *preds, int gid,
bst_float base) {
void Pred(const SparsePage::Inst &inst, bst_float *preds, int gid,
bst_float base) {
bst_float psum = model_.bias()[gid] + base;
for (const auto& ins : inst) {
if (ins.index >= model_.param.num_feature) continue;
if (ins.index >= model_.learner_model_param_->num_feature) continue;
psum += ins.fvalue * model_[ins.index][gid];
}
preds[gid] = psum;
}
// biase margin score
bst_float base_margin_;
LearnerModelParam const* learner_model_param_;
// model field
GBLinearModel model_;
GBLinearModel previous_model_;
@@ -302,14 +311,13 @@ class GBLinear : public GradientBooster {
};
// register the objective functions
DMLC_REGISTER_PARAMETER(GBLinearModelParam);
DMLC_REGISTER_PARAMETER(GBLinearTrainParam);
XGBOOST_REGISTER_GBM(GBLinear, "gblinear")
.describe("Linear booster, implement generalized linear model.")
.set_body([](const std::vector<std::shared_ptr<DMatrix> > &cache,
bst_float base_margin) {
return new GBLinear(cache, base_margin);
LearnerModelParam const* booster_config) {
return new GBLinear(cache, booster_config);
});
} // namespace gbm
} // namespace xgboost

View File

@@ -1,51 +1,65 @@
/*!
* Copyright by Contributors 2018
* Copyright 2018-2019 by Contributors
*/
#pragma once
#include <dmlc/io.h>
#include <dmlc/parameter.h>
#include <xgboost/base.h>
#include <xgboost/feature_map.h>
#include <xgboost/model.h>
#include <xgboost/learner.h>
#include <vector>
#include <string>
#include <cstring>
#include "xgboost/base.h"
#include "xgboost/feature_map.h"
#include "xgboost/model.h"
#include "xgboost/json.h"
#include "xgboost/parameter.h"
namespace xgboost {
class Json;
namespace gbm {
// model parameter
struct GBLinearModelParam : public dmlc::Parameter<GBLinearModelParam> {
// Deprecated in 1.0.0. model parameter. Only staying here for compatible binary model IO.
struct DeprecatedGBLinearModelParam : public dmlc::Parameter<DeprecatedGBLinearModelParam> {
// number of feature dimension
unsigned num_feature;
// number of output group
int num_output_group;
uint32_t deprecated_num_feature;
// deprecated. use learner_model_param_->num_output_group.
int32_t deprecated_num_output_group;
// reserved field
int reserved[32];
int32_t reserved[32];
// constructor
GBLinearModelParam() { std::memset(this, 0, sizeof(GBLinearModelParam)); }
DMLC_DECLARE_PARAMETER(GBLinearModelParam) {
DMLC_DECLARE_FIELD(num_feature)
.set_lower_bound(0)
.describe("Number of features used in classification.");
DMLC_DECLARE_FIELD(num_output_group)
.set_lower_bound(1)
.set_default(1)
.describe("Number of output groups in the setting.");
DeprecatedGBLinearModelParam() {
static_assert(sizeof(*this) == sizeof(int32_t) * 34,
"Model parameter size can not be changed.");
std::memset(this, 0, sizeof(DeprecatedGBLinearModelParam));
}
DMLC_DECLARE_PARAMETER(DeprecatedGBLinearModelParam) {}
};
// model for linear booster
class GBLinearModel : public Model {
private:
// Deprecated in 1.0.0
DeprecatedGBLinearModelParam param;
public:
// parameter
GBLinearModelParam param;
LearnerModelParam const* learner_model_param_;
public:
explicit GBLinearModel(LearnerModelParam const* learner_model_param) :
learner_model_param_ {learner_model_param} {}
void Configure(Args const &cfg) { }
// weight for each of feature, bias is the last one
std::vector<bst_float> weight;
// initialize the model parameter
inline void LazyInitModel() {
if (!weight.empty()) return;
if (!weight.empty())
return;
// bias is the last weight
weight.resize((param.num_feature + 1) * param.num_output_group);
weight.resize((learner_model_param_->num_feature + 1) *
learner_model_param_->num_output_group);
std::fill(weight.begin(), weight.end(), 0.0f);
}
// save the model to file
@@ -70,33 +84,37 @@ class GBLinearModel : public Model {
}
// model bias
inline bst_float* bias() {
return &weight[param.num_feature * param.num_output_group];
inline bst_float *bias() {
return &weight[learner_model_param_->num_feature *
learner_model_param_->num_output_group];
}
inline const bst_float* bias() const {
return &weight[param.num_feature * param.num_output_group];
inline const bst_float *bias() const {
return &weight[learner_model_param_->num_feature *
learner_model_param_->num_output_group];
}
// get i-th weight
inline bst_float* operator[](size_t i) {
return &weight[i * param.num_output_group];
inline bst_float *operator[](size_t i) {
return &weight[i * learner_model_param_->num_output_group];
}
inline const bst_float* operator[](size_t i) const {
return &weight[i * param.num_output_group];
inline const bst_float *operator[](size_t i) const {
return &weight[i * learner_model_param_->num_output_group];
}
std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,
std::vector<std::string> DumpModel(const FeatureMap &fmap, bool with_stats,
std::string format) const {
const int ngroup = param.num_output_group;
const unsigned nfeature = param.num_feature;
const int ngroup = learner_model_param_->num_output_group;
const unsigned nfeature = learner_model_param_->num_feature;
std::stringstream fo("");
if (format == "json") {
fo << " { \"bias\": [" << std::endl;
for (int gid = 0; gid < ngroup; ++gid) {
if (gid != 0) fo << "," << std::endl;
if (gid != 0)
fo << "," << std::endl;
fo << " " << this->bias()[gid];
}
fo << std::endl << " ]," << std::endl
fo << std::endl
<< " ]," << std::endl
<< " \"weight\": [" << std::endl;
for (unsigned i = 0; i < nfeature; ++i) {
for (int gid = 0; gid < ngroup; ++gid) {

View File

@@ -3,8 +3,14 @@
* \file gbm.cc
* \brief Registry of gradient boosters.
*/
#include <xgboost/gbm.h>
#include <dmlc/registry.h>
#include <string>
#include <vector>
#include <memory>
#include "xgboost/gbm.h"
#include "xgboost/learner.h"
#include "xgboost/generic_parameters.h"
namespace dmlc {
DMLC_REGISTRY_ENABLE(::xgboost::GradientBoosterReg);
@@ -14,17 +20,16 @@ namespace xgboost {
GradientBooster* GradientBooster::Create(
const std::string& name,
GenericParameter const* generic_param,
const std::vector<std::shared_ptr<DMatrix> >& cache_mats,
bst_float base_margin) {
LearnerModelParam const* learner_model_param,
const std::vector<std::shared_ptr<DMatrix> >& cache_mats) {
auto *e = ::dmlc::Registry< ::xgboost::GradientBoosterReg>::Get()->Find(name);
if (e == nullptr) {
LOG(FATAL) << "Unknown gbm type " << name;
}
auto p_bst = (e->body)(cache_mats, base_margin);
auto p_bst = (e->body)(cache_mats, learner_model_param);
p_bst->generic_param_ = generic_param;
return p_bst;
}
} // namespace xgboost
namespace xgboost {

View File

@@ -14,8 +14,9 @@
#include <limits>
#include <algorithm>
#include "xgboost/logging.h"
#include "xgboost/gbm.h"
#include "xgboost/logging.h"
#include "xgboost/json.h"
#include "xgboost/predictor.h"
#include "xgboost/tree_updater.h"
#include "xgboost/host_device_vector.h"
@@ -26,7 +27,6 @@
#include "../common/random.h"
#include "../common/timer.h"
namespace xgboost {
namespace gbm {
@@ -161,10 +161,11 @@ void GBTree::ConfigureUpdaters() {
"single updater grow_quantile_histmaker.";
tparam_.updater_seq = "grow_quantile_histmaker";
break;
case TreeMethod::kGPUHist:
case TreeMethod::kGPUHist: {
this->AssertGPUSupport();
tparam_.updater_seq = "grow_gpu_hist";
break;
}
default:
LOG(FATAL) << "Unknown tree_method ("
<< static_cast<int>(tparam_.tree_method) << ") detected";
@@ -175,9 +176,10 @@ void GBTree::DoBoost(DMatrix* p_fmat,
HostDeviceVector<GradientPair>* in_gpair,
ObjFunction* obj) {
std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees;
const int ngroup = model_.param.num_output_group;
const int ngroup = model_.learner_model_param_->num_output_group;
ConfigureWithKnownData(this->cfg_, p_fmat);
monitor_.Start("BoostNewTrees");
CHECK_NE(ngroup, 0);
if (ngroup == 1) {
std::vector<std::unique_ptr<RegTree> > ret;
BoostNewTrees(in_gpair, p_fmat, 0, &ret);
@@ -234,9 +236,11 @@ void GBTree::InitUpdater(Args const& cfg) {
LOG(FATAL) << ss.str();
}
}
// Do not push new updater in.
return;
}
// create new updaters
for (const std::string& pstr : ups) {
std::unique_ptr<TreeUpdater> up(TreeUpdater::Create(pstr.c_str(), generic_param_));
up->Configure(cfg);
@@ -255,7 +259,7 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
if (tparam_.process_type == TreeProcessType::kDefault) {
// create new tree
std::unique_ptr<RegTree> ptr(new RegTree());
ptr->param.InitAllowUnknown(this->cfg_);
ptr->param.UpdateAllowUnknown(this->cfg_);
new_trees.push_back(ptr.get());
ret->push_back(std::move(ptr));
} else if (tparam_.process_type == TreeProcessType::kUpdate) {
@@ -276,7 +280,7 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) {
monitor_.Start("CommitModel");
int num_new_trees = 0;
for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
for (uint32_t gid = 0; gid < model_.learner_model_param_->num_output_group; ++gid) {
num_new_trees += new_trees[gid].size();
model_.CommitModel(std::move(new_trees[gid]), gid);
}
@@ -289,7 +293,8 @@ void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& ne
// dart
class Dart : public GBTree {
public:
explicit Dart(bst_float base_margin) : GBTree(base_margin) {}
explicit Dart(LearnerModelParam const* booster_config) :
GBTree(booster_config) {}
void Configure(const Args& cfg) override {
GBTree::Configure(cfg);
@@ -305,7 +310,6 @@ class Dart : public GBTree {
fi->Read(&weight_drop_);
}
}
void Save(dmlc::Stream* fo) const override {
GBTree::Save(fo);
if (weight_drop_.size() != 0) {
@@ -326,18 +330,18 @@ class Dart : public GBTree {
DropTrees(1);
if (thread_temp_.size() == 0) {
thread_temp_.resize(1, RegTree::FVec());
thread_temp_[0].Init(model_.param.num_feature);
thread_temp_[0].Init(model_.learner_model_param_->num_feature);
}
out_preds->resize(model_.param.num_output_group);
ntree_limit *= model_.param.num_output_group;
out_preds->resize(model_.learner_model_param_->num_output_group);
ntree_limit *= model_.learner_model_param_->num_output_group;
if (ntree_limit == 0 || ntree_limit > model_.trees.size()) {
ntree_limit = static_cast<unsigned>(model_.trees.size());
}
// loop over output groups
for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
for (uint32_t gid = 0; gid < model_.learner_model_param_->num_output_group; ++gid) {
(*out_preds)[gid] =
PredValue(inst, gid, &thread_temp_[0], 0, ntree_limit) +
model_.base_margin;
model_.learner_model_param_->base_score;
}
}
@@ -362,6 +366,7 @@ class Dart : public GBTree {
ntree_limit, &weight_drop_, approximate);
}
protected:
friend class GBTree;
// internal prediction loop
@@ -373,7 +378,7 @@ class Dart : public GBTree {
unsigned tree_begin,
unsigned ntree_limit,
bool init_out_preds) {
int num_group = model_.param.num_output_group;
int num_group = model_.learner_model_param_->num_output_group;
ntree_limit *= num_group;
if (ntree_limit == 0 || ntree_limit > model_.trees.size()) {
ntree_limit = static_cast<unsigned>(model_.trees.size());
@@ -388,17 +393,12 @@ class Dart : public GBTree {
CHECK_EQ(out_preds->size(), n);
std::copy(base_margin.begin(), base_margin.end(), out_preds->begin());
} else {
std::fill(out_preds->begin(), out_preds->end(), model_.base_margin);
std::fill(out_preds->begin(), out_preds->end(),
model_.learner_model_param_->base_score);
}
}
if (num_group == 1) {
PredLoopSpecalize<Derived>(p_fmat, out_preds, 1,
tree_begin, ntree_limit);
} else {
PredLoopSpecalize<Derived>(p_fmat, out_preds, num_group,
tree_begin, ntree_limit);
}
PredLoopSpecalize<Derived>(p_fmat, out_preds, num_group, tree_begin,
ntree_limit);
}
template<typename Derived>
@@ -409,7 +409,7 @@ class Dart : public GBTree {
unsigned tree_begin,
unsigned tree_end) {
const int nthread = omp_get_max_threads();
CHECK_EQ(num_group, model_.param.num_output_group);
CHECK_EQ(num_group, model_.learner_model_param_->num_output_group);
InitThreadTemp(nthread);
std::vector<bst_float>& preds = *out_preds;
CHECK_EQ(model_.param.size_leaf_vector, 0)
@@ -443,6 +443,7 @@ class Dart : public GBTree {
}
}
}
for (bst_omp_uint i = nsize - rest; i < nsize; ++i) {
RegTree::FVec& feats = thread_temp_[0];
const auto ridx = static_cast<int64_t>(batch.base_rowid + i);
@@ -461,7 +462,7 @@ class Dart : public GBTree {
void
CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) override {
int num_new_trees = 0;
for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
for (uint32_t gid = 0; gid < model_.learner_model_param_->num_output_group; ++gid) {
num_new_trees += new_trees[gid].size();
model_.CommitModel(std::move(new_trees[gid]), gid);
}
@@ -480,7 +481,7 @@ class Dart : public GBTree {
p_feats->Fill(inst);
for (size_t i = tree_begin; i < tree_end; ++i) {
if (model_.tree_info[i] == bst_group) {
bool drop = (std::binary_search(idx_drop_.begin(), idx_drop_.end(), i));
bool drop = std::binary_search(idx_drop_.begin(), idx_drop_.end(), i);
if (!drop) {
int tid = model_.trees[i]->GetLeafIndex(*p_feats);
psum += weight_drop_[i] * (*model_.trees[i])[tid].LeafValue();
@@ -577,7 +578,7 @@ class Dart : public GBTree {
if (prev_thread_temp_size < nthread) {
thread_temp_.resize(nthread, RegTree::FVec());
for (int i = prev_thread_temp_size; i < nthread; ++i) {
thread_temp_[i].Init(model_.param.num_feature);
thread_temp_[i].Init(model_.learner_model_param_->num_feature);
}
}
}
@@ -600,15 +601,17 @@ DMLC_REGISTER_PARAMETER(DartTrainParam);
XGBOOST_REGISTER_GBM(GBTree, "gbtree")
.describe("Tree booster, gradient boosted trees.")
.set_body([](const std::vector<std::shared_ptr<DMatrix> >& cached_mats, bst_float base_margin) {
auto* p = new GBTree(base_margin);
.set_body([](const std::vector<std::shared_ptr<DMatrix> >& cached_mats,
LearnerModelParam const* booster_config) {
auto* p = new GBTree(booster_config);
p->InitCache(cached_mats);
return p;
});
XGBOOST_REGISTER_GBM(Dart, "dart")
.describe("Tree booster, dart.")
.set_body([](const std::vector<std::shared_ptr<DMatrix> >& cached_mats, bst_float base_margin) {
GBTree* p = new Dart(base_margin);
.set_body([](const std::vector<std::shared_ptr<DMatrix> >& cached_mats,
LearnerModelParam const* booster_config) {
GBTree* p = new Dart(booster_config);
return p;
});
} // namespace gbm

View File

@@ -8,7 +8,6 @@
#define XGBOOST_GBM_GBTREE_H_
#include <dmlc/omp.h>
#include <dmlc/parameter.h>
#include <vector>
#include <map>
@@ -86,7 +85,6 @@ struct GBTreeTrainParam : public XGBoostParameter<GBTreeTrainParam> {
.add_enum("update", TreeProcessType::kUpdate)
.describe("Whether to run the normal boosting process that creates new trees,"\
" or to update the trees in an existing model.");
// add alias
DMLC_DECLARE_ALIAS(updater_seq, updater);
DMLC_DECLARE_FIELD(predictor)
.set_default(PredictorType::kAuto)
@@ -153,7 +151,7 @@ struct DartTrainParam : public XGBoostParameter<DartTrainParam> {
// gradient boosted trees
class GBTree : public GradientBooster {
public:
explicit GBTree(bst_float base_margin) : model_(base_margin) {}
explicit GBTree(LearnerModelParam const* booster_config) : model_(booster_config) {}
void InitCache(const std::vector<std::shared_ptr<DMatrix> > &cache) {
cache_ = std::make_shared<std::unordered_map<DMatrix*, PredictionCacheEntry>>();
@@ -181,24 +179,21 @@ class GBTree : public GradientBooster {
tparam_.tree_method == TreeMethod::kGPUHist;
}
void Load(dmlc::Stream* fi) override {
model_.Load(fi);
this->cfg_.clear();
this->cfg_.emplace_back(std::string("num_feature"),
common::ToString(model_.param.num_feature));
}
GBTreeTrainParam const& GetTrainParam() const {
return tparam_;
}
void Load(dmlc::Stream* fi) override {
model_.Load(fi);
this->cfg_.clear();
}
void Save(dmlc::Stream* fo) const override {
model_.Save(fo);
}
bool AllowLazyCheckPoint() const override {
return model_.param.num_output_group == 1 ||
return model_.learner_model_param_->num_output_group == 1 ||
tparam_.updater_seq.find("distcol") != std::string::npos;
}

View File

@@ -1,11 +1,16 @@
/*!
* Copyright by Contributors 2017
* Copyright 2017-2019 by Contributors
* \file gbtree_model.h
*/
#pragma once
#ifndef XGBOOST_GBM_GBTREE_MODEL_H_
#define XGBOOST_GBM_GBTREE_MODEL_H_
#include <dmlc/parameter.h>
#include <dmlc/io.h>
#include <xgboost/model.h>
#include <xgboost/tree_model.h>
#include <xgboost/parameter.h>
#include <xgboost/learner.h>
#include <memory>
#include <utility>
@@ -13,45 +18,42 @@
#include <vector>
namespace xgboost {
class Json;
namespace gbm {
/*! \brief model parameters */
struct GBTreeModelParam : public dmlc::Parameter<GBTreeModelParam> {
public:
/*! \brief number of trees */
int num_trees;
int32_t num_trees;
/*! \brief (Deprecated) number of roots */
int deprecated_num_roots;
int32_t deprecated_num_roots;
/*! \brief number of features to be used by trees */
int num_feature;
int32_t deprecated_num_feature;
/*! \brief pad this space, for backward compatibility reason.*/
int pad_32bit;
int32_t pad_32bit;
/*! \brief deprecated padding space. */
int64_t num_pbuffer_deprecated;
/*!
* \brief how many output group a single instance can produce
* this affects the behavior of number of output we have:
* suppose we have n instance and k group, output will be k * n
*/
int num_output_group;
int64_t deprecated_num_pbuffer;
// deprecated. use learner_model_param_->num_output_group.
int32_t deprecated_num_output_group;
/*! \brief size of leaf vector needed in tree */
int size_leaf_vector;
int32_t size_leaf_vector;
/*! \brief reserved parameters */
int reserved[32];
int32_t reserved[32];
/*! \brief constructor */
GBTreeModelParam() {
std::memset(this, 0, sizeof(GBTreeModelParam));
static_assert(sizeof(GBTreeModelParam) == (4 + 2 + 2 + 32) * sizeof(int),
std::memset(this, 0, sizeof(GBTreeModelParam)); // FIXME(trivialfis): Why?
static_assert(sizeof(GBTreeModelParam) == (4 + 2 + 2 + 32) * sizeof(int32_t),
"64/32 bit compatibility issue");
}
// declare parameters, only declare those that need to be set.
DMLC_DECLARE_PARAMETER(GBTreeModelParam) {
DMLC_DECLARE_FIELD(num_output_group)
.set_lower_bound(1)
.set_default(1)
.describe(
"Number of output groups to be predicted,"
" used for multi-class classification.");
DMLC_DECLARE_FIELD(num_feature)
DMLC_DECLARE_FIELD(num_trees)
.set_lower_bound(0)
.set_default(0)
.describe("Number of features used for training and prediction.");
DMLC_DECLARE_FIELD(size_leaf_vector)
.set_lower_bound(0)
@@ -61,11 +63,13 @@ struct GBTreeModelParam : public dmlc::Parameter<GBTreeModelParam> {
};
struct GBTreeModel : public Model {
explicit GBTreeModel(bst_float base_margin) : base_margin(base_margin) {}
public:
explicit GBTreeModel(LearnerModelParam const* learner_model_param) :
learner_model_param_{learner_model_param} {}
void Configure(const Args& cfg) {
// initialize model parameters if not yet been initialized.
if (trees.size() == 0) {
param.InitAllowUnknown(cfg);
param.UpdateAllowUnknown(cfg);
}
}
@@ -136,7 +140,7 @@ struct GBTreeModel : public Model {
}
// base margin
bst_float base_margin;
LearnerModelParam const* learner_model_param_;
// model parameter
GBTreeModelParam param;
/*! \brief vector of trees stored in the model */
@@ -148,3 +152,5 @@ struct GBTreeModel : public Model {
};
} // namespace gbm
} // namespace xgboost
#endif // XGBOOST_GBM_GBTREE_MODEL_H_