diff --git a/cmake/Version.cmake b/cmake/Version.cmake index a7dfb1c68..f38ce3ce3 100644 --- a/cmake/Version.cmake +++ b/cmake/Version.cmake @@ -5,5 +5,5 @@ function (write_version) ${xgboost_SOURCE_DIR}/include/xgboost/version_config.h @ONLY) configure_file( ${xgboost_SOURCE_DIR}/cmake/Python_version.in - ${xgboost_SOURCE_DIR}/python-package/xgboost/VERSION) + ${xgboost_SOURCE_DIR}/python-package/xgboost/VERSION @ONLY) endfunction (write_version) diff --git a/include/xgboost/base.h b/include/xgboost/base.h index 1a4df84c0..9b8af5f17 100644 --- a/include/xgboost/base.h +++ b/include/xgboost/base.h @@ -112,7 +112,7 @@ using bst_float = float; // NOLINT /*! \brief Type for data column (feature) index. */ using bst_feature_t = uint32_t; // NOLINT -/*! \breif Type for data row index. +/*! \brief Type for data row index. * * Be careful `std::size_t' is implementation-defined. Meaning that the binary * representation of DMatrix might not be portable across platform. Booster model should diff --git a/include/xgboost/data.h b/include/xgboost/data.h index ba4a73f70..1587507f3 100644 --- a/include/xgboost/data.h +++ b/include/xgboost/data.h @@ -22,8 +22,6 @@ #include namespace xgboost { -// forward declare learner. -class LearnerImpl; // forward declare dmatrix. class DMatrix; diff --git a/include/xgboost/gbm.h b/include/xgboost/gbm.h index 9779c76d9..7eff453df 100644 --- a/include/xgboost/gbm.h +++ b/include/xgboost/gbm.h @@ -11,10 +11,8 @@ #include #include #include -#include -#include -#include #include +#include #include #include @@ -23,6 +21,14 @@ #include namespace xgboost { + +class Json; +class FeatureMap; +class ObjFunction; + +struct GenericParameter; +struct LearnerModelParam; + /*! * \brief interface of gradient boosting model. */ @@ -117,13 +123,14 @@ class GradientBooster { * \param condition_feature feature to condition on (i.e. fix) during calculations */ virtual void PredictContribution(DMatrix* dmat, - std::vector* out_contribs, - unsigned ntree_limit = 0, bool approximate = false, - int condition = 0, unsigned condition_feature = 0) = 0; + std::vector* out_contribs, + unsigned ntree_limit = 0, + bool approximate = false, int condition = 0, + unsigned condition_feature = 0) = 0; virtual void PredictInteractionContributions(DMatrix* dmat, - std::vector* out_contribs, - unsigned ntree_limit, bool approximate) = 0; + std::vector* out_contribs, + unsigned ntree_limit, bool approximate) = 0; /*! * \brief dump the model in the requested format @@ -136,21 +143,22 @@ class GradientBooster { bool with_stats, std::string format) const = 0; /*! - * \brief Whether the current booster use GPU. + * \brief Whether the current booster uses GPU. */ virtual bool UseGPU() const = 0; /*! * \brief create a gradient booster from given name * \param name name of gradient booster + * \param generic_param Pointer to runtime parameters + * \param learner_model_param pointer to global model parameters * \param cache_mats The cache data matrix of the Booster. - * \param base_margin The base margin of prediction. * \return The created booster. 
*/ static GradientBooster* Create( const std::string& name, - GenericParameter const* gparam, - const std::vector >& cache_mats, - bst_float base_margin); + GenericParameter const* generic_param, + LearnerModelParam const* learner_model_param, + const std::vector >& cache_mats); static void AssertGPUSupport() { #ifndef XGBOOST_USE_CUDA @@ -166,7 +174,7 @@ struct GradientBoosterReg : public dmlc::FunctionRegEntryBase< GradientBoosterReg, std::function > &cached_mats, - bst_float base_margin)> > { + LearnerModelParam const* learner_model_param)> > { }; /*! diff --git a/include/xgboost/generic_parameters.h b/include/xgboost/generic_parameters.h index 17b4225b1..90dcc0f1c 100644 --- a/include/xgboost/generic_parameters.h +++ b/include/xgboost/generic_parameters.h @@ -13,8 +13,9 @@ namespace xgboost { struct GenericParameter : public XGBoostParameter { // Constant representing the device ID of CPU. - static int constexpr kCpuId = -1; + static int32_t constexpr kCpuId = -1; + public: // stored random seed int seed; // whether seed the PRNG each iteration @@ -26,8 +27,7 @@ struct GenericParameter : public XGBoostParameter { int gpu_id; // gpu page size in external memory mode, 0 means using the default. size_t gpu_page_size; - - void ConfigureGpuId(bool require_gpu); + bool enable_experimental_json_serialization {false}; void CheckDeprecated() { if (this->n_gpus != 0) { @@ -36,6 +36,12 @@ struct GenericParameter : public XGBoostParameter { << this->__MANAGER__()->Find("n_gpus")->GetFieldInfo().description; } } + /*! + * \brief Configure the parameter `gpu_id'. + * + * \param require_gpu Whether GPU is explicitly required from user. + */ + void ConfigureGpuId(bool require_gpu); // declare parameters DMLC_DECLARE_PARAMETER(GenericParameter) { @@ -60,6 +66,10 @@ struct GenericParameter : public XGBoostParameter { .set_default(0) .set_lower_bound(0) .describe("GPU page size when running in external memory mode."); + DMLC_DECLARE_FIELD(enable_experimental_json_serialization) + .set_default(false) + .describe("Enable using JSON for memory serialization (Python Pickle, " + "rabit checkpoints etc.)."); DMLC_DECLARE_FIELD(n_gpus) .set_default(0) .set_range(0, 1) diff --git a/include/xgboost/learner.h b/include/xgboost/learner.h index fbdc68b8a..8b953af35 100644 --- a/include/xgboost/learner.h +++ b/include/xgboost/learner.h @@ -9,13 +9,10 @@ #define XGBOOST_LEARNER_H_ #include - #include -#include -#include -#include #include #include +#include #include #include @@ -26,6 +23,12 @@ namespace xgboost { +class Metric; +class GradientBooster; +class ObjFunction; +class DMatrix; +class Json; + /*! * \brief Learner class that does training and prediction. * This is the user facing module of xgboost training. @@ -45,7 +48,7 @@ namespace xgboost { class Learner : public Model, public rabit::Serializable { public: /*! \brief virtual destructor */ - ~Learner() override = default; + ~Learner() override; /*! * \brief Configure Learner based on set parameters. */ @@ -180,8 +183,6 @@ class Learner : public Model, public rabit::Serializable { virtual const std::map& GetConfigurationArguments() const = 0; protected: - /*! \brief internal base score of the model */ - bst_float base_score_; /*! \brief objective function */ std::unique_ptr obj_; /*! \brief The gradient booster used by the model*/ @@ -189,7 +190,26 @@ class Learner : public Model, public rabit::Serializable { /*! \brief The evaluation metrics used to evaluate the model. */ std::vector > metrics_; /*! \brief Training parameter. 
*/ - GenericParameter generic_param_; + GenericParameter generic_parameters_; +}; + +struct LearnerModelParamLegacy; + +/* + * \brief Basic Model Parameters, used to describe the booster. + */ +struct LearnerModelParam { + /* \brief global bias */ + bst_float base_score; + /* \brief number of features */ + uint32_t num_feature; + /* \brief number of classes, if it is multi-class classification */ + uint32_t num_output_group; + + LearnerModelParam() : base_score {0.5}, num_feature{0}, num_output_group{0} {} + // As the old `LearnerModelParamLegacy` is still used by binary IO, we keep + // this one as an immutable copy. + LearnerModelParam(LearnerModelParamLegacy const& user_param, float base_margin); }; } // namespace xgboost diff --git a/include/xgboost/objective.h b/include/xgboost/objective.h index 8affd8e32..e55a63542 100644 --- a/include/xgboost/objective.h +++ b/include/xgboost/objective.h @@ -1,5 +1,5 @@ /*! - * Copyright 2014 by Contributors + * Copyright 2014-2019 by Contributors * \file objective.h * \brief interface of objective function used by xgboost. * \author Tianqi Chen, Kailong Chen diff --git a/include/xgboost/tree_model.h b/include/xgboost/tree_model.h index 256b40ccf..965e8c0d3 100644 --- a/include/xgboost/tree_model.h +++ b/include/xgboost/tree_model.h @@ -27,6 +27,9 @@ namespace xgboost { struct PathElement; // forward declaration +class Json; +// FIXME(trivialfis): Once binary IO is gone, make this parameter internal as it should +// not be configured by users. /*! \brief meta parameters of the tree */ struct TreeParam : public dmlc::Parameter { /*! \brief (Deprecated) number of start root */ @@ -36,7 +39,7 @@ struct TreeParam : public dmlc::Parameter { /*!\brief number of deleted nodes */ int num_deleted; /*! \brief maximum depth, this is a statistics of the tree */ - int max_depth; + int deprecated_max_depth; /*! \brief number of features used for tree construction */ int num_feature; /*! @@ -67,7 +70,7 @@ struct TreeParam : public dmlc::Parameter { bool operator==(const TreeParam& b) const { return num_nodes == b.num_nodes && - num_deleted == b.num_deleted && max_depth == b.max_depth && + num_deleted == b.num_deleted && num_feature == b.num_feature && size_leaf_vector == b.size_leaf_vector; } diff --git a/include/xgboost/tree_updater.h b/include/xgboost/tree_updater.h index f4f9adc67..e24bd39bd 100644 --- a/include/xgboost/tree_updater.h +++ b/include/xgboost/tree_updater.h @@ -22,6 +22,9 @@ #include namespace xgboost { + +class Json; + /*! * \brief interface of tree update module, that performs update of a tree. 
*/ diff --git a/src/common/io.cc b/src/common/io.cc index 1f80676a0..de0541030 100644 --- a/src/common/io.cc +++ b/src/common/io.cc @@ -136,9 +136,9 @@ std::string LoadSequentialFile(std::string fname) { buffer.resize(fsize + 1); fread(&buffer[0], 1, fsize, f); - buffer.back() = '\0'; fclose(f); #endif // defined(__unix__) + buffer.back() = '\0'; return buffer; } diff --git a/src/gbm/gblinear.cc b/src/gbm/gblinear.cc index f5735202a..ae9dddbac 100644 --- a/src/gbm/gblinear.cc +++ b/src/gbm/gblinear.cc @@ -7,15 +7,18 @@ */ #include #include -#include -#include -#include #include #include #include #include +#include "xgboost/gbm.h" +#include "xgboost/json.h" +#include "xgboost/linear_updater.h" +#include "xgboost/logging.h" +#include "xgboost/learner.h" + #include "gblinear_model.h" #include "../common/timer.h" @@ -48,8 +51,10 @@ struct GBLinearTrainParam : public XGBoostParameter { class GBLinear : public GradientBooster { public: explicit GBLinear(const std::vector > &cache, - bst_float base_margin) - : base_margin_(base_margin), + LearnerModelParam const* learner_model_param) + : learner_model_param_{learner_model_param}, + model_{learner_model_param_}, + previous_model_{learner_model_param_}, sum_instance_weight_(0), sum_weight_complete_(false), is_converged_(false) { @@ -62,7 +67,7 @@ class GBLinear : public GradientBooster { } void Configure(const Args& cfg) override { if (model_.weight.size() == 0) { - model_.param.InitAllowUnknown(cfg); + model_.Configure(cfg); } param_.UpdateAllowUnknown(cfg); updater_.reset(LinearUpdater::Create(param_.updater, generic_param_)); @@ -116,11 +121,12 @@ class GBLinear : public GradientBooster { } // add base margin void PredictInstance(const SparsePage::Inst &inst, - std::vector *out_preds, - unsigned ntree_limit) override { - const int ngroup = model_.param.num_output_group; + std::vector *out_preds, + unsigned ntree_limit) override { + const int ngroup = model_.learner_model_param_->num_output_group; for (int gid = 0; gid < ngroup; ++gid) { - this->Pred(inst, dmlc::BeginPtr(*out_preds), gid, base_margin_); + this->Pred(inst, dmlc::BeginPtr(*out_preds), gid, + learner_model_param_->base_score); } } @@ -138,8 +144,8 @@ class GBLinear : public GradientBooster { CHECK_EQ(ntree_limit, 0U) << "GBLinear::PredictContribution: ntrees is only valid for gbtree predictor"; const auto& base_margin = p_fmat->Info().base_margin_.ConstHostVector(); - const int ngroup = model_.param.num_output_group; - const size_t ncolumns = model_.param.num_feature + 1; + const int ngroup = model_.learner_model_param_->num_output_group; + const size_t ncolumns = model_.learner_model_param_->num_feature + 1; // allocate space for (#features + bias) times #groups times #rows std::vector& contribs = *out_contribs; contribs.resize(p_fmat->Info().num_row_ * ncolumns * ngroup); @@ -149,35 +155,38 @@ class GBLinear : public GradientBooster { for (const auto &batch : p_fmat->GetBatches()) { // parallel over local batch const auto nsize = static_cast(batch.Size()); - #pragma omp parallel for schedule(static) +#pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < nsize; ++i) { - auto inst = batch[i]; + auto inst = batch[i]; auto row_idx = static_cast(batch.base_rowid + i); // loop over output groups for (int gid = 0; gid < ngroup; ++gid) { bst_float *p_contribs = &contribs[(row_idx * ngroup + gid) * ncolumns]; // calculate linear terms' contributions for (auto& ins : inst) { - if (ins.index >= model_.param.num_feature) continue; + if (ins.index >= 
model_.learner_model_param_->num_feature) continue; p_contribs[ins.index] = ins.fvalue * model_[ins.index][gid]; } // add base margin to BIAS p_contribs[ncolumns - 1] = model_.bias()[gid] + - ((base_margin.size() != 0) ? base_margin[row_idx * ngroup + gid] : base_margin_); + ((base_margin.size() != 0) ? base_margin[row_idx * ngroup + gid] : + learner_model_param_->base_score); } } } } void PredictInteractionContributions(DMatrix* p_fmat, - std::vector* out_contribs, - unsigned ntree_limit, bool approximate) override { - std::vector& contribs = *out_contribs; + std::vector* out_contribs, + unsigned ntree_limit, bool approximate) override { + std::vector& contribs = *out_contribs; - // linear models have no interaction effects - const size_t nelements = model_.param.num_feature*model_.param.num_feature; - contribs.resize(p_fmat->Info().num_row_ * nelements * model_.param.num_output_group); - std::fill(contribs.begin(), contribs.end(), 0); + // linear models have no interaction effects + const size_t nelements = model_.learner_model_param_->num_feature * + model_.learner_model_param_->num_feature; + contribs.resize(p_fmat->Info().num_row_ * nelements * + model_.learner_model_param_->num_output_group); + std::fill(contribs.begin(), contribs.end(), 0); } std::vector DumpModel(const FeatureMap& fmap, @@ -196,26 +205,26 @@ class GBLinear : public GradientBooster { protected: void PredictBatchInternal(DMatrix *p_fmat, - std::vector *out_preds) { + std::vector *out_preds) { monitor_.Start("PredictBatchInternal"); model_.LazyInitModel(); std::vector &preds = *out_preds; const auto& base_margin = p_fmat->Info().base_margin_.ConstHostVector(); // start collecting the prediction - const int ngroup = model_.param.num_output_group; + const int ngroup = model_.learner_model_param_->num_output_group; preds.resize(p_fmat->Info().num_row_ * ngroup); for (const auto &batch : p_fmat->GetBatches()) { // output convention: nrow * k, where nrow is number of rows // k is number of group // parallel over local batch const auto nsize = static_cast(batch.Size()); - #pragma omp parallel for schedule(static) +#pragma omp parallel for schedule(static) for (omp_ulong i = 0; i < nsize; ++i) { const size_t ridx = batch.base_rowid + i; // loop over output groups for (int gid = 0; gid < ngroup; ++gid) { bst_float margin = (base_margin.size() != 0) ? 
- base_margin[ridx * ngroup + gid] : base_margin_; + base_margin[ridx * ngroup + gid] : learner_model_param_->base_score; this->Pred(batch[i], &preds[ridx * ngroup], gid, margin); } } @@ -227,7 +236,7 @@ class GBLinear : public GradientBooster { for (auto &kv : cache_) { PredictionCacheEntry &e = kv.second; if (e.predictions.size() == 0) { - size_t n = model_.param.num_output_group * e.data->Info().num_row_; + size_t n = model_.learner_model_param_->num_output_group * e.data->Info().num_row_; e.predictions.resize(n); } this->PredictBatchInternal(e.data.get(), &e.predictions); @@ -262,18 +271,18 @@ class GBLinear : public GradientBooster { } } - inline void Pred(const SparsePage::Inst &inst, bst_float *preds, int gid, - bst_float base) { + void Pred(const SparsePage::Inst &inst, bst_float *preds, int gid, + bst_float base) { bst_float psum = model_.bias()[gid] + base; for (const auto& ins : inst) { - if (ins.index >= model_.param.num_feature) continue; + if (ins.index >= model_.learner_model_param_->num_feature) continue; psum += ins.fvalue * model_[ins.index][gid]; } preds[gid] = psum; } // biase margin score - bst_float base_margin_; + LearnerModelParam const* learner_model_param_; // model field GBLinearModel model_; GBLinearModel previous_model_; @@ -302,14 +311,13 @@ class GBLinear : public GradientBooster { }; // register the objective functions -DMLC_REGISTER_PARAMETER(GBLinearModelParam); DMLC_REGISTER_PARAMETER(GBLinearTrainParam); XGBOOST_REGISTER_GBM(GBLinear, "gblinear") .describe("Linear booster, implement generalized linear model.") .set_body([](const std::vector > &cache, - bst_float base_margin) { - return new GBLinear(cache, base_margin); + LearnerModelParam const* booster_config) { + return new GBLinear(cache, booster_config); }); } // namespace gbm } // namespace xgboost diff --git a/src/gbm/gblinear_model.h b/src/gbm/gblinear_model.h index 2c059dc2e..e91fecde6 100644 --- a/src/gbm/gblinear_model.h +++ b/src/gbm/gblinear_model.h @@ -1,51 +1,65 @@ /*! - * Copyright by Contributors 2018 + * Copyright 2018-2019 by Contributors */ #pragma once #include #include -#include -#include -#include +#include + #include #include #include +#include "xgboost/base.h" +#include "xgboost/feature_map.h" +#include "xgboost/model.h" +#include "xgboost/json.h" +#include "xgboost/parameter.h" + namespace xgboost { +class Json; namespace gbm { -// model parameter -struct GBLinearModelParam : public dmlc::Parameter { +// Deprecated in 1.0.0. model parameter. Only staying here for compatible binary model IO. +struct DeprecatedGBLinearModelParam : public dmlc::Parameter { // number of feature dimension - unsigned num_feature; - // number of output group - int num_output_group; + uint32_t deprecated_num_feature; + // deprecated. use learner_model_param_->num_output_group. 
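For orientation, here is a small standalone sketch (not the xgboost classes themselves) of how the linear booster's per-group prediction is assembled once the feature count, group count, and base score all come from the learner-owned model parameter instead of a booster-local `base_margin_`. The struct and function names below are illustrative stand-ins, not code from the patch:

```cpp
#include <cstdint>
#include <utility>
#include <vector>

// Simplified stand-in for LearnerModelParam and the gblinear weight layout:
// (num_feature + 1) * num_output_group entries, with the bias stored last.
struct ModelShape {
  float base_score{0.5f};
  uint32_t num_feature{0};
  uint32_t num_output_group{1};
};

// One sparse row entry: (feature index, feature value).
using Entry = std::pair<uint32_t, float>;

float PredictOneGroup(const std::vector<float>& weight, const ModelShape& shape,
                      const std::vector<Entry>& row, uint32_t gid) {
  const float bias = weight[shape.num_feature * shape.num_output_group + gid];
  float psum = bias + shape.base_score;
  for (const Entry& e : row) {
    if (e.first >= shape.num_feature) continue;  // skip out-of-range features
    psum += e.second * weight[e.first * shape.num_output_group + gid];
  }
  return psum;
}
```

The bias slot sits after all per-feature weights, which is why `bias()` in `gblinear_model.h` indexes at `num_feature * num_output_group`.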
+ int32_t deprecated_num_output_group; // reserved field - int reserved[32]; + int32_t reserved[32]; // constructor - GBLinearModelParam() { std::memset(this, 0, sizeof(GBLinearModelParam)); } - DMLC_DECLARE_PARAMETER(GBLinearModelParam) { - DMLC_DECLARE_FIELD(num_feature) - .set_lower_bound(0) - .describe("Number of features used in classification."); - DMLC_DECLARE_FIELD(num_output_group) - .set_lower_bound(1) - .set_default(1) - .describe("Number of output groups in the setting."); + DeprecatedGBLinearModelParam() { + static_assert(sizeof(*this) == sizeof(int32_t) * 34, + "Model parameter size can not be changed."); + std::memset(this, 0, sizeof(DeprecatedGBLinearModelParam)); } + + DMLC_DECLARE_PARAMETER(DeprecatedGBLinearModelParam) {} }; // model for linear booster class GBLinearModel : public Model { + private: + // Deprecated in 1.0.0 + DeprecatedGBLinearModelParam param; + public: - // parameter - GBLinearModelParam param; + LearnerModelParam const* learner_model_param_; + + public: + explicit GBLinearModel(LearnerModelParam const* learner_model_param) : + learner_model_param_ {learner_model_param} {} + void Configure(Args const &cfg) { } + // weight for each of feature, bias is the last one std::vector weight; // initialize the model parameter inline void LazyInitModel() { - if (!weight.empty()) return; + if (!weight.empty()) + return; // bias is the last weight - weight.resize((param.num_feature + 1) * param.num_output_group); + weight.resize((learner_model_param_->num_feature + 1) * + learner_model_param_->num_output_group); std::fill(weight.begin(), weight.end(), 0.0f); } // save the model to file @@ -70,33 +84,37 @@ class GBLinearModel : public Model { } // model bias - inline bst_float* bias() { - return &weight[param.num_feature * param.num_output_group]; + inline bst_float *bias() { + return &weight[learner_model_param_->num_feature * + learner_model_param_->num_output_group]; } - inline const bst_float* bias() const { - return &weight[param.num_feature * param.num_output_group]; + inline const bst_float *bias() const { + return &weight[learner_model_param_->num_feature * + learner_model_param_->num_output_group]; } // get i-th weight - inline bst_float* operator[](size_t i) { - return &weight[i * param.num_output_group]; + inline bst_float *operator[](size_t i) { + return &weight[i * learner_model_param_->num_output_group]; } - inline const bst_float* operator[](size_t i) const { - return &weight[i * param.num_output_group]; + inline const bst_float *operator[](size_t i) const { + return &weight[i * learner_model_param_->num_output_group]; } - std::vector DumpModel(const FeatureMap& fmap, bool with_stats, + std::vector DumpModel(const FeatureMap &fmap, bool with_stats, std::string format) const { - const int ngroup = param.num_output_group; - const unsigned nfeature = param.num_feature; + const int ngroup = learner_model_param_->num_output_group; + const unsigned nfeature = learner_model_param_->num_feature; std::stringstream fo(""); if (format == "json") { fo << " { \"bias\": [" << std::endl; for (int gid = 0; gid < ngroup; ++gid) { - if (gid != 0) fo << "," << std::endl; + if (gid != 0) + fo << "," << std::endl; fo << " " << this->bias()[gid]; } - fo << std::endl << " ]," << std::endl + fo << std::endl + << " ]," << std::endl << " \"weight\": [" << std::endl; for (unsigned i = 0; i < nfeature; ++i) { for (int gid = 0; gid < ngroup; ++gid) { diff --git a/src/gbm/gbm.cc b/src/gbm/gbm.cc index 644888b88..2d6a89556 100644 --- a/src/gbm/gbm.cc +++ b/src/gbm/gbm.cc @@ -3,8 
+3,14 @@ * \file gbm.cc * \brief Registry of gradient boosters. */ -#include #include +#include +#include +#include + +#include "xgboost/gbm.h" +#include "xgboost/learner.h" +#include "xgboost/generic_parameters.h" namespace dmlc { DMLC_REGISTRY_ENABLE(::xgboost::GradientBoosterReg); @@ -14,17 +20,16 @@ namespace xgboost { GradientBooster* GradientBooster::Create( const std::string& name, GenericParameter const* generic_param, - const std::vector >& cache_mats, - bst_float base_margin) { + LearnerModelParam const* learner_model_param, + const std::vector >& cache_mats) { auto *e = ::dmlc::Registry< ::xgboost::GradientBoosterReg>::Get()->Find(name); if (e == nullptr) { LOG(FATAL) << "Unknown gbm type " << name; } - auto p_bst = (e->body)(cache_mats, base_margin); + auto p_bst = (e->body)(cache_mats, learner_model_param); p_bst->generic_param_ = generic_param; return p_bst; } - } // namespace xgboost namespace xgboost { diff --git a/src/gbm/gbtree.cc b/src/gbm/gbtree.cc index ec21cdd01..96635e744 100644 --- a/src/gbm/gbtree.cc +++ b/src/gbm/gbtree.cc @@ -14,8 +14,9 @@ #include #include -#include "xgboost/logging.h" #include "xgboost/gbm.h" +#include "xgboost/logging.h" +#include "xgboost/json.h" #include "xgboost/predictor.h" #include "xgboost/tree_updater.h" #include "xgboost/host_device_vector.h" @@ -26,7 +27,6 @@ #include "../common/random.h" #include "../common/timer.h" - namespace xgboost { namespace gbm { @@ -161,10 +161,11 @@ void GBTree::ConfigureUpdaters() { "single updater grow_quantile_histmaker."; tparam_.updater_seq = "grow_quantile_histmaker"; break; - case TreeMethod::kGPUHist: + case TreeMethod::kGPUHist: { this->AssertGPUSupport(); tparam_.updater_seq = "grow_gpu_hist"; break; + } default: LOG(FATAL) << "Unknown tree_method (" << static_cast(tparam_.tree_method) << ") detected"; @@ -175,9 +176,10 @@ void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector* in_gpair, ObjFunction* obj) { std::vector > > new_trees; - const int ngroup = model_.param.num_output_group; + const int ngroup = model_.learner_model_param_->num_output_group; ConfigureWithKnownData(this->cfg_, p_fmat); monitor_.Start("BoostNewTrees"); + CHECK_NE(ngroup, 0); if (ngroup == 1) { std::vector > ret; BoostNewTrees(in_gpair, p_fmat, 0, &ret); @@ -234,9 +236,11 @@ void GBTree::InitUpdater(Args const& cfg) { LOG(FATAL) << ss.str(); } } + // Do not push new updater in. 
return; } + // create new updaters for (const std::string& pstr : ups) { std::unique_ptr up(TreeUpdater::Create(pstr.c_str(), generic_param_)); up->Configure(cfg); @@ -255,7 +259,7 @@ void GBTree::BoostNewTrees(HostDeviceVector* gpair, if (tparam_.process_type == TreeProcessType::kDefault) { // create new tree std::unique_ptr ptr(new RegTree()); - ptr->param.InitAllowUnknown(this->cfg_); + ptr->param.UpdateAllowUnknown(this->cfg_); new_trees.push_back(ptr.get()); ret->push_back(std::move(ptr)); } else if (tparam_.process_type == TreeProcessType::kUpdate) { @@ -276,7 +280,7 @@ void GBTree::BoostNewTrees(HostDeviceVector* gpair, void GBTree::CommitModel(std::vector>>&& new_trees) { monitor_.Start("CommitModel"); int num_new_trees = 0; - for (int gid = 0; gid < model_.param.num_output_group; ++gid) { + for (uint32_t gid = 0; gid < model_.learner_model_param_->num_output_group; ++gid) { num_new_trees += new_trees[gid].size(); model_.CommitModel(std::move(new_trees[gid]), gid); } @@ -289,7 +293,8 @@ void GBTree::CommitModel(std::vector>>&& ne // dart class Dart : public GBTree { public: - explicit Dart(bst_float base_margin) : GBTree(base_margin) {} + explicit Dart(LearnerModelParam const* booster_config) : + GBTree(booster_config) {} void Configure(const Args& cfg) override { GBTree::Configure(cfg); @@ -305,7 +310,6 @@ class Dart : public GBTree { fi->Read(&weight_drop_); } } - void Save(dmlc::Stream* fo) const override { GBTree::Save(fo); if (weight_drop_.size() != 0) { @@ -326,18 +330,18 @@ class Dart : public GBTree { DropTrees(1); if (thread_temp_.size() == 0) { thread_temp_.resize(1, RegTree::FVec()); - thread_temp_[0].Init(model_.param.num_feature); + thread_temp_[0].Init(model_.learner_model_param_->num_feature); } - out_preds->resize(model_.param.num_output_group); - ntree_limit *= model_.param.num_output_group; + out_preds->resize(model_.learner_model_param_->num_output_group); + ntree_limit *= model_.learner_model_param_->num_output_group; if (ntree_limit == 0 || ntree_limit > model_.trees.size()) { ntree_limit = static_cast(model_.trees.size()); } // loop over output groups - for (int gid = 0; gid < model_.param.num_output_group; ++gid) { + for (uint32_t gid = 0; gid < model_.learner_model_param_->num_output_group; ++gid) { (*out_preds)[gid] = PredValue(inst, gid, &thread_temp_[0], 0, ntree_limit) + - model_.base_margin; + model_.learner_model_param_->base_score; } } @@ -362,6 +366,7 @@ class Dart : public GBTree { ntree_limit, &weight_drop_, approximate); } + protected: friend class GBTree; // internal prediction loop @@ -373,7 +378,7 @@ class Dart : public GBTree { unsigned tree_begin, unsigned ntree_limit, bool init_out_preds) { - int num_group = model_.param.num_output_group; + int num_group = model_.learner_model_param_->num_output_group; ntree_limit *= num_group; if (ntree_limit == 0 || ntree_limit > model_.trees.size()) { ntree_limit = static_cast(model_.trees.size()); @@ -388,17 +393,12 @@ class Dart : public GBTree { CHECK_EQ(out_preds->size(), n); std::copy(base_margin.begin(), base_margin.end(), out_preds->begin()); } else { - std::fill(out_preds->begin(), out_preds->end(), model_.base_margin); + std::fill(out_preds->begin(), out_preds->end(), + model_.learner_model_param_->base_score); } } - - if (num_group == 1) { - PredLoopSpecalize(p_fmat, out_preds, 1, - tree_begin, ntree_limit); - } else { - PredLoopSpecalize(p_fmat, out_preds, num_group, - tree_begin, ntree_limit); - } + PredLoopSpecalize(p_fmat, out_preds, num_group, tree_begin, + ntree_limit); } template @@ 
-409,7 +409,7 @@ class Dart : public GBTree { unsigned tree_begin, unsigned tree_end) { const int nthread = omp_get_max_threads(); - CHECK_EQ(num_group, model_.param.num_output_group); + CHECK_EQ(num_group, model_.learner_model_param_->num_output_group); InitThreadTemp(nthread); std::vector& preds = *out_preds; CHECK_EQ(model_.param.size_leaf_vector, 0) @@ -443,6 +443,7 @@ class Dart : public GBTree { } } } + for (bst_omp_uint i = nsize - rest; i < nsize; ++i) { RegTree::FVec& feats = thread_temp_[0]; const auto ridx = static_cast(batch.base_rowid + i); @@ -461,7 +462,7 @@ class Dart : public GBTree { void CommitModel(std::vector>>&& new_trees) override { int num_new_trees = 0; - for (int gid = 0; gid < model_.param.num_output_group; ++gid) { + for (uint32_t gid = 0; gid < model_.learner_model_param_->num_output_group; ++gid) { num_new_trees += new_trees[gid].size(); model_.CommitModel(std::move(new_trees[gid]), gid); } @@ -480,7 +481,7 @@ class Dart : public GBTree { p_feats->Fill(inst); for (size_t i = tree_begin; i < tree_end; ++i) { if (model_.tree_info[i] == bst_group) { - bool drop = (std::binary_search(idx_drop_.begin(), idx_drop_.end(), i)); + bool drop = std::binary_search(idx_drop_.begin(), idx_drop_.end(), i); if (!drop) { int tid = model_.trees[i]->GetLeafIndex(*p_feats); psum += weight_drop_[i] * (*model_.trees[i])[tid].LeafValue(); @@ -577,7 +578,7 @@ class Dart : public GBTree { if (prev_thread_temp_size < nthread) { thread_temp_.resize(nthread, RegTree::FVec()); for (int i = prev_thread_temp_size; i < nthread; ++i) { - thread_temp_[i].Init(model_.param.num_feature); + thread_temp_[i].Init(model_.learner_model_param_->num_feature); } } } @@ -600,15 +601,17 @@ DMLC_REGISTER_PARAMETER(DartTrainParam); XGBOOST_REGISTER_GBM(GBTree, "gbtree") .describe("Tree booster, gradient boosted trees.") -.set_body([](const std::vector >& cached_mats, bst_float base_margin) { - auto* p = new GBTree(base_margin); +.set_body([](const std::vector >& cached_mats, + LearnerModelParam const* booster_config) { + auto* p = new GBTree(booster_config); p->InitCache(cached_mats); return p; }); XGBOOST_REGISTER_GBM(Dart, "dart") .describe("Tree booster, dart.") -.set_body([](const std::vector >& cached_mats, bst_float base_margin) { - GBTree* p = new Dart(base_margin); +.set_body([](const std::vector >& cached_mats, + LearnerModelParam const* booster_config) { + GBTree* p = new Dart(booster_config); return p; }); } // namespace gbm diff --git a/src/gbm/gbtree.h b/src/gbm/gbtree.h index cc10e2c91..126456488 100644 --- a/src/gbm/gbtree.h +++ b/src/gbm/gbtree.h @@ -8,7 +8,6 @@ #define XGBOOST_GBM_GBTREE_H_ #include -#include #include #include @@ -86,7 +85,6 @@ struct GBTreeTrainParam : public XGBoostParameter { .add_enum("update", TreeProcessType::kUpdate) .describe("Whether to run the normal boosting process that creates new trees,"\ " or to update the trees in an existing model."); - // add alias DMLC_DECLARE_ALIAS(updater_seq, updater); DMLC_DECLARE_FIELD(predictor) .set_default(PredictorType::kAuto) @@ -153,7 +151,7 @@ struct DartTrainParam : public XGBoostParameter { // gradient boosted trees class GBTree : public GradientBooster { public: - explicit GBTree(bst_float base_margin) : model_(base_margin) {} + explicit GBTree(LearnerModelParam const* booster_config) : model_(booster_config) {} void InitCache(const std::vector > &cache) { cache_ = std::make_shared>(); @@ -181,24 +179,21 @@ class GBTree : public GradientBooster { tparam_.tree_method == TreeMethod::kGPUHist; } - void Load(dmlc::Stream* fi) 
override { - model_.Load(fi); - - this->cfg_.clear(); - this->cfg_.emplace_back(std::string("num_feature"), - common::ToString(model_.param.num_feature)); - } - GBTreeTrainParam const& GetTrainParam() const { return tparam_; } + void Load(dmlc::Stream* fi) override { + model_.Load(fi); + this->cfg_.clear(); + } + void Save(dmlc::Stream* fo) const override { model_.Save(fo); } bool AllowLazyCheckPoint() const override { - return model_.param.num_output_group == 1 || + return model_.learner_model_param_->num_output_group == 1 || tparam_.updater_seq.find("distcol") != std::string::npos; } diff --git a/src/gbm/gbtree_model.h b/src/gbm/gbtree_model.h index 1e34a756f..c231d4ade 100644 --- a/src/gbm/gbtree_model.h +++ b/src/gbm/gbtree_model.h @@ -1,11 +1,16 @@ /*! - * Copyright by Contributors 2017 + * Copyright 2017-2019 by Contributors + * \file gbtree_model.h */ -#pragma once +#ifndef XGBOOST_GBM_GBTREE_MODEL_H_ +#define XGBOOST_GBM_GBTREE_MODEL_H_ + #include #include #include #include +#include +#include #include #include @@ -13,45 +18,42 @@ #include namespace xgboost { + +class Json; + namespace gbm { /*! \brief model parameters */ struct GBTreeModelParam : public dmlc::Parameter { + public: /*! \brief number of trees */ - int num_trees; + int32_t num_trees; /*! \brief (Deprecated) number of roots */ - int deprecated_num_roots; + int32_t deprecated_num_roots; /*! \brief number of features to be used by trees */ - int num_feature; + int32_t deprecated_num_feature; /*! \brief pad this space, for backward compatibility reason.*/ - int pad_32bit; + int32_t pad_32bit; /*! \brief deprecated padding space. */ - int64_t num_pbuffer_deprecated; - /*! - * \brief how many output group a single instance can produce - * this affects the behavior of number of output we have: - * suppose we have n instance and k group, output will be k * n - */ - int num_output_group; + int64_t deprecated_num_pbuffer; + // deprecated. use learner_model_param_->num_output_group. + int32_t deprecated_num_output_group; /*! \brief size of leaf vector needed in tree */ - int size_leaf_vector; + int32_t size_leaf_vector; /*! \brief reserved parameters */ - int reserved[32]; + int32_t reserved[32]; + /*! \brief constructor */ GBTreeModelParam() { - std::memset(this, 0, sizeof(GBTreeModelParam)); - static_assert(sizeof(GBTreeModelParam) == (4 + 2 + 2 + 32) * sizeof(int), + std::memset(this, 0, sizeof(GBTreeModelParam)); // FIXME(trivialfis): Why? + static_assert(sizeof(GBTreeModelParam) == (4 + 2 + 2 + 32) * sizeof(int32_t), "64/32 bit compatibility issue"); } + // declare parameters, only declare those that need to be set. DMLC_DECLARE_PARAMETER(GBTreeModelParam) { - DMLC_DECLARE_FIELD(num_output_group) - .set_lower_bound(1) - .set_default(1) - .describe( - "Number of output groups to be predicted," - " used for multi-class classification."); - DMLC_DECLARE_FIELD(num_feature) + DMLC_DECLARE_FIELD(num_trees) .set_lower_bound(0) + .set_default(0) .describe("Number of features used for training and prediction."); DMLC_DECLARE_FIELD(size_leaf_vector) .set_lower_bound(0) @@ -61,11 +63,13 @@ struct GBTreeModelParam : public dmlc::Parameter { }; struct GBTreeModel : public Model { - explicit GBTreeModel(bst_float base_margin) : base_margin(base_margin) {} + public: + explicit GBTreeModel(LearnerModelParam const* learner_model_param) : + learner_model_param_{learner_model_param} {} void Configure(const Args& cfg) { // initialize model parameters if not yet been initialized. 
if (trees.size() == 0) { - param.InitAllowUnknown(cfg); + param.UpdateAllowUnknown(cfg); } } @@ -136,7 +140,7 @@ struct GBTreeModel : public Model { } // base margin - bst_float base_margin; + LearnerModelParam const* learner_model_param_; // model parameter GBTreeModelParam param; /*! \brief vector of trees stored in the model */ @@ -148,3 +152,5 @@ struct GBTreeModel : public Model { }; } // namespace gbm } // namespace xgboost + +#endif // XGBOOST_GBM_GBTREE_MODEL_H_ diff --git a/src/learner.cc b/src/learner.cc index 244ef92f6..da3cbe69a 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -16,17 +16,23 @@ #include #include -#include "xgboost/feature_map.h" -#include "xgboost/learner.h" #include "xgboost/base.h" -#include "xgboost/parameter.h" -#include "xgboost/logging.h" +#include "xgboost/feature_map.h" +#include "xgboost/gbm.h" #include "xgboost/generic_parameters.h" #include "xgboost/host_device_vector.h" +#include "xgboost/json.h" +#include "xgboost/learner.h" +#include "xgboost/logging.h" +#include "xgboost/metric.h" +#include "xgboost/objective.h" +#include "xgboost/parameter.h" + #include "common/common.h" #include "common/io.h" #include "common/random.h" #include "common/timer.h" +#include "common/version.h" namespace { @@ -69,8 +75,15 @@ bool Learner::AllowLazyCheckPoint() const { return gbm_->AllowLazyCheckPoint(); } -/*! \brief training parameter for regression */ -struct LearnerModelParam : public dmlc::Parameter { +Learner::~Learner() = default; + +/*! \brief training parameter for regression + * + * Should be deprecated, but still used for being compatible with binary IO. + * Once it's gone, `LearnerModelParam` should handle transforming `base_margin` + * with objective by itself. + */ +struct LearnerModelParamLegacy : public dmlc::Parameter { /* \brief global bias */ bst_float base_score; /* \brief number of features */ @@ -84,12 +97,28 @@ struct LearnerModelParam : public dmlc::Parameter { /*! \brief reserved field */ int reserved[29]; /*! \brief constructor */ - LearnerModelParam() { - std::memset(this, 0, sizeof(LearnerModelParam)); + LearnerModelParamLegacy() { + std::memset(this, 0, sizeof(LearnerModelParamLegacy)); base_score = 0.5f; } + // Skip other legacy fields. + Json ToJson() const { + Object obj; + obj["base_score"] = std::to_string(base_score); + obj["num_feature"] = std::to_string(num_feature); + obj["num_class"] = std::to_string(num_class); + return Json(std::move(obj)); + } + void FromJson(Json const& obj) { + auto const& j_param = get(obj); + std::map m; + m["base_score"] = get(j_param.at("base_score")); + m["num_feature"] = get(j_param.at("num_feature")); + m["num_class"] = get(j_param.at("num_class")); + this->Init(m); + } // declare parameters - DMLC_DECLARE_PARAMETER(LearnerModelParam) { + DMLC_DECLARE_PARAMETER(LearnerModelParamLegacy) { DMLC_DECLARE_FIELD(base_score) .set_default(0.5f) .describe("Global bias of the model."); @@ -104,12 +133,20 @@ struct LearnerModelParam : public dmlc::Parameter { } }; +LearnerModelParam::LearnerModelParam( + LearnerModelParamLegacy const &user_param, float base_margin) + : base_score{base_margin}, num_feature{user_param.num_feature}, + num_output_group{user_param.num_class == 0 + ? 1 + : static_cast(user_param.num_class)} {} + struct LearnerTrainParam : public XGBoostParameter { // data split mode, can be row, col, or none. 
DataSplitMode dsplit; // flag to disable default metric int disable_default_eval_metric; - + // FIXME(trivialfis): The following parameters belong to model itself, but can be + // specified by users. Move them to model parameter once we can get rid of binary IO. std::string booster; std::string objective; @@ -134,7 +171,7 @@ struct LearnerTrainParam : public XGBoostParameter { }; -DMLC_REGISTER_PARAMETER(LearnerModelParam); +DMLC_REGISTER_PARAMETER(LearnerModelParamLegacy); DMLC_REGISTER_PARAMETER(LearnerTrainParam); DMLC_REGISTER_PARAMETER(GenericParameter); @@ -142,14 +179,7 @@ int constexpr GenericParameter::kCpuId; void GenericParameter::ConfigureGpuId(bool require_gpu) { #if defined(XGBOOST_USE_CUDA) - int32_t n_visible = common::AllVisibleGPUs(); - if (n_visible == 0) { - // Running XGBoost compiled with CUDA on CPU only machine. - this->UpdateAllowUnknown(Args{{"gpu_id", std::to_string(kCpuId)}}); - return; - } - - if (this->gpu_id == kCpuId) { // 0. User didn't specify the `gpu_id' + if (gpu_id == kCpuId) { // 0. User didn't specify the `gpu_id' if (require_gpu) { // 1. `tree_method' or `predictor' or both are using // GPU. // 2. Use device 0 as default. @@ -159,7 +189,10 @@ void GenericParameter::ConfigureGpuId(bool require_gpu) { // 3. When booster is loaded from a memory image (Python pickle or R // raw model), number of available GPUs could be different. Wrap around it. - if (this->gpu_id != kCpuId && this->gpu_id >= n_visible) { + int32_t n_gpus = common::AllVisibleGPUs(); + if (n_gpus == 0) { + this->UpdateAllowUnknown(Args{{"gpu_id", std::to_string(kCpuId)}}); + } else if (gpu_id != kCpuId && gpu_id >= n_gpus) { this->UpdateAllowUnknown(Args{{"gpu_id", std::to_string(gpu_id % n_gpus)}}); } #else @@ -175,25 +208,25 @@ void GenericParameter::ConfigureGpuId(bool require_gpu) { class LearnerImpl : public Learner { public: explicit LearnerImpl(std::vector > cache) - : configured_{false}, cache_(std::move(cache)) { + : need_configuration_{true}, cache_(std::move(cache)) { monitor_.Init("Learner"); } // Configuration before data is known. 
void Configure() override { - if (configured_) { return; } + if (!this->need_configuration_) { return; } monitor_.Start("Configure"); auto old_tparam = tparam_; Args args = {cfg_.cbegin(), cfg_.cend()}; tparam_.UpdateAllowUnknown(args); - - generic_param_.UpdateAllowUnknown(args); - generic_param_.CheckDeprecated(); + mparam_.UpdateAllowUnknown(args); + generic_parameters_.UpdateAllowUnknown(args); + generic_parameters_.CheckDeprecated(); ConsoleLogger::Configure(args); - if (generic_param_.nthread != 0) { - omp_set_num_threads(generic_param_.nthread); + if (generic_parameters_.nthread != 0) { + omp_set_num_threads(generic_parameters_.nthread); } // add additional parameters @@ -202,9 +235,9 @@ class LearnerImpl : public Learner { tparam_.dsplit = DataSplitMode::kRow; } - mparam_.InitAllowUnknown(args); + // set seed only before the model is initialized - common::GlobalRandom().seed(generic_param_.seed); + common::GlobalRandom().seed(generic_parameters_.seed); // must precede configure gbm since num_features is required for gbm this->ConfigureNumFeatures(); args = {cfg_.cbegin(), cfg_.cend()}; // renew @@ -212,9 +245,12 @@ class LearnerImpl : public Learner { this->ConfigureGBM(old_tparam, args); this->ConfigureMetrics(args); - generic_param_.ConfigureGpuId(this->gbm_->UseGPU()); + generic_parameters_.ConfigureGpuId(this->gbm_->UseGPU()); - this->configured_ = true; + learner_model_param_ = LearnerModelParam(mparam_, + obj_->ProbToMargin(mparam_.base_score)); + + this->need_configuration_ = false; monitor_.Stop("Configure"); } @@ -241,7 +277,7 @@ class LearnerImpl : public Learner { } void Load(dmlc::Stream* fi) override { - generic_param_.UpdateAllowUnknown(Args{}); + generic_parameters_.UpdateAllowUnknown(Args{}); tparam_.Init(std::vector>{}); // TODO(tqchen) mark deprecation of old format. common::PeekableInStream fp(fi); @@ -279,9 +315,9 @@ class LearnerImpl : public Learner { } CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format"; // duplicated code with LazyInitModel - obj_.reset(ObjFunction::Create(tparam_.objective, &generic_param_)); - gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_param_, - cache_, mparam_.base_score)); + obj_.reset(ObjFunction::Create(tparam_.objective, &generic_parameters_)); + gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_parameters_, + &learner_model_param_, cache_)); gbm_->Load(fi); if (mparam_.contain_extra_attrs != 0) { std::vector > attr; @@ -340,7 +376,7 @@ class LearnerImpl : public Learner { std::vector metr; fi->Read(&metr); for (auto name : metr) { - metrics_.emplace_back(Metric::Create(name, &generic_param_)); + metrics_.emplace_back(Metric::Create(name, &generic_parameters_)); } } @@ -351,7 +387,7 @@ class LearnerImpl : public Learner { cfg_.insert(n.cbegin(), n.cend()); Args args = {cfg_.cbegin(), cfg_.cend()}; - generic_param_.UpdateAllowUnknown(args); + generic_parameters_.UpdateAllowUnknown(args); gbm_->Configure(args); obj_->Configure({cfg_.begin(), cfg_.end()}); @@ -364,13 +400,14 @@ class LearnerImpl : public Learner { tparam_.dsplit = DataSplitMode::kRow; } - this->generic_param_.ConfigureGpuId(gbm_->UseGPU()); - this->configured_ = true; + // There's no logic for state machine for binary IO, as it has a mix of everything and + // half loaded model. + this->Configure(); } // rabit save model to rabit checkpoint void Save(dmlc::Stream* fo) const override { - if (!this->configured_) { + if (this->need_configuration_) { // Save empty model. 
Calling Configure in a dummy LearnerImpl avoids violating // constness. LearnerImpl empty(std::move(this->cache_)); @@ -383,7 +420,7 @@ class LearnerImpl : public Learner { return; } - LearnerModelParam mparam = mparam_; // make a copy to potentially modify + LearnerModelParamLegacy mparam = mparam_; // make a copy to potentially modify std::vector > extra_attr; // extra attributed to be added just before saving if (tparam_.objective == "count:poisson") { @@ -419,11 +456,12 @@ class LearnerImpl : public Learner { return it.first == "SAVED_PARAM_gpu_id"; })) { mparam.contain_extra_attrs = 1; - extra_attr.emplace_back("SAVED_PARAM_gpu_id", std::to_string(generic_param_.gpu_id)); + extra_attr.emplace_back("SAVED_PARAM_gpu_id", + std::to_string(generic_parameters_.gpu_id)); } } #endif // defined(XGBOOST_USE_CUDA) - fo->Write(&mparam, sizeof(LearnerModelParam)); + fo->Write(&mparam, sizeof(LearnerModelParamLegacy)); fo->Write(tparam_.objective); fo->Write(tparam_.booster); gbm_->Save(fo); @@ -459,14 +497,16 @@ class LearnerImpl : public Learner { std::vector DumpModel(const FeatureMap& fmap, bool with_stats, std::string format) const override { + CHECK(!this->need_configuration_) + << "The model hasn't been built yet. Are you using raw Booster interface?"; return gbm_->DumpModel(fmap, with_stats, format); } void UpdateOneIter(int iter, DMatrix* train) override { monitor_.Start("UpdateOneIter"); this->Configure(); - if (generic_param_.seed_per_iteration || rabit::IsDistributed()) { - common::GlobalRandom().seed(generic_param_.seed * kRandSeedMagic + iter); + if (generic_parameters_.seed_per_iteration || rabit::IsDistributed()) { + common::GlobalRandom().seed(generic_parameters_.seed * kRandSeedMagic + iter); } this->CheckDataSplitMode(); this->ValidateDMatrix(train); @@ -485,8 +525,8 @@ class LearnerImpl : public Learner { HostDeviceVector* in_gpair) override { monitor_.Start("BoostOneIter"); this->Configure(); - if (generic_param_.seed_per_iteration || rabit::IsDistributed()) { - common::GlobalRandom().seed(generic_param_.seed * kRandSeedMagic + iter); + if (generic_parameters_.seed_per_iteration || rabit::IsDistributed()) { + common::GlobalRandom().seed(generic_parameters_.seed * kRandSeedMagic + iter); } this->CheckDataSplitMode(); this->ValidateDMatrix(train); @@ -503,7 +543,7 @@ class LearnerImpl : public Learner { std::ostringstream os; os << '[' << iter << ']' << std::setiosflags(std::ios::fixed); if (metrics_.size() == 0 && tparam_.disable_default_eval_metric <= 0) { - metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric(), &generic_param_)); + metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric(), &generic_parameters_)); metrics_.back()->Configure({cfg_.begin(), cfg_.end()}); } for (size_t i = 0; i < data_sets.size(); ++i) { @@ -523,7 +563,7 @@ class LearnerImpl : public Learner { } void SetParam(const std::string& key, const std::string& value) override { - configured_ = false; + this->need_configuration_ = true; if (key == kEvalMetric) { if (std::find(metric_names_.cbegin(), metric_names_.cend(), value) == metric_names_.cend()) { @@ -535,7 +575,6 @@ class LearnerImpl : public Learner { } // Short hand for setting multiple parameters void SetParams(std::vector> const& args) override { - configured_ = false; for (auto const& kv : args) { this->SetParam(kv.first, kv.second); } @@ -569,7 +608,7 @@ class LearnerImpl : public Learner { } GenericParameter const& GetGenericParameter() const override { - return generic_param_; + return generic_parameters_; } void 
Predict(DMatrix* data, bool output_margin, @@ -617,6 +656,7 @@ class LearnerImpl : public Learner { } void ConfigureObjective(LearnerTrainParam const& old, Args* p_args) { + // Once binary IO is gone, NONE of these config is useful. if (cfg_.find("num_class") != cfg_.cend() && cfg_.at("num_class") != "0") { cfg_["num_output_group"] = cfg_["num_class"]; if (atoi(cfg_["num_class"].c_str()) > 1 && cfg_.count("objective") == 0) { @@ -627,13 +667,13 @@ class LearnerImpl : public Learner { if (cfg_.find("max_delta_step") == cfg_.cend() && cfg_.find("objective") != cfg_.cend() && tparam_.objective == "count:poisson") { + // max_delta_step is a duplicated parameter in Poisson regression and tree param. + // Rename one of them once binary IO is gone. cfg_["max_delta_step"] = kMaxDeltaStepDefaultValue; } if (obj_ == nullptr || tparam_.objective != old.objective) { - obj_.reset(ObjFunction::Create(tparam_.objective, &generic_param_)); + obj_.reset(ObjFunction::Create(tparam_.objective, &generic_parameters_)); } - // reset the base score - mparam_.base_score = obj_->ProbToMargin(mparam_.base_score); auto& args = *p_args; args = {cfg_.cbegin(), cfg_.cend()}; // renew obj_->Configure(args); @@ -645,7 +685,7 @@ class LearnerImpl : public Learner { return m->Name() != name; }; if (std::all_of(metrics_.begin(), metrics_.end(), DupCheck)) { - metrics_.emplace_back(std::unique_ptr(Metric::Create(name, &generic_param_))); + metrics_.emplace_back(std::unique_ptr(Metric::Create(name, &generic_parameters_))); mparam_.contain_eval_metrics = 1; } } @@ -656,8 +696,8 @@ class LearnerImpl : public Learner { void ConfigureGBM(LearnerTrainParam const& old, Args const& args) { if (gbm_ == nullptr || old.booster != tparam_.booster) { - gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_param_, - cache_, mparam_.base_score)); + gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_parameters_, + &learner_model_param_, cache_)); } gbm_->Configure(args); } @@ -682,7 +722,8 @@ class LearnerImpl : public Learner { } CHECK_NE(mparam_.num_feature, 0) << "0 feature is supplied. Are you using raw Booster interface?"; - // setup + learner_model_param_.num_feature = mparam_.num_feature; + // Remove these once binary IO is gone. cfg_["num_feature"] = common::ToString(mparam_.num_feature); cfg_["num_class"] = common::ToString(mparam_.num_class); } @@ -701,7 +742,8 @@ class LearnerImpl : public Learner { } // model parameter - LearnerModelParam mparam_; + LearnerModelParamLegacy mparam_; + LearnerModelParam learner_model_param_; LearnerTrainParam tparam_; // configurations std::map cfg_; @@ -713,8 +755,7 @@ class LearnerImpl : public Learner { std::map> preds_; // gradient pairs HostDeviceVector gpair_; - - bool configured_; + bool need_configuration_; private: /*! \brief random number transformation seed. 
*/ diff --git a/src/linear/coordinate_common.h b/src/linear/coordinate_common.h index 895198d44..43303f645 100644 --- a/src/linear/coordinate_common.h +++ b/src/linear/coordinate_common.h @@ -252,7 +252,7 @@ class CyclicFeatureSelector : public FeatureSelector { int NextFeature(int iteration, const gbm::GBLinearModel &model, int group_idx, const std::vector &gpair, DMatrix *p_fmat, float alpha, float lambda) override { - return iteration % model.param.num_feature; + return iteration % model.learner_model_param_->num_feature; } }; @@ -266,7 +266,7 @@ class ShuffleFeatureSelector : public FeatureSelector { const std::vector &gpair, DMatrix *p_fmat, float alpha, float lambda, int param) override { if (feat_index_.size() == 0) { - feat_index_.resize(model.param.num_feature); + feat_index_.resize(model.learner_model_param_->num_feature); std::iota(feat_index_.begin(), feat_index_.end(), 0); } std::shuffle(feat_index_.begin(), feat_index_.end(), common::GlobalRandom()); @@ -275,7 +275,7 @@ class ShuffleFeatureSelector : public FeatureSelector { int NextFeature(int iteration, const gbm::GBLinearModel &model, int group_idx, const std::vector &gpair, DMatrix *p_fmat, float alpha, float lambda) override { - return feat_index_[iteration % model.param.num_feature]; + return feat_index_[iteration % model.learner_model_param_->num_feature]; } protected: @@ -291,7 +291,7 @@ class RandomFeatureSelector : public FeatureSelector { int NextFeature(int iteration, const gbm::GBLinearModel &model, int group_idx, const std::vector &gpair, DMatrix *p_fmat, float alpha, float lambda) override { - return common::GlobalRandom()() % model.param.num_feature; + return common::GlobalRandom()() % model.learner_model_param_->num_feature; } }; @@ -310,11 +310,11 @@ class GreedyFeatureSelector : public FeatureSelector { const std::vector &gpair, DMatrix *p_fmat, float alpha, float lambda, int param) override { top_k_ = static_cast(param); - const bst_uint ngroup = model.param.num_output_group; + const bst_uint ngroup = model.learner_model_param_->num_output_group; if (param <= 0) top_k_ = std::numeric_limits::max(); if (counter_.size() == 0) { counter_.resize(ngroup); - gpair_sums_.resize(model.param.num_feature * ngroup); + gpair_sums_.resize(model.learner_model_param_->num_feature * ngroup); } for (bst_uint gid = 0u; gid < ngroup; ++gid) { counter_[gid] = 0u; @@ -327,10 +327,10 @@ class GreedyFeatureSelector : public FeatureSelector { // k-th selected feature for a group auto k = counter_[group_idx]++; // stop after either reaching top-K or going through all the features in a group - if (k >= top_k_ || counter_[group_idx] == model.param.num_feature) return -1; + if (k >= top_k_ || counter_[group_idx] == model.learner_model_param_->num_feature) return -1; - const int ngroup = model.param.num_output_group; - const bst_omp_uint nfeat = model.param.num_feature; + const int ngroup = model.learner_model_param_->num_output_group; + const bst_omp_uint nfeat = model.learner_model_param_->num_feature; // Calculate univariate gradient sums std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.)); for (const auto &batch : p_fmat->GetBatches()) { @@ -387,8 +387,8 @@ class ThriftyFeatureSelector : public FeatureSelector { DMatrix *p_fmat, float alpha, float lambda, int param) override { top_k_ = static_cast(param); if (param <= 0) top_k_ = std::numeric_limits::max(); - const bst_uint ngroup = model.param.num_output_group; - const bst_omp_uint nfeat = model.param.num_feature; + const bst_uint ngroup = 
model.learner_model_param_->num_output_group; + const bst_omp_uint nfeat = model.learner_model_param_->num_feature; if (deltaw_.size() == 0) { deltaw_.resize(nfeat * ngroup); @@ -444,9 +444,9 @@ class ThriftyFeatureSelector : public FeatureSelector { // k-th selected feature for a group auto k = counter_[group_idx]++; // stop after either reaching top-N or going through all the features in a group - if (k >= top_k_ || counter_[group_idx] == model.param.num_feature) return -1; + if (k >= top_k_ || counter_[group_idx] == model.learner_model_param_->num_feature) return -1; // note that sorted_idx stores the "long" indices - const size_t grp_offset = group_idx * model.param.num_feature; + const size_t grp_offset = group_idx * model.learner_model_param_->num_feature; return static_cast(sorted_idx_[grp_offset + k] - grp_offset); } diff --git a/src/linear/updater_coordinate.cc b/src/linear/updater_coordinate.cc index d6e7c1830..aeaf855f3 100644 --- a/src/linear/updater_coordinate.cc +++ b/src/linear/updater_coordinate.cc @@ -35,7 +35,7 @@ class CoordinateUpdater : public LinearUpdater { void Update(HostDeviceVector *in_gpair, DMatrix *p_fmat, gbm::GBLinearModel *model, double sum_instance_weight) override { tparam_.DenormalizePenalties(sum_instance_weight); - const int ngroup = model->param.num_output_group; + const int ngroup = model->learner_model_param_->num_output_group; // update bias for (int group_idx = 0; group_idx < ngroup; ++group_idx) { auto grad = GetBiasGradientParallel(group_idx, ngroup, @@ -52,7 +52,7 @@ class CoordinateUpdater : public LinearUpdater { tparam_.reg_lambda_denorm, cparam_.top_k); // update weights for (int group_idx = 0; group_idx < ngroup; ++group_idx) { - for (unsigned i = 0U; i < model->param.num_feature; i++) { + for (unsigned i = 0U; i < model->learner_model_param_->num_feature; i++) { int fidx = selector_->NextFeature (i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat, tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm); @@ -65,7 +65,7 @@ class CoordinateUpdater : public LinearUpdater { inline void UpdateFeature(int fidx, int group_idx, std::vector *in_gpair, DMatrix *p_fmat, gbm::GBLinearModel *model) { - const int ngroup = model->param.num_output_group; + const int ngroup = model->learner_model_param_->num_output_group; bst_float &w = (*model)[fidx][group_idx]; auto gradient = GetGradientParallel(group_idx, ngroup, fidx, *in_gpair, p_fmat); diff --git a/src/linear/updater_gpu_coordinate.cu b/src/linear/updater_gpu_coordinate.cu index 08c5448d9..7eec2cc42 100644 --- a/src/linear/updater_gpu_coordinate.cu +++ b/src/linear/updater_gpu_coordinate.cu @@ -41,7 +41,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT monitor_.Init("GPUCoordinateUpdater"); } - void LazyInitDevice(DMatrix *p_fmat, const gbm::GBLinearModelParam &model_param) { + void LazyInitDevice(DMatrix *p_fmat, const LearnerModelParam &model_param) { if (learner_param_->gpu_id < 0) return; num_row_ = static_cast(p_fmat->Info().num_row_); @@ -88,14 +88,14 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT gbm::GBLinearModel *model, double sum_instance_weight) override { tparam_.DenormalizePenalties(sum_instance_weight); monitor_.Start("LazyInitDevice"); - this->LazyInitDevice(p_fmat, model->param); + this->LazyInitDevice(p_fmat, *(model->learner_model_param_)); monitor_.Stop("LazyInitDevice"); monitor_.Start("UpdateGpair"); auto &in_gpair_host = in_gpair->ConstHostVector(); // Update gpair if (learner_param_->gpu_id >= 0) { - this->UpdateGpair(in_gpair_host, 
model->param); + this->UpdateGpair(in_gpair_host); } monitor_.Stop("UpdateGpair"); @@ -107,8 +107,9 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm, coord_param_.top_k); monitor_.Start("UpdateFeature"); - for (auto group_idx = 0; group_idx < model->param.num_output_group; ++group_idx) { - for (auto i = 0U; i < model->param.num_feature; i++) { + for (auto group_idx = 0; group_idx < model->learner_model_param_->num_output_group; + ++group_idx) { + for (auto i = 0U; i < model->learner_model_param_->num_feature; i++) { auto fidx = selector_->NextFeature( i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat, tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm); @@ -120,11 +121,12 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT } void UpdateBias(DMatrix *p_fmat, gbm::GBLinearModel *model) { - for (int group_idx = 0; group_idx < model->param.num_output_group; ++group_idx) { + for (int group_idx = 0; group_idx < model->learner_model_param_->num_output_group; + ++group_idx) { // Get gradient auto grad = GradientPair(0, 0); if (learner_param_->gpu_id >= 0) { - grad = GetBiasGradient(group_idx, model->param.num_output_group); + grad = GetBiasGradient(group_idx, model->learner_model_param_->num_output_group); } auto dbias = static_cast( tparam_.learning_rate * @@ -133,7 +135,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT // Update residual if (learner_param_->gpu_id >= 0) { - UpdateBiasResidual(dbias, group_idx, model->param.num_output_group); + UpdateBiasResidual(dbias, group_idx, model->learner_model_param_->num_output_group); } } } @@ -145,7 +147,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT // Get gradient auto grad = GradientPair(0, 0); if (learner_param_->gpu_id >= 0) { - grad = GetGradient(group_idx, model->param.num_output_group, fidx); + grad = GetGradient(group_idx, model->learner_model_param_->num_output_group, fidx); } auto dw = static_cast(tparam_.learning_rate * CoordinateDelta(grad.GetGrad(), grad.GetHess(), @@ -154,7 +156,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT w += dw; if (learner_param_->gpu_id >= 0) { - UpdateResidual(dw, group_idx, model->param.num_output_group, fidx); + UpdateResidual(dw, group_idx, model->learner_model_param_->num_output_group, fidx); } } @@ -217,8 +219,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT return num_row_ == 0; } - void UpdateGpair(const std::vector &host_gpair, - const gbm::GBLinearModelParam &model_param) { + void UpdateGpair(const std::vector &host_gpair) { dh::safe_cuda(cudaMemcpyAsync( gpair_.data(), host_gpair.data(), diff --git a/src/linear/updater_shotgun.cc b/src/linear/updater_shotgun.cc index 76558cf2c..5fcd27bd9 100644 --- a/src/linear/updater_shotgun.cc +++ b/src/linear/updater_shotgun.cc @@ -27,7 +27,7 @@ class ShotgunUpdater : public LinearUpdater { gbm::GBLinearModel *model, double sum_instance_weight) override { auto &gpair = in_gpair->HostVector(); param_.DenormalizePenalties(sum_instance_weight); - const int ngroup = model->param.num_output_group; + const int ngroup = model->learner_model_param_->num_output_group; // update bias for (int gid = 0; gid < ngroup; ++gid) { diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc index f9db9bf70..bc14f4ccd 100644 --- a/src/predictor/cpu_predictor.cc +++ b/src/predictor/cpu_predictor.cc @@ -49,9 +49,9 @@ class CPUPredictor : public Predictor { void PredLoopInternal(DMatrix* p_fmat, std::vector* 
out_preds, gbm::GBTreeModel const& model, int32_t tree_begin, int32_t tree_end) { - int32_t const num_group = model.param.num_output_group; + int32_t const num_group = model.learner_model_param_->num_output_group; const int nthread = omp_get_max_threads(); - InitThreadTemp(nthread, model.param.num_feature); + InitThreadTemp(nthread, model.learner_model_param_->num_feature); std::vector& preds = *out_preds; CHECK_EQ(model.param.size_leaf_vector, 0) << "size_leaf_vector is enforced to 0 so far"; @@ -108,7 +108,7 @@ class CPUPredictor : public Predictor { unsigned ntree_limit) const { CHECK(cache_); if (ntree_limit == 0 || - ntree_limit * model.param.num_output_group >= model.trees.size()) { + ntree_limit * model.learner_model_param_->num_output_group >= model.trees.size()) { auto it = cache_->find(dmat); if (it != cache_->end()) { const HostDeviceVector& y = it->second.predictions; @@ -126,8 +126,8 @@ class CPUPredictor : public Predictor { void InitOutPredictions(const MetaInfo& info, HostDeviceVector* out_preds, const gbm::GBTreeModel& model) const { - CHECK_NE(model.param.num_output_group, 0); - size_t n = model.param.num_output_group * info.num_row_; + CHECK_NE(model.learner_model_param_->num_output_group, 0); + size_t n = model.learner_model_param_->num_output_group * info.num_row_; const auto& base_margin = info.base_margin_.HostVector(); out_preds->Resize(n); std::vector& out_preds_h = out_preds->HostVector(); @@ -139,19 +139,19 @@ class CPUPredictor : public Predictor { std::ostringstream oss; oss << "Ignoring the base margin, since it has incorrect length. " << "The base margin must be an array of length "; - if (model.param.num_output_group > 1) { + if (model.learner_model_param_->num_output_group > 1) { oss << "[num_class] * [number of data points], i.e. " - << model.param.num_output_group << " * " << info.num_row_ + << model.learner_model_param_->num_output_group << " * " << info.num_row_ << " = " << n << ". "; } else { oss << "[number of data points], i.e. " << info.num_row_ << ". 
"; } oss << "Instead, all data points will use " - << "base_margin = " << model.base_margin; + << "base_score = " << model.learner_model_param_->base_score; LOG(WARNING) << oss.str(); } std::fill(out_preds_h.begin(), out_preds_h.end(), - model.base_margin); + model.learner_model_param_->base_score); } } @@ -167,7 +167,7 @@ class CPUPredictor : public Predictor { } this->InitOutPredictions(dmat->Info(), out_preds, model); - ntree_limit *= model.param.num_output_group; + ntree_limit *= model.learner_model_param_->num_output_group; if (ntree_limit == 0 || ntree_limit > model.trees.size()) { ntree_limit = static_cast(model.trees.size()); } @@ -198,7 +198,7 @@ class CPUPredictor : public Predictor { InitOutPredictions(e.data->Info(), &(e.predictions), model); PredLoopInternal(e.data.get(), &(e.predictions.HostVector()), model, 0, model.trees.size()); - } else if (model.param.num_output_group == 1 && updaters->size() > 0 && + } else if (model.learner_model_param_->num_output_group == 1 && updaters->size() > 0 && num_new_trees == 1 && updaters->back()->UpdatePredictionCache(e.data.get(), &(e.predictions))) { @@ -215,29 +215,29 @@ class CPUPredictor : public Predictor { const gbm::GBTreeModel& model, unsigned ntree_limit) override { if (thread_temp.size() == 0) { thread_temp.resize(1, RegTree::FVec()); - thread_temp[0].Init(model.param.num_feature); + thread_temp[0].Init(model.learner_model_param_->num_feature); } - ntree_limit *= model.param.num_output_group; + ntree_limit *= model.learner_model_param_->num_output_group; if (ntree_limit == 0 || ntree_limit > model.trees.size()) { ntree_limit = static_cast(model.trees.size()); } - out_preds->resize(model.param.num_output_group * + out_preds->resize(model.learner_model_param_->num_output_group * (model.param.size_leaf_vector + 1)); // loop over output groups - for (uint32_t gid = 0; gid < model.param.num_output_group; ++gid) { + for (uint32_t gid = 0; gid < model.learner_model_param_->num_output_group; ++gid) { (*out_preds)[gid] = PredValue(inst, model.trees, model.tree_info, gid, &thread_temp[0], 0, ntree_limit) + - model.base_margin; + model.learner_model_param_->base_score; } } void PredictLeaf(DMatrix* p_fmat, std::vector* out_preds, const gbm::GBTreeModel& model, unsigned ntree_limit) override { const int nthread = omp_get_max_threads(); - InitThreadTemp(nthread, model.param.num_feature); + InitThreadTemp(nthread, model.learner_model_param_->num_feature); const MetaInfo& info = p_fmat->Info(); // number of valid trees - ntree_limit *= model.param.num_output_group; + ntree_limit *= model.learner_model_param_->num_output_group; if (ntree_limit == 0 || ntree_limit > model.trees.size()) { ntree_limit = static_cast(model.trees.size()); } @@ -268,20 +268,20 @@ class CPUPredictor : public Predictor { bool approximate, int condition, unsigned condition_feature) override { const int nthread = omp_get_max_threads(); - InitThreadTemp(nthread, model.param.num_feature); + InitThreadTemp(nthread, model.learner_model_param_->num_feature); const MetaInfo& info = p_fmat->Info(); // number of valid trees - ntree_limit *= model.param.num_output_group; + ntree_limit *= model.learner_model_param_->num_output_group; if (ntree_limit == 0 || ntree_limit > model.trees.size()) { ntree_limit = static_cast(model.trees.size()); } - const int ngroup = model.param.num_output_group; + const int ngroup = model.learner_model_param_->num_output_group; CHECK_NE(ngroup, 0); - size_t const ncolumns = model.param.num_feature + 1; + size_t const ncolumns = 
model.learner_model_param_->num_feature + 1; CHECK_NE(ncolumns, 0); // allocate space for (number of features + bias) times the number of rows std::vector& contribs = *out_contribs; - contribs.resize(info.num_row_ * ncolumns * model.param.num_output_group); + contribs.resize(info.num_row_ * ncolumns * model.learner_model_param_->num_output_group); // make sure contributions is zeroed, we could be reusing a previously // allocated one std::fill(contribs.begin(), contribs.end(), 0); @@ -298,8 +298,8 @@ class CPUPredictor : public Predictor { #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < nsize; ++i) { auto row_idx = static_cast(batch.base_rowid + i); - std::vector this_tree_contribs(ncolumns); RegTree::FVec& feats = thread_temp[omp_get_thread_num()]; + std::vector this_tree_contribs(ncolumns); // loop over all classes for (int gid = 0; gid < ngroup; ++gid) { bst_float* p_contribs = &contribs[(row_idx * ngroup + gid) * ncolumns]; @@ -326,7 +326,7 @@ class CPUPredictor : public Predictor { if (base_margin.size() != 0) { p_contribs[ncolumns - 1] += base_margin[row_idx * ngroup + gid]; } else { - p_contribs[ncolumns - 1] += model.base_margin; + p_contribs[ncolumns - 1] += model.learner_model_param_->base_score; } } } @@ -338,8 +338,8 @@ class CPUPredictor : public Predictor { std::vector* tree_weights, bool approximate) override { const MetaInfo& info = p_fmat->Info(); - const int ngroup = model.param.num_output_group; - size_t const ncolumns = model.param.num_feature; + const int ngroup = model.learner_model_param_->num_output_group; + size_t const ncolumns = model.learner_model_param_->num_feature; const unsigned row_chunk = ngroup * (ncolumns + 1) * (ncolumns + 1); const unsigned mrow_chunk = (ncolumns + 1) * (ncolumns + 1); const unsigned crow_chunk = ngroup * (ncolumns + 1); diff --git a/src/predictor/gpu_predictor.cu b/src/predictor/gpu_predictor.cu index 9cc91b966..92baa4ebb 100644 --- a/src/predictor/gpu_predictor.cu +++ b/src/predictor/gpu_predictor.cu @@ -217,7 +217,7 @@ class GPUPredictor : public xgboost::Predictor { cudaMemcpyHostToDevice)); this->tree_begin_ = tree_begin; this->tree_end_ = tree_end; - this->num_group_ = model.param.num_output_group; + this->num_group_ = model.learner_model_param_->num_output_group; } void PredictInternal(const SparsePage& batch, @@ -286,9 +286,9 @@ class GPUPredictor : public xgboost::Predictor { for (auto &batch : dmat->GetBatches()) { batch.offset.SetDevice(generic_param_->gpu_id); batch.data.SetDevice(generic_param_->gpu_id); - PredictInternal(batch, model.param.num_feature, + PredictInternal(batch, model.learner_model_param_->num_feature, out_preds, batch_offset); - batch_offset += batch.Size() * model.param.num_output_group; + batch_offset += batch.Size() * model.learner_model_param_->num_output_group; } monitor_.StopCuda("DevicePredictInternal"); @@ -317,7 +317,7 @@ class GPUPredictor : public xgboost::Predictor { } this->InitOutPredictions(dmat->Info(), out_preds, model); - int32_t tree_end = ntree_limit * model.param.num_output_group; + int32_t tree_end = ntree_limit * model.learner_model_param_->num_output_group; if (ntree_limit == 0 || ntree_limit > model.trees.size()) { tree_end = static_cast(model.trees.size()); @@ -347,7 +347,7 @@ class GPUPredictor : public xgboost::Predictor { void InitOutPredictions(const MetaInfo& info, HostDeviceVector* out_preds, const gbm::GBTreeModel& model) const { - size_t n_classes = model.param.num_output_group; + size_t n_classes = model.learner_model_param_->num_output_group; 
size_t n = n_classes * info.num_row_; const HostDeviceVector& base_margin = info.base_margin_; out_preds->SetDevice(generic_param_->gpu_id); @@ -356,14 +356,14 @@ class GPUPredictor : public xgboost::Predictor { CHECK_EQ(base_margin.Size(), n); out_preds->Copy(base_margin); } else { - out_preds->Fill(model.base_margin); + out_preds->Fill(model.learner_model_param_->base_score); } } bool PredictFromCache(DMatrix* dmat, HostDeviceVector* out_preds, const gbm::GBTreeModel& model, unsigned ntree_limit) { if (ntree_limit == 0 || - ntree_limit * model.param.num_output_group >= model.trees.size()) { + ntree_limit * model.learner_model_param_->num_output_group >= model.trees.size()) { auto it = (*cache_).find(dmat); if (it != cache_->cend()) { const HostDeviceVector& y = it->second.predictions; @@ -395,7 +395,7 @@ class GPUPredictor : public xgboost::Predictor { this->InitOutPredictions(dmat->Info(), &predictions, model); } - if (model.param.num_output_group == 1 && updaters->size() > 0 && + if (model.learner_model_param_->num_output_group == 1 && updaters->size() > 0 && num_new_trees == 1 && updaters->back()->UpdatePredictionCache(e.data.get(), &predictions)) { // do nothing diff --git a/src/tree/param.h b/src/tree/param.h index 7197b384a..38f29edaa 100644 --- a/src/tree/param.h +++ b/src/tree/param.h @@ -1,5 +1,5 @@ /*! - * Copyright 2014 by Contributors + * Copyright 2014-2019 by Contributors * \file param.h * \brief training parameters, statistics used to support tree construction. * \author Tianqi Chen diff --git a/src/tree/updater_gpu_common.cuh b/src/tree/updater_gpu_common.cuh index 53bf3a0ae..dacb32f0a 100644 --- a/src/tree/updater_gpu_common.cuh +++ b/src/tree/updater_gpu_common.cuh @@ -39,7 +39,7 @@ struct GPUTrainingParam { using NodeIdT = int32_t; /** used to assign default id to a Node */ -static const int kUnusedNode = -1; +static const bst_node_t kUnusedNode = -1; /** * @enum DefaultDirection node.cuh diff --git a/tests/cpp/common/test_hist_util.cc b/tests/cpp/common/test_hist_util.cc index 13368322c..4d5023707 100644 --- a/tests/cpp/common/test_hist_util.cc +++ b/tests/cpp/common/test_hist_util.cc @@ -11,12 +11,12 @@ namespace common { TEST(CutsBuilder, SearchGroupInd) { size_t constexpr kNumGroups = 4; - size_t constexpr kNumRows = 17; - size_t constexpr kNumCols = 15; + size_t constexpr kRows = 17; + size_t constexpr kCols = 15; - auto pp_mat = CreateDMatrix(kNumRows, kNumCols, 0); + auto pp_dmat = CreateDMatrix(kRows, kCols, 0); + std::shared_ptr p_mat {*pp_dmat}; - auto& p_mat = *pp_mat; std::vector group(kNumGroups); group[0] = 2; group[1] = 3; @@ -36,7 +36,7 @@ TEST(CutsBuilder, SearchGroupInd) { EXPECT_ANY_THROW(CutsBuilder::SearchGroupIndFromRow(p_mat->Info().group_ptr_, 17)); - delete pp_mat; + delete pp_dmat; } namespace { @@ -52,12 +52,11 @@ TEST(SparseCuts, SingleThreadedBuild) { size_t constexpr kCols = 31; size_t constexpr kBins = 256; - // Dense matrix. 
- auto pp_mat = CreateDMatrix(kRows, kCols, 0); - DMatrix* p_fmat = (*pp_mat).get(); + auto pp_dmat = CreateDMatrix(kRows, kCols, 0); + std::shared_ptr p_fmat {*pp_dmat}; common::GHistIndexMatrix hmat; - hmat.Init(p_fmat, kBins); + hmat.Init(p_fmat.get(), kBins); HistogramCuts cuts; SparseCuts indices(&cuts); @@ -69,7 +68,7 @@ TEST(SparseCuts, SingleThreadedBuild) { ASSERT_EQ(hmat.cut.Values(), cuts.Values()); ASSERT_EQ(hmat.cut.MinValues(), cuts.MinValues()); - delete pp_mat; + delete pp_dmat; } TEST(SparseCuts, MultiThreadedBuild) { diff --git a/tests/cpp/gbm/test_gbtree.cc b/tests/cpp/gbm/test_gbtree.cc index 621b63658..22b972e43 100644 --- a/tests/cpp/gbm/test_gbtree.cc +++ b/tests/cpp/gbm/test_gbtree.cc @@ -12,62 +12,55 @@ TEST(GBTree, SelectTreeMethod) { GenericParameter generic_param; generic_param.UpdateAllowUnknown(Args{}); + LearnerModelParam mparam; + mparam.base_score = 0.5; + mparam.num_feature = kCols; + mparam.num_output_group = 1; + + std::vector > caches; std::unique_ptr p_gbm{ - GradientBooster::Create("gbtree", &generic_param, {}, 0)}; + GradientBooster::Create("gbtree", &generic_param, &mparam, caches)}; auto& gbtree = dynamic_cast (*p_gbm); // Test if `tree_method` can be set - std::string n_feat = std::to_string(kCols); - Args args {{"tree_method", "approx"}, {"num_feature", n_feat}}; + Args args {{"tree_method", "approx"}}; gbtree.Configure({args.cbegin(), args.cend()}); gbtree.Configure(args); auto const& tparam = gbtree.GetTrainParam(); - gbtree.Configure({{"tree_method", "approx"}, {"num_feature", n_feat}}); + gbtree.Configure({{"tree_method", "approx"}}); ASSERT_EQ(tparam.updater_seq, "grow_histmaker,prune"); - gbtree.Configure({{"tree_method", "exact"}, {"num_feature", n_feat}}); + gbtree.Configure({{"tree_method", "exact"}}); ASSERT_EQ(tparam.updater_seq, "grow_colmaker,prune"); - gbtree.Configure({{"tree_method", "hist"}, {"num_feature", n_feat}}); + gbtree.Configure({{"tree_method", "hist"}}); ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker"); - gbtree.Configure({{"booster", "dart"}, {"tree_method", "hist"}, - {"num_feature", n_feat}}); + gbtree.Configure({{"booster", "dart"}, {"tree_method", "hist"}}); ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker"); #ifdef XGBOOST_USE_CUDA generic_param.UpdateAllowUnknown(Args{{"gpu_id", "0"}}); - gbtree.Configure({{"tree_method", "gpu_hist"}, {"num_feature", n_feat}}); + gbtree.Configure({{"tree_method", "gpu_hist"}}); ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist"); - gbtree.Configure({{"booster", "dart"}, {"tree_method", "gpu_hist"}, - {"num_feature", n_feat}}); + gbtree.Configure({{"booster", "dart"}, {"tree_method", "gpu_hist"}}); ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist"); -#endif +#endif // XGBOOST_USE_CUDA } #ifdef XGBOOST_USE_CUDA TEST(GBTree, ChoosePredictor) { - size_t constexpr kNumRows = 17; + size_t constexpr kRows = 17; size_t constexpr kCols = 15; - auto pp_mat = CreateDMatrix(kNumRows, kCols, 0); - auto& p_mat = *pp_mat; - std::vector labels (kNumRows); - for (size_t i = 0; i < kNumRows; ++i) { - labels[i] = i % 2; - } - p_mat->Info().SetInfo("label", labels.data(), DataType::kFloat32, kNumRows); + auto pp_dmat = CreateDMatrix(kRows, kCols, 0); + std::shared_ptr p_dmat {*pp_dmat}; - std::vector> mat = {p_mat}; - std::string n_feat = std::to_string(kCols); - Args args {{"tree_method", "approx"}, {"num_feature", n_feat}}; - GenericParameter generic_param; - generic_param.UpdateAllowUnknown(Args{{"gpu_id", "0"}}); + auto& data = (*(p_dmat->GetBatches().begin())).data; + 
p_dmat->Info().labels_.Resize(kRows); - auto& data = (*(p_mat->GetBatches().begin())).data; - - auto learner = std::unique_ptr(Learner::Create(mat)); - learner->SetParams(Args{{"tree_method", "gpu_hist"}}); + auto learner = std::unique_ptr(Learner::Create({p_dmat})); + learner->SetParams(Args{{"tree_method", "gpu_hist"}, {"gpu_id", "0"}}); for (size_t i = 0; i < 4; ++i) { - learner->UpdateOneIter(i, p_mat.get()); + learner->UpdateOneIter(i, p_dmat.get()); } ASSERT_TRUE(data.HostCanWrite()); dmlc::TemporaryDirectory tempdir; @@ -79,14 +72,14 @@ TEST(GBTree, ChoosePredictor) { } // a new learner - learner = std::unique_ptr(Learner::Create(mat)); + learner = std::unique_ptr(Learner::Create({p_dmat})); { std::unique_ptr fi(dmlc::Stream::Create(fname.c_str(), "r")); learner->Load(fi.get()); } learner->SetParams(Args{{"tree_method", "gpu_hist"}, {"gpu_id", "0"}}); for (size_t i = 0; i < 4; ++i) { - learner->UpdateOneIter(i, p_mat.get()); + learner->UpdateOneIter(i, p_dmat.get()); } ASSERT_TRUE(data.HostCanWrite()); @@ -96,10 +89,10 @@ TEST(GBTree, ChoosePredictor) { ASSERT_FALSE(data.HostCanWrite()); // another new learner - learner = std::unique_ptr(Learner::Create(mat)); + learner = std::unique_ptr(Learner::Create({p_dmat})); learner->SetParams(Args{{"tree_method", "gpu_hist"}, {"gpu_id", "0"}}); for (size_t i = 0; i < 4; ++i) { - learner->UpdateOneIter(i, p_mat.get()); + learner->UpdateOneIter(i, p_dmat.get()); } // data is not pulled back into host ASSERT_FALSE(data.HostCanWrite()); diff --git a/tests/cpp/helpers.cc b/tests/cpp/helpers.cc index ed6c5fa40..9aa9dd2cb 100644 --- a/tests/cpp/helpers.cc +++ b/tests/cpp/helpers.cc @@ -1,15 +1,17 @@ /*! - * Copyright 2016-2018 XGBoost contributors + * Copyright 2016-2019 XGBoost contributors */ #include #include +#include #include - #include +#include #include #include -#include "./helpers.h" + +#include "helpers.h" #include "xgboost/c_api.h" #include "../../src/data/simple_csr_source.h" @@ -113,7 +115,6 @@ void CheckRankingObjFunction(std::unique_ptr const& obj, CheckObjFunctionImpl(obj, preds, labels, weights, info, out_grad, out_hess); } - xgboost::bst_float GetMetricEval(xgboost::Metric * metric, xgboost::HostDeviceVector preds, std::vector labels, @@ -192,6 +193,7 @@ std::unique_ptr CreateSparsePageDMatrix( return dmat; } + std::unique_ptr CreateSparsePageDMatrixWithRC( size_t n_rows, size_t n_cols, size_t page_size, bool deterministic, const dmlc::TemporaryDirectory& tempdir) { @@ -257,16 +259,42 @@ std::unique_ptr CreateSparsePageDMatrixWithRC( } } -gbm::GBTreeModel CreateTestModel() { +gbm::GBTreeModel CreateTestModel(LearnerModelParam const* param) { std::vector> trees; trees.push_back(std::unique_ptr(new RegTree)); (*trees.back())[0].SetLeaf(1.5f); (*trees.back()).Stat(0).sum_hess = 1.0f; - gbm::GBTreeModel model(0.5); + gbm::GBTreeModel model(param); model.CommitModel(std::move(trees), 0); - model.param.num_output_group = 1; - model.base_margin = 0; return model; } +std::unique_ptr CreateTrainedGBM( + std::string name, Args kwargs, size_t kRows, size_t kCols, + LearnerModelParam const* learner_model_param, + GenericParameter const* generic_param) { + std::unique_ptr gbm { + GradientBooster::Create(name, generic_param, learner_model_param, {})}; + gbm->Configure(kwargs); + auto pp_dmat = CreateDMatrix(kRows, kCols, 0); + auto p_dmat = *pp_dmat; + + std::vector labels(kRows); + for (size_t i = 0; i < kRows; ++i) { + labels[i] = i; + } + p_dmat->Info().labels_.HostVector() = labels; + HostDeviceVector gpair; + auto& h_gpair = 
gpair.HostVector(); + h_gpair.resize(kRows); + for (size_t i = 0; i < kRows; ++i) { + h_gpair[i] = {static_cast(i), 1}; + } + + gbm->DoBoost(p_dmat.get(), &gpair, nullptr); + + delete pp_dmat; + return gbm; +} + } // namespace xgboost diff --git a/tests/cpp/helpers.h b/tests/cpp/helpers.h index b5bbf0ed7..10101d929 100644 --- a/tests/cpp/helpers.h +++ b/tests/cpp/helpers.h @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include "../../src/common/common.h" #include "../../src/common/hist_util.h" @@ -204,7 +206,12 @@ std::unique_ptr CreateSparsePageDMatrixWithRC( size_t n_rows, size_t n_cols, size_t page_size, bool deterministic, const dmlc::TemporaryDirectory& tempdir = dmlc::TemporaryDirectory()); -gbm::GBTreeModel CreateTestModel(); +gbm::GBTreeModel CreateTestModel(LearnerModelParam const* param); + +std::unique_ptr CreateTrainedGBM( + std::string name, Args kwargs, size_t kRows, size_t kCols, + LearnerModelParam const* learner_model_param, + GenericParameter const* generic_param); inline GenericParameter CreateEmptyGenericParam(int gpu_id) { xgboost::GenericParameter tparam; diff --git a/tests/cpp/linear/test_linear.cc b/tests/cpp/linear/test_linear.cc index 1f0ff6e2c..dd42b38c4 100644 --- a/tests/cpp/linear/test_linear.cc +++ b/tests/cpp/linear/test_linear.cc @@ -8,20 +8,30 @@ #include "../../../src/gbm/gblinear_model.h" +namespace xgboost { + TEST(Linear, shotgun) { - auto mat = xgboost::CreateDMatrix(10, 10, 0); + size_t constexpr kRows = 10; + size_t constexpr kCols = 10; + + auto pp_dmat = xgboost::CreateDMatrix(kRows, kCols, 0); + auto p_fmat {*pp_dmat}; + auto lparam = xgboost::CreateEmptyGenericParam(GPUIDX); + LearnerModelParam mparam; + mparam.num_feature = kCols; + mparam.num_output_group = 1; + mparam.base_score = 0.5; + { auto updater = std::unique_ptr( xgboost::LinearUpdater::Create("shotgun", &lparam)); updater->Configure({{"eta", "1."}}); xgboost::HostDeviceVector gpair( - (*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0)); - xgboost::gbm::GBLinearModel model; - model.param.num_feature = (*mat)->Info().num_col_; - model.param.num_output_group = 1; + p_fmat->Info().num_row_, xgboost::GradientPair(-5, 1.0)); + xgboost::gbm::GBLinearModel model{&mparam}; model.LazyInitModel(); - updater->Update(&gpair, (*mat).get(), &model, gpair.Size()); + updater->Update(&gpair, p_fmat.get(), &model, gpair.Size()); ASSERT_EQ(model.bias()[0], 5.0f); @@ -31,24 +41,35 @@ TEST(Linear, shotgun) { xgboost::LinearUpdater::Create("shotgun", &lparam)); EXPECT_ANY_THROW(updater->Configure({{"feature_selector", "random"}})); } - delete mat; + + delete pp_dmat; } TEST(Linear, coordinate) { - auto mat = xgboost::CreateDMatrix(10, 10, 0); + size_t constexpr kRows = 10; + size_t constexpr kCols = 10; + + auto pp_dmat = xgboost::CreateDMatrix(kRows, kCols, 0); + auto p_fmat {*pp_dmat}; + auto lparam = xgboost::CreateEmptyGenericParam(GPUIDX); + LearnerModelParam mparam; + mparam.num_feature = kCols; + mparam.num_output_group = 1; + mparam.base_score = 0.5; + auto updater = std::unique_ptr( xgboost::LinearUpdater::Create("coord_descent", &lparam)); updater->Configure({{"eta", "1."}}); xgboost::HostDeviceVector gpair( - (*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0)); - xgboost::gbm::GBLinearModel model; - model.param.num_feature = (*mat)->Info().num_col_; - model.param.num_output_group = 1; + p_fmat->Info().num_row_, xgboost::GradientPair(-5, 1.0)); + xgboost::gbm::GBLinearModel model{&mparam}; model.LazyInitModel(); - updater->Update(&gpair, (*mat).get(), &model, 
gpair.Size());
+  updater->Update(&gpair, p_fmat.get(), &model, gpair.Size());
   ASSERT_EQ(model.bias()[0], 5.0f);
-  delete mat;
+  delete pp_dmat;
 }
+
+}  // namespace xgboost
diff --git a/tests/cpp/linear/test_linear.cu b/tests/cpp/linear/test_linear.cu
index 4affce7f5..4a2741680 100644
--- a/tests/cpp/linear/test_linear.cu
+++ b/tests/cpp/linear/test_linear.cu
@@ -8,16 +8,24 @@ namespace xgboost {
 
 TEST(Linear, GPUCoordinate) {
-  auto mat = xgboost::CreateDMatrix(10, 10, 0);
+  size_t constexpr kRows = 10;
+  size_t constexpr kCols = 10;
+
+  auto mat = xgboost::CreateDMatrix(kRows, kCols, 0);
   auto lparam = CreateEmptyGenericParam(GPUIDX);
+
+  LearnerModelParam mparam;
+  mparam.num_feature = kCols;
+  mparam.num_output_group = 1;
+  mparam.base_score = 0.5;
+
   auto updater = std::unique_ptr(
       xgboost::LinearUpdater::Create("gpu_coord_descent", &lparam));
   updater->Configure({{"eta", "1."}});
   xgboost::HostDeviceVector gpair(
       (*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0));
-  xgboost::gbm::GBLinearModel model;
-  model.param.num_feature = (*mat)->Info().num_col_;
-  model.param.num_output_group = 1;
+  xgboost::gbm::GBLinearModel model{&mparam};
+
   model.LazyInitModel();
   updater->Update(&gpair, (*mat).get(), &model, gpair.Size());
diff --git a/tests/cpp/predictor/test_cpu_predictor.cc b/tests/cpp/predictor/test_cpu_predictor.cc
index 38b4eba9d..6ef39f4e6 100644
--- a/tests/cpp/predictor/test_cpu_predictor.cc
+++ b/tests/cpp/predictor/test_cpu_predictor.cc
@@ -7,18 +7,23 @@
 #include "../../../src/gbm/gbtree_model.h"
 
 namespace xgboost {
-TEST(cpu_predictor, Test) {
+TEST(CpuPredictor, Basic) {
   auto lparam = CreateEmptyGenericParam(GPUIDX);
   auto cache = std::make_shared>();
   std::unique_ptr cpu_predictor =
       std::unique_ptr(Predictor::Create("cpu_predictor", &lparam, cache));
-  gbm::GBTreeModel model = CreateTestModel();
+  int kRows = 5;
+  int kCols = 5;
 
-  int n_row = 5;
-  int n_col = 5;
+  LearnerModelParam param;
+  param.num_feature = kCols;
+  param.base_score = 0.0;
+  param.num_output_group = 1;
 
-  auto dmat = CreateDMatrix(n_row, n_col, 0);
+  gbm::GBTreeModel model = CreateTestModel(&param);
+
+  auto dmat = CreateDMatrix(kRows, kCols, 0);
 
   // Test predict batch
   HostDeviceVector out_predictions;
@@ -46,19 +51,32 @@ TEST(cpu_predictor, Test) {
   // Test predict contribution
   std::vector out_contribution;
   cpu_predictor->PredictContribution((*dmat).get(), &out_contribution, model);
-  for (auto const& contri : out_contribution) {
-    ASSERT_EQ(contri, 1.5);
+  ASSERT_EQ(out_contribution.size(), kRows * (kCols + 1));
+  for (size_t i = 0; i < out_contribution.size(); ++i) {
+    auto const& contri = out_contribution[i];
+    // shift 1 for bias, as test tree is a decision stump, only global bias is filled with LeafValue().
+    if ((i+1) % (kCols+1) == 0) {
+      ASSERT_EQ(out_contribution.back(), 1.5f);
+    } else {
+      ASSERT_EQ(contri, 0);
+    }
   }
 
   // Test predict contribution (approximate method)
-  cpu_predictor->PredictContribution((*dmat).get(), &out_contribution, model, true);
-  for (auto const& contri : out_contribution) {
-    ASSERT_EQ(contri, 1.5);
+  cpu_predictor->PredictContribution((*dmat).get(), &out_contribution, model, 0, nullptr, true);
+  for (size_t i = 0; i < out_contribution.size(); ++i) {
+    auto const& contri = out_contribution[i];
+    // shift 1 for bias, as test tree is a decision stump, only global bias is filled with LeafValue().
+    if ((i+1) % (kCols+1) == 0) {
+      ASSERT_EQ(out_contribution.back(), 1.5f);
+    } else {
+      ASSERT_EQ(contri, 0);
+    }
   }
 
   delete dmat;
 }
 
-TEST(cpu_predictor, ExternalMemoryTest) {
+TEST(CpuPredictor, ExternalMemory) {
   dmlc::TemporaryDirectory tmpdir;
   std::string filename = tmpdir.path + "/big.libsvm";
   std::unique_ptr dmat = CreateSparsePageDMatrix(12, 64, filename);
@@ -68,13 +86,18 @@
   std::unique_ptr cpu_predictor =
       std::unique_ptr(Predictor::Create("cpu_predictor", &lparam, cache));
 
-  gbm::GBTreeModel model = CreateTestModel();
+  LearnerModelParam param;
+  param.base_score = 0;
+  param.num_feature = dmat->Info().num_col_;
+  param.num_output_group = 1;
+
+  gbm::GBTreeModel model = CreateTestModel(&param);
 
   // Test predict batch
   HostDeviceVector out_predictions;
   cpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
   std::vector &out_predictions_h = out_predictions.HostVector();
-  EXPECT_EQ(out_predictions.Size(), dmat->Info().num_row_);
+  ASSERT_EQ(out_predictions.Size(), dmat->Info().num_row_);
   for (const auto& v : out_predictions_h) {
     ASSERT_EQ(v, 1.5);
   }
@@ -82,7 +105,7 @@
   // Test predict leaf
   std::vector leaf_out_predictions;
   cpu_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);
-  EXPECT_EQ(leaf_out_predictions.size(), dmat->Info().num_row_);
+  ASSERT_EQ(leaf_out_predictions.size(), dmat->Info().num_row_);
   for (const auto& v : leaf_out_predictions) {
     ASSERT_EQ(v, 0);
   }
@@ -90,17 +113,30 @@
   // Test predict contribution
   std::vector out_contribution;
   cpu_predictor->PredictContribution(dmat.get(), &out_contribution, model);
-  EXPECT_EQ(out_contribution.size(), dmat->Info().num_row_);
-  for (const auto& v : out_contribution) {
-    ASSERT_EQ(v, 1.5);
+  ASSERT_EQ(out_contribution.size(), dmat->Info().num_row_ * (dmat->Info().num_col_ + 1));
+  for (size_t i = 0; i < out_contribution.size(); ++i) {
+    auto const& contri = out_contribution[i];
+    // shift 1 for bias, as test tree is a decision stump, only global bias is filled with LeafValue().
+    if ((i + 1) % (dmat->Info().num_col_ + 1) == 0) {
+      ASSERT_EQ(out_contribution.back(), 1.5f);
+    } else {
+      ASSERT_EQ(contri, 0);
+    }
   }
 
   // Test predict contribution (approximate method)
   std::vector out_contribution_approximate;
-  cpu_predictor->PredictContribution(dmat.get(), &out_contribution_approximate, model, true);
-  EXPECT_EQ(out_contribution_approximate.size(), dmat->Info().num_row_);
-  for (const auto& v : out_contribution_approximate) {
-    ASSERT_EQ(v, 1.5);
+  cpu_predictor->PredictContribution(dmat.get(), &out_contribution_approximate, model, 0, nullptr, true);
+  ASSERT_EQ(out_contribution_approximate.size(),
+            dmat->Info().num_row_ * (dmat->Info().num_col_ + 1));
+  for (size_t i = 0; i < out_contribution.size(); ++i) {
+    auto const& contri = out_contribution[i];
+    // shift 1 for bias, as test tree is a decision stump, only global bias is filled with LeafValue().
+    if ((i + 1) % (dmat->Info().num_col_ + 1) == 0) {
+      ASSERT_EQ(out_contribution.back(), 1.5f);
+    } else {
+      ASSERT_EQ(contri, 0);
+    }
   }
 }
 }  // namespace xgboost
diff --git a/tests/cpp/predictor/test_gpu_predictor.cu b/tests/cpp/predictor/test_gpu_predictor.cu
index b997ecf23..5f2ba1b23 100644
--- a/tests/cpp/predictor/test_gpu_predictor.cu
+++ b/tests/cpp/predictor/test_gpu_predictor.cu
@@ -33,7 +33,7 @@ QueryBoosterConfigurationArguments(BoosterHandle handle) {
 namespace xgboost {
 namespace predictor {
 
-TEST(gpu_predictor, Test) {
+TEST(GpuPredictor, Basic) {
   auto cpu_lparam = CreateEmptyGenericParam(-1);
   auto gpu_lparam = CreateEmptyGenericParam(0);
   auto cache = std::make_shared>();
@@ -50,8 +50,12 @@
     int n_row = i, n_col = i;
     auto dmat = CreateDMatrix(n_row, n_col, 0);
-    gbm::GBTreeModel model = CreateTestModel();
-    model.param.num_feature = n_col;
+    LearnerModelParam param;
+    param.num_feature = n_col;
+    param.num_output_group = 1;
+    param.base_score = 0.5;
+
+    gbm::GBTreeModel model = CreateTestModel(&param);
 
     // Test predict batch
     HostDeviceVector gpu_out_predictions;
@@ -76,10 +80,14 @@
   std::unique_ptr gpu_predictor =
       std::unique_ptr(Predictor::Create("gpu_predictor", &lparam, cache));
   gpu_predictor->Configure({});
-  gbm::GBTreeModel model = CreateTestModel();
-  model.param.num_feature = 3;
+
+  LearnerModelParam param;
+  param.num_feature = 2;
   const int n_classes = 3;
-  model.param.num_output_group = n_classes;
+  param.num_output_group = n_classes;
+  param.base_score = 0.5;
+
+  gbm::GBTreeModel model = CreateTestModel(&param);
   std::vector> dmats;
   dmlc::TemporaryDirectory tmpdir;
   std::string file0 = tmpdir.path + "/big_0.libsvm";