Refactor configuration [Part II]. (#4577)

* Refactor configuration [Part II].

* General changes:
** Remove `Init` methods to avoid ambiguity.
** Remove `Configure(std::map<>)` to avoid redundant copying and to prepare for
   parameter validation (`InitAllowUnknown` now returns a `std::vector`).  See
   the sketch below.
** Add a `Name()` method to tree updaters for easier debugging.
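
A minimal sketch of the resulting interface, condensed from the header hunks
below (illustrative only, not the complete declarations):

```cpp
// `Args` replaces both the iterator-pair `Configure` templates and the
// map-based overloads; components take a flat key/value vector instead.
#include <string>
#include <utility>
#include <vector>

using Args = std::vector<std::pair<std::string, std::string>>;

class TreeUpdater {
 public:
  virtual ~TreeUpdater() = default;
  // Replaces the old `Init(const std::vector<std::pair<...>>&)`.
  virtual void Configure(const Args& args) = 0;
  // New: lets the selected updater sequence be reported while debugging.
  virtual char const* Name() const = 0;
};

// Usage: a component can now be configured from a brace-initialised list,
// e.g. updater->Configure({{"max_depth", "6"}, {"eta", "0.3"}});
```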

* Learner changes:
** Make `LearnerImpl` the only source of configuration.

    All configuration is stored in `LearnerImpl` and applied by `LearnerImpl::Configure()`.

** Remove the `Booster` wrapper from the C API.

    It was originally kept for "compatibility reasons", but the reason was never
    documented, so it is removed here.

** Add a `metric_names_` field in `LearnerImpl`.
** Remove `LazyInit`.  Configuration is now always lazy.
** Run `Configure` before every iteration (see the sketch below).
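
A minimal sketch of the lazy-configuration flow (the class below is an
illustrative stand-in, not the actual `LearnerImpl`):

```cpp
// Parameters are only staged by SetParam(); Configure() is an idempotent
// call that is re-run before every iteration and becomes a cheap no-op
// when nothing has changed.
#include <map>
#include <string>

class DMatrix {};  // stand-in for xgboost::DMatrix

class LearnerSketch {
 public:
  void SetParam(const std::string& key, const std::string& value) {
    cfg_[key] = value;
    configured_ = false;  // a parameter change invalidates the configuration
  }
  void Configure() {
    if (configured_) { return; }
    // ... initialise training/generic parameters, objective, booster, metrics ...
    configured_ = true;
  }
  void UpdateOneIter(int /*iter*/, DMatrix* /*train*/) {
    Configure();  // configuration is always lazy, checked once per iteration
    // ... boosting step ...
  }

 private:
  bool configured_{false};
  std::map<std::string, std::string> cfg_;
};
```

In the C API this removes the `LazyInit` calls: a `BoosterHandle` is now a
`Learner*` directly, and `Configure()` is invoked where needed (for example
after loading a rabit checkpoint).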

* Predictor changes:
** Allocate both the CPU and GPU predictors.
** Remove `cpu_predictor` from `gpu_predictor`.

    `GBTree` now dispatches to the appropriate predictor (see the sketch below).

** Remove some GPU Predictor tests.
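
Roughly how the dispatch works, condensed from the `GBTree` hunks below.  This
is a simplified sketch: member names here are placeholders (the actual code
reads the name from `GBTreeTrainParam`), and the real code fails with an error
for unknown or unavailable predictor names.

```cpp
// Both predictors are allocated during Configure(); GetPredictor() picks one
// based on the `predictor` training parameter.
#include <memory>
#include <string>

class Predictor {};  // stand-in for xgboost::Predictor

class GBTreeSketch {
 public:
  std::unique_ptr<Predictor> const& GetPredictor() const {
#if defined(XGBOOST_USE_CUDA)
    if (predictor_name_ == "gpu_predictor") {
      return gpu_predictor_;
    }
#endif
    return cpu_predictor_;  // simplified fallback
  }

 private:
  std::string predictor_name_{"cpu_predictor"};
  std::unique_ptr<Predictor> cpu_predictor_;
#if defined(XGBOOST_USE_CUDA)
  std::unique_ptr<Predictor> gpu_predictor_;
#endif
};
```

Batch prediction and the prediction cache go through `GetPredictor()`, while
single-instance, leaf, and contribution predictions still use `cpu_predictor_`
unconditionally, as shown in the GBTree header hunk below.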

* IO

No IO changes.  Binary model format stability is verified by comparing the
hash values of models saved by the two commits.
This commit is contained in:
Jiaming Yuan 2019-07-20 08:34:56 -04:00 committed by GitHub
parent ad1192e8a3
commit f0064c07ab
69 changed files with 669 additions and 761 deletions

View File

@ -10,6 +10,9 @@
#include <dmlc/omp.h> #include <dmlc/omp.h>
#include <cmath> #include <cmath>
#include <iostream> #include <iostream>
#include <vector>
#include <string>
#include <utility>
/*! /*!
* \brief string flag for R library, to leave hooks when needed. * \brief string flag for R library, to leave hooks when needed.
@ -199,6 +202,8 @@ using GradientPairPrecise = detail::GradientPairInternal<double>;
* associative. */ * associative. */
using GradientPairInteger = detail::GradientPairInternal<int64_t>; using GradientPairInteger = detail::GradientPairInternal<int64_t>;
using Args = std::vector<std::pair<std::string, std::string> >;
/*! \brief small eps gap for minimum split decision. */ /*! \brief small eps gap for minimum split decision. */
const bst_float kRtEps = 1e-6f; const bst_float kRtEps = 1e-6f;

View File

@ -29,19 +29,11 @@ namespace xgboost {
*/ */
class GradientBooster { class GradientBooster {
protected: protected:
LearnerTrainParam const* learner_param_; GenericParameter const* learner_param_;
public: public:
/*! \brief virtual destructor */ /*! \brief virtual destructor */
virtual ~GradientBooster() = default; virtual ~GradientBooster() = default;
/*!
* \brief set configuration from pair iterators.
* \param begin The beginning iterator.
* \param end The end iterator.
* \tparam PairIter iterator<std::pair<std::string, std::string> >
*/
template<typename PairIter>
inline void Configure(PairIter begin, PairIter end);
/*! /*!
* \brief Set the configuration of gradient boosting. * \brief Set the configuration of gradient boosting.
* User must call configure once before InitModel and Training. * User must call configure once before InitModel and Training.
@ -159,18 +151,11 @@ class GradientBooster {
*/ */
static GradientBooster* Create( static GradientBooster* Create(
const std::string& name, const std::string& name,
LearnerTrainParam const* gparam, GenericParameter const* gparam,
const std::vector<std::shared_ptr<DMatrix> >& cache_mats, const std::vector<std::shared_ptr<DMatrix> >& cache_mats,
bst_float base_margin); bst_float base_margin);
}; };
// implementing configure.
template<typename PairIter>
inline void GradientBooster::Configure(PairIter begin, PairIter end) {
std::vector<std::pair<std::string, std::string> > vec(begin, end);
this->Configure(vec);
}
/*! /*!
* \brief Registry entry for tree updater. * \brief Registry entry for tree updater.
*/ */

View File

@ -11,36 +11,20 @@
#include <string> #include <string>
namespace xgboost { namespace xgboost {
struct GenericParameter : public dmlc::Parameter<GenericParameter> {
enum class DataSplitMode : int {
kAuto = 0, kCol = 1, kRow = 2
};
} // namespace xgboost
DECLARE_FIELD_ENUM_CLASS(xgboost::DataSplitMode);
namespace xgboost {
struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
// stored random seed // stored random seed
int seed; int seed;
// whether seed the PRNG each iteration // whether seed the PRNG each iteration
bool seed_per_iteration; bool seed_per_iteration;
// data split mode, can be row, col, or none.
DataSplitMode dsplit;
// number of threads to use if OpenMP is enabled // number of threads to use if OpenMP is enabled
// if equals 0, use system default // if equals 0, use system default
int nthread; int nthread;
// flag to disable default metric
int disable_default_eval_metric;
// primary device. // primary device.
int gpu_id; int gpu_id;
// number of devices to use, -1 implies using all available devices. // number of devices to use, -1 implies using all available devices.
int n_gpus; int n_gpus;
std::string booster;
// declare parameters // declare parameters
DMLC_DECLARE_PARAMETER(LearnerTrainParam) { DMLC_DECLARE_PARAMETER(GenericParameter) {
DMLC_DECLARE_FIELD(seed).set_default(0).describe( DMLC_DECLARE_FIELD(seed).set_default(0).describe(
"Random number seed during training."); "Random number seed during training.");
DMLC_DECLARE_FIELD(seed_per_iteration) DMLC_DECLARE_FIELD(seed_per_iteration)
@ -49,17 +33,8 @@ struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
"Seed PRNG determnisticly via iterator number, " "Seed PRNG determnisticly via iterator number, "
"this option will be switched on automatically on distributed " "this option will be switched on automatically on distributed "
"mode."); "mode.");
DMLC_DECLARE_FIELD(dsplit)
.set_default(DataSplitMode::kAuto)
.add_enum("auto", DataSplitMode::kAuto)
.add_enum("col", DataSplitMode::kCol)
.add_enum("row", DataSplitMode::kRow)
.describe("Data split mode for distributed training.");
DMLC_DECLARE_FIELD(nthread).set_default(0).describe( DMLC_DECLARE_FIELD(nthread).set_default(0).describe(
"Number of threads to use."); "Number of threads to use.");
DMLC_DECLARE_FIELD(disable_default_eval_metric)
.set_default(0)
.describe("flag to disable default metric. Set to >0 to disable");
DMLC_DECLARE_FIELD(gpu_id) DMLC_DECLARE_FIELD(gpu_id)
.set_default(0) .set_default(0)
.describe("The primary GPU device ordinal."); .describe("The primary GPU device ordinal.");
@ -69,9 +44,6 @@ struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
.describe("Deprecated, please use distributed training with one " .describe("Deprecated, please use distributed training with one "
"process per GPU. " "process per GPU. "
"Number of GPUs to use for multi-gpu algorithms."); "Number of GPUs to use for multi-gpu algorithms.");
DMLC_DECLARE_FIELD(booster)
.set_default("gbtree")
.describe("Gradient booster used for training.");
} }
}; };
} // namespace xgboost } // namespace xgboost

View File

@ -24,6 +24,7 @@
#include <vector> #include <vector>
namespace xgboost { namespace xgboost {
/*! /*!
* \brief Learner class that does training and prediction. * \brief Learner class that does training and prediction.
* This is the user facing module of xgboost training. * This is the user facing module of xgboost training.
@ -45,25 +46,9 @@ class Learner : public rabit::Serializable {
/*! \brief virtual destructor */ /*! \brief virtual destructor */
~Learner() override = default; ~Learner() override = default;
/*! /*!
* \brief set configuration from pair iterators. * \brief Configure Learner based on set parameters.
* \param begin The beginning iterator.
* \param end The end iterator.
* \tparam PairIter iterator<std::pair<std::string, std::string> >
*/ */
template<typename PairIter> virtual void Configure() = 0;
inline void Configure(PairIter begin, PairIter end);
/*!
* \brief Set the configuration of gradient boosting.
* User must call configure once before InitModel and Training.
*
* \param cfg configurations on both training and model parameters.
*/
virtual void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) = 0;
/*!
* \brief Initialize the model using the specified configurations via Configure.
* An model have to be either Loaded or initialized before Update/Predict/Save can be called.
*/
virtual void InitModel() = 0;
/*! /*!
* \brief load model from stream * \brief load model from stream
* \param fi input stream. * \param fi input stream.
@ -121,10 +106,27 @@ class Learner : public rabit::Serializable {
bool pred_contribs = false, bool pred_contribs = false,
bool approx_contribs = false, bool approx_contribs = false,
bool pred_interactions = false) = 0; bool pred_interactions = false) = 0;
/*!
* \brief Set multiple parameters at once.
*
* \param args parameters.
*/
virtual void SetParams(Args const& args) = 0;
/*!
* \brief Set parameter for booster
*
* The property will NOT be saved along with booster
*
* \param key The key of parameter
* \param value The value of parameter
*/
virtual void SetParam(const std::string& key, const std::string& value) = 0;
/*! /*!
* \brief Set additional attribute to the Booster. * \brief Set additional attribute to the Booster.
*
* The property will be saved along the booster. * The property will be saved along the booster.
*
* \param key The key of the property. * \param key The key of the property.
* \param value The value of the property. * \param value The value of the property.
*/ */
@ -148,8 +150,6 @@ class Learner : public rabit::Serializable {
* \return vector of attribute name strings. * \return vector of attribute name strings.
*/ */
virtual std::vector<std::string> GetAttrNames() const = 0; virtual std::vector<std::string> GetAttrNames() const = 0;
virtual LearnerTrainParam const& GetLearnerTrainParameter() const = 0;
/*! /*!
* \return whether the model allow lazy checkpoint in rabit. * \return whether the model allow lazy checkpoint in rabit.
*/ */
@ -161,24 +161,9 @@ class Learner : public rabit::Serializable {
* \param format the format to dump the model in * \param format the format to dump the model in
* \return a vector of dump for boosters. * \return a vector of dump for boosters.
*/ */
std::vector<std::string> DumpModel(const FeatureMap& fmap, virtual std::vector<std::string> DumpModel(const FeatureMap& fmap,
bool with_stats, bool with_stats,
std::string format) const; std::string format) const = 0;
/*!
* \brief online prediction function, predict score for one instance at a time
* NOTE: use the batch prediction interface if possible, batch prediction is usually
* more efficient than online prediction
* This function is NOT threadsafe, make sure you only call from one thread.
*
* \param inst the instance you want to predict
* \param output_margin whether to only predict margin value instead of transformed prediction
* \param out_preds output vector to hold the predictions
* \param ntree_limit limit the number of trees used in prediction
*/
inline void Predict(const SparsePage::Inst &inst,
bool output_margin,
HostDeviceVector<bst_float> *out_preds,
unsigned ntree_limit = 0) const;
/*! /*!
* \brief Create a new instance of learner. * \brief Create a new instance of learner.
* \param cache_data The matrix to cache the prediction. * \param cache_data The matrix to cache the prediction.
@ -186,6 +171,7 @@ class Learner : public rabit::Serializable {
*/ */
static Learner* Create(const std::vector<std::shared_ptr<DMatrix> >& cache_data); static Learner* Create(const std::vector<std::shared_ptr<DMatrix> >& cache_data);
virtual GenericParameter const& GetGenericParameter() const = 0;
/*! /*!
* \brief Get configuration arguments currently stored by the learner * \brief Get configuration arguments currently stored by the learner
* \return Key-value pairs representing configuration arguments * \return Key-value pairs representing configuration arguments
@ -202,26 +188,8 @@ class Learner : public rabit::Serializable {
/*! \brief The evaluation metrics used to evaluate the model. */ /*! \brief The evaluation metrics used to evaluate the model. */
std::vector<std::unique_ptr<Metric> > metrics_; std::vector<std::unique_ptr<Metric> > metrics_;
/*! \brief Training parameter. */ /*! \brief Training parameter. */
LearnerTrainParam tparam_; GenericParameter generic_param_;
}; };
// implementation of inline functions.
inline void Learner::Predict(const SparsePage::Inst& inst,
bool output_margin,
HostDeviceVector<bst_float>* out_preds,
unsigned ntree_limit) const {
gbm_->PredictInstance(inst, &out_preds->HostVector(), ntree_limit);
if (!output_margin) {
obj_->PredTransform(out_preds);
}
}
// implementing configure.
template<typename PairIter>
inline void Learner::Configure(PairIter begin, PairIter end) {
std::vector<std::pair<std::string, std::string> > vec(begin, end);
this->Configure(vec);
}
} // namespace xgboost } // namespace xgboost
#endif // XGBOOST_LEARNER_H_ #endif // XGBOOST_LEARNER_H_

View File

@ -20,7 +20,7 @@ namespace xgboost {
*/ */
class LinearUpdater { class LinearUpdater {
protected: protected:
LearnerTrainParam const* learner_param_; GenericParameter const* learner_param_;
public: public:
/*! \brief virtual destructor */ /*! \brief virtual destructor */
@ -29,7 +29,7 @@ class LinearUpdater {
* \brief Initialize the updater with given arguments. * \brief Initialize the updater with given arguments.
* \param args arguments to the objective function. * \param args arguments to the objective function.
*/ */
virtual void Init( virtual void Configure(
const std::vector<std::pair<std::string, std::string> >& args) = 0; const std::vector<std::pair<std::string, std::string> >& args) = 0;
/** /**
@ -40,7 +40,6 @@ class LinearUpdater {
* \param model Model to be updated. * \param model Model to be updated.
* \param sum_instance_weight The sum instance weights, used to normalise l1/l2 penalty. * \param sum_instance_weight The sum instance weights, used to normalise l1/l2 penalty.
*/ */
virtual void Update(HostDeviceVector<GradientPair>* in_gpair, DMatrix* data, virtual void Update(HostDeviceVector<GradientPair>* in_gpair, DMatrix* data,
gbm::GBLinearModel* model, gbm::GBLinearModel* model,
double sum_instance_weight) = 0; double sum_instance_weight) = 0;
@ -49,7 +48,7 @@ class LinearUpdater {
* \brief Create a linear updater given name * \brief Create a linear updater given name
* \param name Name of the linear updater. * \param name Name of the linear updater.
*/ */
static LinearUpdater* Create(const std::string& name, LearnerTrainParam const*); static LinearUpdater* Create(const std::string& name, GenericParameter const*);
}; };
/*! /*!

View File

@ -66,14 +66,9 @@ class ConsoleLogger : public BaseLogger {
static ConsoleLoggerParam param_; static ConsoleLoggerParam param_;
LogVerbosity cur_verbosity_; LogVerbosity cur_verbosity_;
static void Configure(const std::map<std::string, std::string>& args);
public: public:
template <typename ArgIter> static void Configure(Args const& args);
static void Configure(ArgIter begin, ArgIter end) {
std::map<std::string, std::string> args(begin, end);
Configure(args);
}
static LogVerbosity GlobalVerbosity(); static LogVerbosity GlobalVerbosity();
static LogVerbosity DefaultVerbosity(); static LogVerbosity DefaultVerbosity();

View File

@ -26,7 +26,7 @@ namespace xgboost {
*/ */
class Metric { class Metric {
protected: protected:
LearnerTrainParam const* tparam_; GenericParameter const* tparam_;
public: public:
/*! /*!
@ -35,17 +35,6 @@ class Metric {
*/ */
virtual void Configure( virtual void Configure(
const std::vector<std::pair<std::string, std::string> >& args) {} const std::vector<std::pair<std::string, std::string> >& args) {}
/*!
* \brief set configuration from pair iterators.
* \param begin The beginning iterator.
* \param end The end iterator.
* \tparam PairIter iterator<std::pair<std::string, std::string> >
*/
template<typename PairIter>
inline void Configure(PairIter begin, PairIter end) {
std::vector<std::pair<std::string, std::string> > vec(begin, end);
this->Configure(vec);
}
/*! /*!
* \brief evaluate a specific metric * \brief evaluate a specific metric
* \param preds prediction * \param preds prediction
@ -68,7 +57,7 @@ class Metric {
* and the name will be matched in the registry. * and the name will be matched in the registry.
* \return the created metric. * \return the created metric.
*/ */
static Metric* Create(const std::string& name, LearnerTrainParam const* tparam); static Metric* Create(const std::string& name, GenericParameter const* tparam);
}; };
/*! /*!

View File

@ -24,19 +24,11 @@ namespace xgboost {
/*! \brief interface of objective function */ /*! \brief interface of objective function */
class ObjFunction { class ObjFunction {
protected: protected:
LearnerTrainParam const* tparam_; GenericParameter const* tparam_;
public: public:
/*! \brief virtual destructor */ /*! \brief virtual destructor */
virtual ~ObjFunction() = default; virtual ~ObjFunction() = default;
/*!
* \brief set configuration from pair iterators.
* \param begin The beginning iterator.
* \param end The end iterator.
* \tparam PairIter iterator<std::pair<std::string, std::string> >
*/
template<typename PairIter>
inline void Configure(PairIter begin, PairIter end);
/*! /*!
* \brief Configure the objective with the specified parameters. * \brief Configure the objective with the specified parameters.
* \param args arguments to the objective function. * \param args arguments to the objective function.
@ -85,16 +77,9 @@ class ObjFunction {
* \param tparam Generic parameters. * \param tparam Generic parameters.
* \param name Name of the objective. * \param name Name of the objective.
*/ */
static ObjFunction* Create(const std::string& name, LearnerTrainParam const* tparam); static ObjFunction* Create(const std::string& name, GenericParameter const* tparam);
}; };
// implementing configure.
template<typename PairIter>
inline void ObjFunction::Configure(PairIter begin, PairIter end) {
std::vector<std::pair<std::string, std::string> > vec(begin, end);
this->Configure(vec);
}
/*! /*!
* \brief Registry entry for objective factory functions. * \brief Registry entry for objective factory functions.
*/ */

View File

@ -40,7 +40,7 @@ namespace xgboost {
class Predictor { class Predictor {
protected: protected:
LearnerTrainParam const* learner_param_; GenericParameter const* learner_param_;
public: public:
virtual ~Predictor() = default; virtual ~Predictor() = default;
@ -55,8 +55,8 @@ class Predictor {
* \param cache Vector of DMatrix's to be used in prediction. * \param cache Vector of DMatrix's to be used in prediction.
*/ */
virtual void Init(const std::vector<std::pair<std::string, std::string>>& cfg, virtual void Configure(const std::vector<std::pair<std::string, std::string>>& cfg,
const std::vector<std::shared_ptr<DMatrix>>& cache); const std::vector<std::shared_ptr<DMatrix>>& cache);
/** /**
* \brief Generate batch predictions for a given feature matrix. May use * \brief Generate batch predictions for a given feature matrix. May use
@ -174,7 +174,7 @@ class Predictor {
* *
*/ */
static Predictor* Create(std::string const& name, LearnerTrainParam const*); static Predictor* Create(std::string const& name, GenericParameter const*);
protected: protected:
/** /**
@ -191,7 +191,6 @@ class Predictor {
* \brief Map of matrices and associated cached predictions to facilitate * \brief Map of matrices and associated cached predictions to facilitate
* storing and looking up predictions. * storing and looking up predictions.
*/ */
std::unordered_map<DMatrix*, PredictionCacheEntry> cache_; std::unordered_map<DMatrix*, PredictionCacheEntry> cache_;
}; };

View File

@ -27,7 +27,7 @@ namespace xgboost {
*/ */
class TreeUpdater { class TreeUpdater {
protected: protected:
LearnerTrainParam const* tparam_; GenericParameter const* tparam_;
public: public:
/*! \brief virtual destructor */ /*! \brief virtual destructor */
@ -36,7 +36,7 @@ class TreeUpdater {
* \brief Initialize the updater with given arguments. * \brief Initialize the updater with given arguments.
* \param args arguments to the objective function. * \param args arguments to the objective function.
*/ */
virtual void Init(const std::vector<std::pair<std::string, std::string> >& args) = 0; virtual void Configure(const Args& args) = 0;
/*! /*!
* \brief perform update to the tree models * \brief perform update to the tree models
* \param gpair the gradient pair statistics of the data * \param gpair the gradient pair statistics of the data
@ -65,11 +65,13 @@ class TreeUpdater {
return false; return false;
} }
virtual char const* Name() const = 0;
/*! /*!
* \brief Create a tree updater given name * \brief Create a tree updater given name
* \param name Name of the tree updater. * \param name Name of the tree updater.
*/ */
static TreeUpdater* Create(const std::string& name, LearnerTrainParam const* tparam); static TreeUpdater* Create(const std::string& name, GenericParameter const* tparam);
}; };
/*! /*!

View File

@ -24,82 +24,6 @@
namespace xgboost { namespace xgboost {
// booster wrapper for backward compatible reason.
class Booster {
public:
explicit Booster(const std::vector<std::shared_ptr<DMatrix> >& cache_mats)
: configured_(false),
initialized_(false),
learner_(Learner::Create(cache_mats)) {}
inline Learner* learner() { // NOLINT
return learner_.get();
}
inline void SetParam(const std::string& name, const std::string& val) {
auto it = std::find_if(cfg_.begin(), cfg_.end(),
[&name, &val](decltype(*cfg_.begin()) &x) {
if (name == "eval_metric") {
return x.first == name && x.second == val;
}
return x.first == name;
});
if (it == cfg_.end()) {
cfg_.emplace_back(name, val);
} else {
(*it).second = val;
}
if (configured_) {
learner_->Configure(cfg_);
}
}
inline void LazyInit() {
if (!configured_) {
LoadSavedParamFromAttr();
learner_->Configure(cfg_);
configured_ = true;
}
if (!initialized_) {
learner_->InitModel();
initialized_ = true;
}
}
inline void LoadSavedParamFromAttr() {
// Locate saved parameters from learner attributes
const std::string prefix = "SAVED_PARAM_";
for (const std::string& attr_name : learner_->GetAttrNames()) {
if (attr_name.find(prefix) == 0) {
const std::string saved_param = attr_name.substr(prefix.length());
if (std::none_of(cfg_.begin(), cfg_.end(),
[&](const std::pair<std::string, std::string>& x)
{ return x.first == saved_param; })) {
// If cfg_ contains the parameter already, skip it
// (this is to allow the user to explicitly override its value)
std::string saved_param_value;
CHECK(learner_->GetAttr(attr_name, &saved_param_value));
cfg_.emplace_back(saved_param, saved_param_value);
}
}
}
}
inline void LoadModel(dmlc::Stream* fi) {
learner_->Load(fi);
initialized_ = true;
}
bool IsInitialized() const { return initialized_; }
void Intialize() { initialized_ = true; }
private:
bool configured_;
bool initialized_;
std::unique_ptr<Learner> learner_;
std::vector<std::pair<std::string, std::string> > cfg_;
};
// declare the data callback. // declare the data callback.
XGB_EXTERN_C int XGBoostNativeDataIterSetData( XGB_EXTERN_C int XGBoostNativeDataIterSetData(
void *handle, XGBoostBatchCSR batch); void *handle, XGBoostBatchCSR batch);
@ -861,14 +785,14 @@ XGB_DLL int XGBoosterCreate(const DMatrixHandle dmats[],
for (xgboost::bst_ulong i = 0; i < len; ++i) { for (xgboost::bst_ulong i = 0; i < len; ++i) {
mats.push_back(*static_cast<std::shared_ptr<DMatrix>*>(dmats[i])); mats.push_back(*static_cast<std::shared_ptr<DMatrix>*>(dmats[i]));
} }
*out = new Booster(mats); *out = Learner::Create(mats);
API_END(); API_END();
} }
XGB_DLL int XGBoosterFree(BoosterHandle handle) { XGB_DLL int XGBoosterFree(BoosterHandle handle) {
API_BEGIN(); API_BEGIN();
CHECK_HANDLE(); CHECK_HANDLE();
delete static_cast<Booster*>(handle); delete static_cast<Learner*>(handle);
API_END(); API_END();
} }
@ -877,7 +801,7 @@ XGB_DLL int XGBoosterSetParam(BoosterHandle handle,
const char *value) { const char *value) {
API_BEGIN(); API_BEGIN();
CHECK_HANDLE(); CHECK_HANDLE();
static_cast<Booster*>(handle)->SetParam(name, value); static_cast<Learner*>(handle)->SetParam(name, value);
API_END(); API_END();
} }
@ -886,12 +810,11 @@ XGB_DLL int XGBoosterUpdateOneIter(BoosterHandle handle,
DMatrixHandle dtrain) { DMatrixHandle dtrain) {
API_BEGIN(); API_BEGIN();
CHECK_HANDLE(); CHECK_HANDLE();
auto* bst = static_cast<Booster*>(handle); auto* bst = static_cast<Learner*>(handle);
auto *dtr = auto *dtr =
static_cast<std::shared_ptr<DMatrix>*>(dtrain); static_cast<std::shared_ptr<DMatrix>*>(dtrain);
bst->LazyInit(); bst->UpdateOneIter(iter, dtr->get());
bst->learner()->UpdateOneIter(iter, dtr->get());
API_END(); API_END();
} }
@ -903,7 +826,7 @@ XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle,
HostDeviceVector<GradientPair> tmp_gpair; HostDeviceVector<GradientPair> tmp_gpair;
API_BEGIN(); API_BEGIN();
CHECK_HANDLE(); CHECK_HANDLE();
auto* bst = static_cast<Booster*>(handle); auto* bst = static_cast<Learner*>(handle);
auto* dtr = auto* dtr =
static_cast<std::shared_ptr<DMatrix>*>(dtrain); static_cast<std::shared_ptr<DMatrix>*>(dtrain);
tmp_gpair.Resize(len); tmp_gpair.Resize(len);
@ -912,8 +835,7 @@ XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle,
tmp_gpair_h[i] = GradientPair(grad[i], hess[i]); tmp_gpair_h[i] = GradientPair(grad[i], hess[i]);
} }
bst->LazyInit(); bst->BoostOneIter(0, dtr->get(), &tmp_gpair);
bst->learner()->BoostOneIter(0, dtr->get(), &tmp_gpair);
API_END(); API_END();
} }
@ -926,7 +848,7 @@ XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle,
std::string& eval_str = XGBAPIThreadLocalStore::Get()->ret_str; std::string& eval_str = XGBAPIThreadLocalStore::Get()->ret_str;
API_BEGIN(); API_BEGIN();
CHECK_HANDLE(); CHECK_HANDLE();
auto* bst = static_cast<Booster*>(handle); auto* bst = static_cast<Learner*>(handle);
std::vector<DMatrix*> data_sets; std::vector<DMatrix*> data_sets;
std::vector<std::string> data_names; std::vector<std::string> data_names;
@ -935,8 +857,7 @@ XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle,
data_names.emplace_back(evnames[i]); data_names.emplace_back(evnames[i]);
} }
bst->LazyInit(); eval_str = bst->EvalOneIter(iter, data_sets, data_names);
eval_str = bst->learner()->EvalOneIter(iter, data_sets, data_names);
*out_str = eval_str.c_str(); *out_str = eval_str.c_str();
API_END(); API_END();
} }
@ -951,10 +872,9 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
XGBAPIThreadLocalStore::Get()->ret_vec_float; XGBAPIThreadLocalStore::Get()->ret_vec_float;
API_BEGIN(); API_BEGIN();
CHECK_HANDLE(); CHECK_HANDLE();
auto *bst = static_cast<Booster*>(handle); auto *bst = static_cast<Learner*>(handle);
bst->LazyInit();
HostDeviceVector<bst_float> tmp_preds; HostDeviceVector<bst_float> tmp_preds;
bst->learner()->Predict( bst->Predict(
static_cast<std::shared_ptr<DMatrix>*>(dmat)->get(), static_cast<std::shared_ptr<DMatrix>*>(dmat)->get(),
(option_mask & 1) != 0, (option_mask & 1) != 0,
&tmp_preds, ntree_limit, &tmp_preds, ntree_limit,
@ -972,7 +892,7 @@ XGB_DLL int XGBoosterLoadModel(BoosterHandle handle, const char* fname) {
API_BEGIN(); API_BEGIN();
CHECK_HANDLE(); CHECK_HANDLE();
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname, "r")); std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname, "r"));
static_cast<Booster*>(handle)->LoadModel(fi.get()); static_cast<Learner*>(handle)->Load(fi.get());
API_END(); API_END();
} }
@ -980,9 +900,8 @@ XGB_DLL int XGBoosterSaveModel(BoosterHandle handle, const char* fname) {
API_BEGIN(); API_BEGIN();
CHECK_HANDLE(); CHECK_HANDLE();
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname, "w")); std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname, "w"));
auto *bst = static_cast<Booster*>(handle); auto *bst = static_cast<Learner*>(handle);
bst->LazyInit(); bst->Save(fo.get());
bst->learner()->Save(fo.get());
API_END(); API_END();
} }
@ -992,7 +911,7 @@ XGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle,
API_BEGIN(); API_BEGIN();
CHECK_HANDLE(); CHECK_HANDLE();
common::MemoryFixSizeBuffer fs((void*)buf, len); // NOLINT(*) common::MemoryFixSizeBuffer fs((void*)buf, len); // NOLINT(*)
static_cast<Booster*>(handle)->LoadModel(&fs); static_cast<Learner*>(handle)->Load(&fs);
API_END(); API_END();
} }
@ -1005,9 +924,8 @@ XGB_DLL int XGBoosterGetModelRaw(BoosterHandle handle,
API_BEGIN(); API_BEGIN();
CHECK_HANDLE(); CHECK_HANDLE();
common::MemoryBufferStream fo(&raw_str); common::MemoryBufferStream fo(&raw_str);
auto *bst = static_cast<Booster*>(handle); auto *bst = static_cast<Learner*>(handle);
bst->LazyInit(); bst->Save(&fo);
bst->learner()->Save(&fo);
*out_dptr = dmlc::BeginPtr(raw_str); *out_dptr = dmlc::BeginPtr(raw_str);
*out_len = static_cast<xgboost::bst_ulong>(raw_str.length()); *out_len = static_cast<xgboost::bst_ulong>(raw_str.length());
API_END(); API_END();
@ -1022,9 +940,8 @@ inline void XGBoostDumpModelImpl(
const char*** out_models) { const char*** out_models) {
std::vector<std::string>& str_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_str; std::vector<std::string>& str_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_str;
std::vector<const char*>& charp_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_charp; std::vector<const char*>& charp_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_charp;
auto *bst = static_cast<Booster*>(handle); auto *bst = static_cast<Learner*>(handle);
bst->LazyInit(); str_vecs = bst->DumpModel(fmap, with_stats != 0, format);
str_vecs = bst->learner()->DumpModel(fmap, with_stats != 0, format);
charp_vecs.resize(str_vecs.size()); charp_vecs.resize(str_vecs.size());
for (size_t i = 0; i < str_vecs.size(); ++i) { for (size_t i = 0; i < str_vecs.size(); ++i) {
charp_vecs[i] = str_vecs[i].c_str(); charp_vecs[i] = str_vecs[i].c_str();
@ -1093,11 +1010,11 @@ XGB_DLL int XGBoosterGetAttr(BoosterHandle handle,
const char* key, const char* key,
const char** out, const char** out,
int* success) { int* success) {
auto* bst = static_cast<Booster*>(handle); auto* bst = static_cast<Learner*>(handle);
std::string& ret_str = XGBAPIThreadLocalStore::Get()->ret_str; std::string& ret_str = XGBAPIThreadLocalStore::Get()->ret_str;
API_BEGIN(); API_BEGIN();
CHECK_HANDLE(); CHECK_HANDLE();
if (bst->learner()->GetAttr(key, &ret_str)) { if (bst->GetAttr(key, &ret_str)) {
*out = ret_str.c_str(); *out = ret_str.c_str();
*success = 1; *success = 1;
} else { } else {
@ -1108,28 +1025,28 @@ XGB_DLL int XGBoosterGetAttr(BoosterHandle handle,
} }
XGB_DLL int XGBoosterSetAttr(BoosterHandle handle, XGB_DLL int XGBoosterSetAttr(BoosterHandle handle,
const char* key, const char* key,
const char* value) { const char* value) {
auto* bst = static_cast<Booster*>(handle); auto* bst = static_cast<Learner*>(handle);
API_BEGIN(); API_BEGIN();
CHECK_HANDLE(); CHECK_HANDLE();
if (value == nullptr) { if (value == nullptr) {
bst->learner()->DelAttr(key); bst->DelAttr(key);
} else { } else {
bst->learner()->SetAttr(key, value); bst->SetAttr(key, value);
} }
API_END(); API_END();
} }
XGB_DLL int XGBoosterGetAttrNames(BoosterHandle handle, XGB_DLL int XGBoosterGetAttrNames(BoosterHandle handle,
xgboost::bst_ulong* out_len, xgboost::bst_ulong* out_len,
const char*** out) { const char*** out) {
std::vector<std::string>& str_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_str; std::vector<std::string>& str_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_str;
std::vector<const char*>& charp_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_charp; std::vector<const char*>& charp_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_charp;
auto *bst = static_cast<Booster*>(handle); auto *bst = static_cast<Learner*>(handle);
API_BEGIN(); API_BEGIN();
CHECK_HANDLE(); CHECK_HANDLE();
str_vecs = bst->learner()->GetAttrNames(); str_vecs = bst->GetAttrNames();
charp_vecs.resize(str_vecs.size()); charp_vecs.resize(str_vecs.size());
for (size_t i = 0; i < str_vecs.size(); ++i) { for (size_t i = 0; i < str_vecs.size(); ++i) {
charp_vecs[i] = str_vecs[i].c_str(); charp_vecs[i] = str_vecs[i].c_str();
@ -1140,13 +1057,13 @@ XGB_DLL int XGBoosterGetAttrNames(BoosterHandle handle,
} }
XGB_DLL int XGBoosterLoadRabitCheckpoint(BoosterHandle handle, XGB_DLL int XGBoosterLoadRabitCheckpoint(BoosterHandle handle,
int* version) { int* version) {
API_BEGIN(); API_BEGIN();
CHECK_HANDLE(); CHECK_HANDLE();
auto* bst = static_cast<Booster*>(handle); auto* bst = static_cast<Learner*>(handle);
*version = rabit::LoadCheckPoint(bst->learner()); *version = rabit::LoadCheckPoint(bst);
if (*version != 0) { if (*version != 0) {
bst->Intialize(); bst->Configure();
} }
API_END(); API_END();
} }
@ -1154,23 +1071,14 @@ XGB_DLL int XGBoosterLoadRabitCheckpoint(BoosterHandle handle,
XGB_DLL int XGBoosterSaveRabitCheckpoint(BoosterHandle handle) { XGB_DLL int XGBoosterSaveRabitCheckpoint(BoosterHandle handle) {
API_BEGIN(); API_BEGIN();
CHECK_HANDLE(); CHECK_HANDLE();
auto* bst = static_cast<Booster*>(handle); auto* bst = static_cast<Learner*>(handle);
if (bst->learner()->AllowLazyCheckPoint()) { if (bst->AllowLazyCheckPoint()) {
rabit::LazyCheckPoint(bst->learner()); rabit::LazyCheckPoint(bst);
} else { } else {
rabit::CheckPoint(bst->learner()); rabit::CheckPoint(bst);
} }
API_END(); API_END();
} }
/* hidden method; only known to C++ test suite */
const std::map<std::string, std::string>&
QueryBoosterConfigurationArguments(BoosterHandle handle) {
CHECK_HANDLE();
auto* bst = static_cast<Booster*>(handle);
bst->LazyInit();
return bst->learner()->GetConfigurationArguments();
}
// force link rabit // force link rabit
static DMLC_ATTRIBUTE_UNUSED int XGBOOST_LINK_RABIT_C_API_ = RabitLinkTag(); static DMLC_ATTRIBUTE_UNUSED int XGBOOST_LINK_RABIT_C_API_ = RabitLinkTag();

View File

@ -1,5 +1,5 @@
/*! /*!
* Copyright 2014 by Contributors * Copyright 2014-2019 by Contributors
* \file cli_main.cc * \file cli_main.cc
* \brief The command line interface program of xgboost. * \brief The command line interface program of xgboost.
* This file is not included in dynamic library. * This file is not included in dynamic library.
@ -188,10 +188,9 @@ void CLITrain(const CLIParam& param) {
std::unique_ptr<dmlc::Stream> fi( std::unique_ptr<dmlc::Stream> fi(
dmlc::Stream::Create(param.model_in.c_str(), "r")); dmlc::Stream::Create(param.model_in.c_str(), "r"));
learner->Load(fi.get()); learner->Load(fi.get());
learner->Configure(param.cfg); learner->SetParams(param.cfg);
} else { } else {
learner->Configure(param.cfg); learner->SetParams(param.cfg);
learner->InitModel();
} }
} }
LOG(INFO) << "Loading data: " << dmlc::GetTime() - tstart_data_load << " sec"; LOG(INFO) << "Loading data: " << dmlc::GetTime() - tstart_data_load << " sec";
@ -275,7 +274,7 @@ void CLIDumpModel(const CLIParam& param) {
std::unique_ptr<Learner> learner(Learner::Create({})); std::unique_ptr<Learner> learner(Learner::Create({}));
std::unique_ptr<dmlc::Stream> fi( std::unique_ptr<dmlc::Stream> fi(
dmlc::Stream::Create(param.model_in.c_str(), "r")); dmlc::Stream::Create(param.model_in.c_str(), "r"));
learner->Configure(param.cfg); learner->SetParams(param.cfg);
learner->Load(fi.get()); learner->Load(fi.get());
// dump data // dump data
std::vector<std::string> dump = learner->DumpModel( std::vector<std::string> dump = learner->DumpModel(
@ -316,7 +315,7 @@ void CLIPredict(const CLIParam& param) {
std::unique_ptr<dmlc::Stream> fi( std::unique_ptr<dmlc::Stream> fi(
dmlc::Stream::Create(param.model_in.c_str(), "r")); dmlc::Stream::Create(param.model_in.c_str(), "r"));
learner->Load(fi.get()); learner->Load(fi.get());
learner->Configure(param.cfg); learner->SetParams(param.cfg);
LOG(INFO) << "start prediction..."; LOG(INFO) << "start prediction...";
HostDeviceVector<bst_float> preds; HostDeviceVector<bst_float> preds;

View File

@ -402,7 +402,7 @@ struct GPUSketcher {
void SketchBatch(const SparsePage &batch, const MetaInfo &info) { void SketchBatch(const SparsePage &batch, const MetaInfo &info) {
GPUDistribution dist = GPUDistribution dist =
GPUDistribution::Block(GPUSet::All(learner_param_.gpu_id, learner_param_.n_gpus, GPUDistribution::Block(GPUSet::All(generic_param_.gpu_id, generic_param_.n_gpus,
batch.Size())); batch.Size()));
// create device shards // create device shards
@ -429,8 +429,8 @@ struct GPUSketcher {
} }
} }
GPUSketcher(const tree::TrainParam &param, const LearnerTrainParam &learner_param, int gpu_nrows) GPUSketcher(const tree::TrainParam &param, const GenericParameter &generic_param, int gpu_nrows)
: param_(param), learner_param_(learner_param), gpu_batch_nrows_(gpu_nrows), row_stride_(0) { : param_(param), generic_param_(generic_param), gpu_batch_nrows_(gpu_nrows), row_stride_(0) {
} }
/* Builds the sketches on the GPU for the dmatrix and returns the row stride /* Builds the sketches on the GPU for the dmatrix and returns the row stride
@ -452,14 +452,14 @@ struct GPUSketcher {
private: private:
std::vector<std::unique_ptr<DeviceShard>> shards_; std::vector<std::unique_ptr<DeviceShard>> shards_;
const tree::TrainParam &param_; const tree::TrainParam &param_;
const LearnerTrainParam &learner_param_; const GenericParameter &generic_param_;
int gpu_batch_nrows_; int gpu_batch_nrows_;
size_t row_stride_; size_t row_stride_;
std::unique_ptr<SketchContainer> sketch_container_; std::unique_ptr<SketchContainer> sketch_container_;
}; };
size_t DeviceSketch size_t DeviceSketch
(const tree::TrainParam &param, const LearnerTrainParam &learner_param, int gpu_batch_nrows, (const tree::TrainParam &param, const GenericParameter &learner_param, int gpu_batch_nrows,
DMatrix *dmat, HistogramCuts *hmat) { DMatrix *dmat, HistogramCuts *hmat) {
GPUSketcher sketcher(param, learner_param, gpu_batch_nrows); GPUSketcher sketcher(param, learner_param, gpu_batch_nrows);
// We only need to return the result in HistogramCuts container, so it is safe to // We only need to return the result in HistogramCuts container, so it is safe to

View File

@ -291,7 +291,7 @@ class DenseCuts : public CutsBuilder {
* \return The row stride across the entire dataset. * \return The row stride across the entire dataset.
*/ */
size_t DeviceSketch size_t DeviceSketch
(const tree::TrainParam& param, const LearnerTrainParam &learner_param, int gpu_batch_nrows, (const tree::TrainParam& param, const GenericParameter &learner_param, int gpu_batch_nrows,
DMatrix* dmat, HistogramCuts* hmat); DMatrix* dmat, HistogramCuts* hmat);

View File

@ -57,13 +57,13 @@ class GBLinear : public GradientBooster {
cache_[d.get()] = std::move(e); cache_[d.get()] = std::move(e);
} }
} }
void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) override { void Configure(const Args& cfg) override {
if (model_.weight.size() == 0) { if (model_.weight.size() == 0) {
model_.param.InitAllowUnknown(cfg); model_.param.InitAllowUnknown(cfg);
} }
param_.InitAllowUnknown(cfg); param_.InitAllowUnknown(cfg);
updater_.reset(LinearUpdater::Create(param_.updater, learner_param_)); updater_.reset(LinearUpdater::Create(param_.updater, learner_param_));
updater_->Init(cfg); updater_->Configure(cfg);
monitor_.Init("GBLinear"); monitor_.Init("GBLinear");
} }
void Load(dmlc::Stream* fi) override { void Load(dmlc::Stream* fi) override {

View File

@ -13,7 +13,7 @@ DMLC_REGISTRY_ENABLE(::xgboost::GradientBoosterReg);
namespace xgboost { namespace xgboost {
GradientBooster* GradientBooster::Create( GradientBooster* GradientBooster::Create(
const std::string& name, const std::string& name,
LearnerTrainParam const* learner_param, GenericParameter const* learner_param,
const std::vector<std::shared_ptr<DMatrix> >& cache_mats, const std::vector<std::shared_ptr<DMatrix> >& cache_mats,
bst_float base_margin) { bst_float base_margin) {
auto *e = ::dmlc::Registry< ::xgboost::GradientBoosterReg>::Get()->Find(name); auto *e = ::dmlc::Registry< ::xgboost::GradientBoosterReg>::Get()->Find(name);

View File

@ -32,12 +32,9 @@ namespace gbm {
DMLC_REGISTRY_FILE_TAG(gbtree); DMLC_REGISTRY_FILE_TAG(gbtree);
void GBTree::Configure(const std::vector<std::pair<std::string, std::string> >& cfg) { void GBTree::Configure(const Args& cfg) {
this->cfg_ = cfg; this->cfg_ = cfg;
tparam_.InitAllowUnknown(cfg); tparam_.InitAllowUnknown(cfg);
std::string updater_seq = tparam_.updater_seq;
ConfigureUpdaters({cfg.begin(), cfg.cend()});
model_.Configure(cfg); model_.Configure(cfg);
@ -46,15 +43,46 @@ void GBTree::Configure(const std::vector<std::pair<std::string, std::string> >&
model_.InitTreesToUpdate(); model_.InitTreesToUpdate();
} }
// configure predictor // configure predictors
predictor_ = std::unique_ptr<Predictor>( if (!cpu_predictor_) {
Predictor::Create(tparam_.predictor, this->learner_param_)); cpu_predictor_ = std::unique_ptr<Predictor>(
predictor_->Init(cfg, cache_); Predictor::Create("cpu_predictor", this->learner_param_));
}
#if defined(XGBOOST_USE_CUDA)
if (!gpu_predictor_) {
gpu_predictor_ = std::unique_ptr<Predictor>(
Predictor::Create("gpu_predictor", this->learner_param_));
}
#endif // defined(XGBOOST_USE_CUDA)
monitor_.Init("GBTree"); monitor_.Init("GBTree");
configured_ = true;
} }
void GBTree::PerformTreeMethodHeuristic(DMatrix* p_train, // FIXME(trivialfis): This handles updaters and predictor. Because the choice of updaters
std::map<std::string, std::string> cfg) { // depends on whether external memory is used and how large is dataset. We can remove the
// dependency on DMatrix once `hist` tree method can handle external memory so that we can
// make it default.
void GBTree::ConfigureWithKnownData(std::map<std::string, std::string> const& cfg, DMatrix* fmat) {
std::string updater_seq = tparam_.updater_seq;
tparam_.InitAllowUnknown(cfg);
this->PerformTreeMethodHeuristic({this->cfg_.begin(), this->cfg_.end()}, fmat);
this->ConfigureUpdaters({this->cfg_.begin(), this->cfg_.end()});
LOG(DEBUG) << "Using updaters: " << tparam_.updater_seq;
// initialize the updaters only when needed.
if (updater_seq != tparam_.updater_seq) {
this->updaters_.clear();
}
this->InitUpdater();
cpu_predictor_->Configure({cfg.cbegin(), cfg.cend()}, cache_);
#if defined(XGBOOST_USE_CUDA)
gpu_predictor_->Configure({cfg.cbegin(), cfg.cend()}, cache_);
#endif // defined(XGBOOST_USE_CUDA)
}
void GBTree::PerformTreeMethodHeuristic(std::map<std::string, std::string> const& cfg,
DMatrix* fmat) {
if (cfg.find("updater") != cfg.cend()) { if (cfg.find("updater") != cfg.cend()) {
// This method is disabled when `updater` parameter is explicitly // This method is disabled when `updater` parameter is explicitly
// set, since only experts are expected to do so. // set, since only experts are expected to do so.
@ -71,11 +99,11 @@ void GBTree::PerformTreeMethodHeuristic(DMatrix* p_train,
"Tree method is automatically selected to be 'approx' " "Tree method is automatically selected to be 'approx' "
"for distributed training."; "for distributed training.";
tparam_.tree_method = TreeMethod::kApprox; tparam_.tree_method = TreeMethod::kApprox;
} else if (!p_train->SingleColBlock()) { } else if (!fmat->SingleColBlock()) {
LOG(WARNING) << "Tree method is automatically set to 'approx' " LOG(WARNING) << "Tree method is automatically set to 'approx' "
"since external-memory data matrix is used."; "since external-memory data matrix is used.";
tparam_.tree_method = TreeMethod::kApprox; tparam_.tree_method = TreeMethod::kApprox;
} else if (p_train->Info().num_row_ >= (4UL << 20UL)) { } else if (fmat->Info().num_row_ >= (4UL << 20UL)) {
/* Choose tree_method='approx' automatically for large data matrix */ /* Choose tree_method='approx' automatically for large data matrix */
LOG(WARNING) << "Tree method is automatically selected to be " LOG(WARNING) << "Tree method is automatically selected to be "
"'approx' for faster speed. To use old behavior " "'approx' for faster speed. To use old behavior "
@ -91,7 +119,7 @@ void GBTree::PerformTreeMethodHeuristic(DMatrix* p_train,
void GBTree::ConfigureUpdaters(const std::map<std::string, std::string>& cfg) { void GBTree::ConfigureUpdaters(const std::map<std::string, std::string>& cfg) {
// `updater` parameter was manually specified // `updater` parameter was manually specified
if (cfg.find("updater") != cfg.cend()) { if (cfg.find("updater") != cfg.cend()) {
LOG(WARNING) << "DANGER AHEAD: You have manually specified `updater` " LOG(WARNING) << "DANGER AHEAD: You have manually specified `updater` "
"parameter. The `tree_method` parameter will be ignored. " "parameter. The `tree_method` parameter will be ignored. "
"Incorrect sequence of updaters will produce undefined " "Incorrect sequence of updaters will produce undefined "
@ -141,17 +169,9 @@ void GBTree::ConfigureUpdaters(const std::map<std::string, std::string>& cfg) {
void GBTree::DoBoost(DMatrix* p_fmat, void GBTree::DoBoost(DMatrix* p_fmat,
HostDeviceVector<GradientPair>* in_gpair, HostDeviceVector<GradientPair>* in_gpair,
ObjFunction* obj) { ObjFunction* obj) {
std::string updater_seq = tparam_.updater_seq;
this->PerformTreeMethodHeuristic(p_fmat, {this->cfg_.begin(), this->cfg_.end()});
this->ConfigureUpdaters({this->cfg_.begin(), this->cfg_.end()});
LOG(DEBUG) << "Using updaters: " << tparam_.updater_seq;
// initialize the updaters only when needed.
if (updater_seq != tparam_.updater_seq) {
this->updaters_.clear();
}
std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees; std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees;
const int ngroup = model_.param.num_output_group; const int ngroup = model_.param.num_output_group;
ConfigureWithKnownData({this->cfg_.cbegin(), this->cfg_.cend()}, p_fmat);
monitor_.Start("BoostNewTrees"); monitor_.Start("BoostNewTrees");
if (ngroup == 1) { if (ngroup == 1) {
std::vector<std::unique_ptr<RegTree> > ret; std::vector<std::unique_ptr<RegTree> > ret;
@ -189,7 +209,7 @@ void GBTree::InitUpdater() {
std::vector<std::string> ups = common::Split(tval, ','); std::vector<std::string> ups = common::Split(tval, ',');
for (const std::string& pstr : ups) { for (const std::string& pstr : ups) {
std::unique_ptr<TreeUpdater> up(TreeUpdater::Create(pstr.c_str(), learner_param_)); std::unique_ptr<TreeUpdater> up(TreeUpdater::Create(pstr.c_str(), learner_param_));
up->Init(this->cfg_); up->Configure(this->cfg_);
updaters_.push_back(std::move(up)); updaters_.push_back(std::move(up));
} }
} }
@ -198,7 +218,6 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
int bst_group, int bst_group,
std::vector<std::unique_ptr<RegTree> >* ret) { std::vector<std::unique_ptr<RegTree> >* ret) {
this->InitUpdater();
std::vector<RegTree*> new_trees; std::vector<RegTree*> new_trees;
ret->clear(); ret->clear();
// create the trees // create the trees
@ -230,7 +249,8 @@ void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& ne
num_new_trees += new_trees[gid].size(); num_new_trees += new_trees[gid].size();
model_.CommitModel(std::move(new_trees[gid]), gid); model_.CommitModel(std::move(new_trees[gid]), gid);
} }
predictor_->UpdatePredictionCache(model_, &updaters_, num_new_trees); CHECK(configured_);
GetPredictor()->UpdatePredictionCache(model_, &updaters_, num_new_trees);
} }
@ -239,7 +259,7 @@ class Dart : public GBTree {
public: public:
explicit Dart(bst_float base_margin) : GBTree(base_margin) {} explicit Dart(bst_float base_margin) : GBTree(base_margin) {}
void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) override { void Configure(const Args& cfg) override {
GBTree::Configure(cfg); GBTree::Configure(cfg);
if (model_.trees.size() == 0) { if (model_.trees.size() == 0) {
dparam_.InitAllowUnknown(cfg); dparam_.InitAllowUnknown(cfg);

View File

@ -154,13 +154,15 @@ class GBTree : public GradientBooster {
#endif // XGBOOST_USE_CUDA #endif // XGBOOST_USE_CUDA
} }
void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) override; void Configure(const Args& cfg) override;
// Revise `tree_method` and `updater` parameters after seeing the training // Revise `tree_method` and `updater` parameters after seeing the training
// data matrix // data matrix
void PerformTreeMethodHeuristic(DMatrix* p_train, void PerformTreeMethodHeuristic(std::map<std::string, std::string> const& cfg,
std::map<std::string, std::string> cfg); DMatrix* fmat);
/*! \brief Map `tree_method` parameter to `updater` parameter */ /*! \brief Map `tree_method` parameter to `updater` parameter */
void ConfigureUpdaters(const std::map<std::string, std::string>& cfg); void ConfigureUpdaters(const std::map<std::string, std::string>& cfg);
void ConfigureWithKnownData(std::map<std::string, std::string> const& cfg, DMatrix* fmat);
/*! \brief Carry out one iteration of boosting */ /*! \brief Carry out one iteration of boosting */
void DoBoost(DMatrix* p_fmat, void DoBoost(DMatrix* p_fmat,
HostDeviceVector<GradientPair>* in_gpair, HostDeviceVector<GradientPair>* in_gpair,
@ -178,7 +180,7 @@ class GBTree : public GradientBooster {
this->cfg_.clear(); this->cfg_.clear();
this->cfg_.emplace_back(std::string("num_feature"), this->cfg_.emplace_back(std::string("num_feature"),
common::ToString(model_.param.num_feature)); common::ToString(model_.param.num_feature));
} }
GBTreeTrainParam const& GetTrainParam() const { GBTreeTrainParam const& GetTrainParam() const {
@ -195,37 +197,42 @@ class GBTree : public GradientBooster {
} }
void PredictBatch(DMatrix* p_fmat, void PredictBatch(DMatrix* p_fmat,
HostDeviceVector<bst_float>* out_preds, HostDeviceVector<bst_float>* out_preds,
unsigned ntree_limit) override { unsigned ntree_limit) override {
predictor_->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit); CHECK(configured_);
GetPredictor()->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
} }
void PredictInstance(const SparsePage::Inst& inst, void PredictInstance(const SparsePage::Inst& inst,
std::vector<bst_float>* out_preds, std::vector<bst_float>* out_preds,
unsigned ntree_limit, unsigned ntree_limit,
unsigned root_index) override { unsigned root_index) override {
predictor_->PredictInstance(inst, out_preds, model_, CHECK(configured_);
ntree_limit, root_index); cpu_predictor_->PredictInstance(inst, out_preds, model_,
ntree_limit, root_index);
} }
void PredictLeaf(DMatrix* p_fmat, void PredictLeaf(DMatrix* p_fmat,
std::vector<bst_float>* out_preds, std::vector<bst_float>* out_preds,
unsigned ntree_limit) override { unsigned ntree_limit) override {
predictor_->PredictLeaf(p_fmat, out_preds, model_, ntree_limit); CHECK(configured_);
cpu_predictor_->PredictLeaf(p_fmat, out_preds, model_, ntree_limit);
} }
void PredictContribution(DMatrix* p_fmat, void PredictContribution(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs, std::vector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate, int condition, unsigned ntree_limit, bool approximate, int condition,
unsigned condition_feature) override { unsigned condition_feature) override {
predictor_->PredictContribution(p_fmat, out_contribs, model_, ntree_limit, approximate); CHECK(configured_);
cpu_predictor_->PredictContribution(p_fmat, out_contribs, model_, ntree_limit, approximate);
} }
void PredictInteractionContributions(DMatrix* p_fmat, void PredictInteractionContributions(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs, std::vector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate) override { unsigned ntree_limit, bool approximate) override {
predictor_->PredictInteractionContributions(p_fmat, out_contribs, model_, CHECK(configured_);
ntree_limit, approximate); cpu_predictor_->PredictInteractionContributions(p_fmat, out_contribs, model_,
ntree_limit, approximate);
} }
std::vector<std::string> DumpModel(const FeatureMap& fmap, std::vector<std::string> DumpModel(const FeatureMap& fmap,
@ -244,6 +251,25 @@ class GBTree : public GradientBooster {
int bst_group, int bst_group,
std::vector<std::unique_ptr<RegTree> >* ret); std::vector<std::unique_ptr<RegTree> >* ret);
std::unique_ptr<Predictor> const& GetPredictor() const {
CHECK(configured_);
if (tparam_.predictor == "cpu_predictor") {
CHECK(cpu_predictor_);
return cpu_predictor_;
} else if (tparam_.predictor == "gpu_predictor") {
#if defined(XGBOOST_USE_CUDA)
CHECK(gpu_predictor_);
return gpu_predictor_;
#else
LOG(FATAL) << "XGBoost is not compiled with CUDA support.";
return cpu_predictor_;
#endif // defined(XGBOOST_USE_CUDA)
} else {
LOG(FATAL) << "Unknown predictor: " << tparam_.predictor;
return cpu_predictor_;
}
}
// commit new trees all at once // commit new trees all at once
virtual void CommitModel( virtual void CommitModel(
std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees); std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees);
@ -253,13 +279,17 @@ class GBTree : public GradientBooster {
// training parameter // training parameter
GBTreeTrainParam tparam_; GBTreeTrainParam tparam_;
// ----training fields---- // ----training fields----
bool configured_ {false};
// configurations for tree // configurations for tree
std::vector<std::pair<std::string, std::string> > cfg_; Args cfg_;
// the updaters that can be applied to each of tree // the updaters that can be applied to each of tree
std::vector<std::unique_ptr<TreeUpdater>> updaters_; std::vector<std::unique_ptr<TreeUpdater>> updaters_;
// Cached matrices // Cached matrices
std::vector<std::shared_ptr<DMatrix>> cache_; std::vector<std::shared_ptr<DMatrix>> cache_;
std::unique_ptr<Predictor> predictor_; std::unique_ptr<Predictor> cpu_predictor_;
#if defined(XGBOOST_USE_CUDA)
std::unique_ptr<Predictor> gpu_predictor_;
#endif // defined(XGBOOST_USE_CUDA)
common::Monitor monitor_; common::Monitor monitor_;
}; };

View File

@ -63,7 +63,7 @@ struct GBTreeModelParam : public dmlc::Parameter<GBTreeModelParam> {
struct GBTreeModel { struct GBTreeModel {
explicit GBTreeModel(bst_float base_margin) : base_margin(base_margin) {} explicit GBTreeModel(bst_float base_margin) : base_margin(base_margin) {}
void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) { void Configure(const Args& cfg) {
// initialize model parameters if not yet been initialized. // initialize model parameters if not yet been initialized.
if (trees.size() == 0) { if (trees.size() == 0) {
param.InitAllowUnknown(cfg); param.InitAllowUnknown(cfg);

View File

@ -6,6 +6,7 @@
*/ */
#include <dmlc/io.h> #include <dmlc/io.h>
#include <dmlc/timer.h> #include <dmlc/timer.h>
#include <dmlc/any.h>
#include <xgboost/feature_map.h> #include <xgboost/feature_map.h>
#include <xgboost/learner.h> #include <xgboost/learner.h>
#include <xgboost/logging.h> #include <xgboost/logging.h>
@ -50,18 +51,21 @@ inline std::string RenderParamVal(const std::string& str) {
} // anonymous namespace } // anonymous namespace
namespace xgboost {
enum class DataSplitMode : int {
kAuto = 0, kCol = 1, kRow = 2
};
} // namespace xgboost
DECLARE_FIELD_ENUM_CLASS(xgboost::DataSplitMode);
namespace xgboost { namespace xgboost {
// implementation of base learner. // implementation of base learner.
bool Learner::AllowLazyCheckPoint() const { bool Learner::AllowLazyCheckPoint() const {
return gbm_->AllowLazyCheckPoint(); return gbm_->AllowLazyCheckPoint();
} }
std::vector<std::string> Learner::DumpModel(const FeatureMap& fmap,
bool with_stats,
std::string format) const {
return gbm_->DumpModel(fmap, with_stats, format);
}
/*! \brief training parameter for regression */ /*! \brief training parameter for regression */
struct LearnerModelParam : public dmlc::Parameter<LearnerModelParam> { struct LearnerModelParam : public dmlc::Parameter<LearnerModelParam> {
/* \brief global bias */ /* \brief global bias */
@ -97,9 +101,39 @@ struct LearnerModelParam : public dmlc::Parameter<LearnerModelParam> {
} }
}; };
struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
// data split mode, can be row, col, or none.
DataSplitMode dsplit;
// flag to disable default metric
int disable_default_eval_metric;
std::string booster;
std::string objective;
// declare parameters
DMLC_DECLARE_PARAMETER(LearnerTrainParam) {
DMLC_DECLARE_FIELD(dsplit)
.set_default(DataSplitMode::kAuto)
.add_enum("auto", DataSplitMode::kAuto)
.add_enum("col", DataSplitMode::kCol)
.add_enum("row", DataSplitMode::kRow)
.describe("Data split mode for distributed training.");
DMLC_DECLARE_FIELD(disable_default_eval_metric)
.set_default(0)
.describe("flag to disable default metric. Set to >0 to disable");
DMLC_DECLARE_FIELD(booster)
.set_default("gbtree")
.describe("Gradient booster used for training.");
DMLC_DECLARE_FIELD(objective)
.set_default("reg:squarederror")
.describe("Objective function used for obtaining gradient.");
}
};
DMLC_REGISTER_PARAMETER(LearnerModelParam); DMLC_REGISTER_PARAMETER(LearnerModelParam);
DMLC_REGISTER_PARAMETER(LearnerTrainParam); DMLC_REGISTER_PARAMETER(LearnerTrainParam);
DMLC_REGISTER_PARAMETER(GenericParameter);
/*! /*!
* \brief learner that performs gradient boosting for a specific objective * \brief learner that performs gradient boosting for a specific objective
@ -108,56 +142,20 @@ DMLC_REGISTER_PARAMETER(LearnerTrainParam);
class LearnerImpl : public Learner { class LearnerImpl : public Learner {
public: public:
explicit LearnerImpl(std::vector<std::shared_ptr<DMatrix> > cache) explicit LearnerImpl(std::vector<std::shared_ptr<DMatrix> > cache)
: cache_(std::move(cache)) { : configured_{false}, cache_(std::move(cache)) {}
// boosted tree
name_obj_ = "reg:squarederror";
name_gbm_ = "gbtree";
}
void ConfigureObjective() {
if (cfg_.count("num_class") != 0) {
cfg_["num_output_group"] = cfg_["num_class"];
if (atoi(cfg_["num_class"].c_str()) > 1 && cfg_.count("objective") == 0) {
cfg_["objective"] = "multi:softmax";
}
}
if (cfg_.find("max_delta_step") == cfg_.cend() &&
cfg_.find("objective") != cfg_.cend() &&
cfg_["objective"] == "count:poisson") {
cfg_["max_delta_step"] = kMaxDeltaStepDefaultValue;
}
if (cfg_.count("objective") == 0) {
cfg_["objective"] = "reg:squarederror";
}
}
// Configuration before data is known. // Configuration before data is known.
void Configure( void Configure() override {
const std::vector<std::pair<std::string, std::string> >& args) override { if (configured_) { return; }
// add to configurations
tparam_.InitAllowUnknown(args);
ConsoleLogger::Configure(args.cbegin(), args.cend());
monitor_.Init("Learner"); monitor_.Init("Learner");
cfg_.clear(); monitor_.Start("Configure");
auto old_tparam = tparam_;
Args args = {cfg_.cbegin(), cfg_.cend()};
for (const auto& kv : args) { tparam_.InitAllowUnknown(args);
if (kv.first == "eval_metric") { generic_param_.InitAllowUnknown(args);
// check duplication ConsoleLogger::Configure(args);
auto dup_check = [&kv](const std::unique_ptr<Metric>& m) { if (generic_param_.nthread != 0) {
return m->Name() != kv.second; omp_set_num_threads(generic_param_.nthread);
};
if (std::all_of(metrics_.begin(), metrics_.end(), dup_check)) {
metrics_.emplace_back(Metric::Create(kv.second, &tparam_));
mparam_.contain_eval_metrics = 1;
}
} else {
cfg_[kv.first] = kv.second;
}
}
if (tparam_.nthread != 0) {
omp_set_num_threads(tparam_.nthread);
} }
// add additional parameters // add additional parameters
@ -166,54 +164,34 @@ class LearnerImpl : public Learner {
tparam_.dsplit = DataSplitMode::kRow; tparam_.dsplit = DataSplitMode::kRow;
} }
ConfigureObjective(); mparam_.InitAllowUnknown(args);
name_gbm_ = tparam_.booster; // set seed only before the model is initialized
common::GlobalRandom().seed(generic_param_.seed);
// must precede configure gbm since num_features is required for gbm
this->ConfigureNumFeatures();
args = {cfg_.cbegin(), cfg_.cend()}; // renew
this->ConfigureObjective(old_tparam, &args);
this->ConfigureGBM(old_tparam, args);
this->ConfigureMetrics(args);
// FIXME(trivialfis): So which one should go first? Init or Configure? this->configured_ = true;
if (!this->ModelInitialized()) { monitor_.Stop("Configure");
mparam_.InitAllowUnknown(args);
name_obj_ = cfg_["objective"];
// set seed only before the model is initialized
common::GlobalRandom().seed(tparam_.seed);
}
// set number of features correctly.
cfg_["num_feature"] = common::ToString(mparam_.num_feature);
cfg_["num_class"] = common::ToString(mparam_.num_class);
if (gbm_ != nullptr) {
gbm_->Configure(cfg_.begin(), cfg_.end());
}
if (obj_ != nullptr) {
obj_->Configure(cfg_.begin(), cfg_.end());
}
for (auto& p_metric : metrics_) {
p_metric->Configure(cfg_.begin(), cfg_.end());
}
} }
void InitModel() override { this->LazyInitModel(); } void CheckDataSplitMode() {
if (rabit::IsDistributed()) {
// Configuration can only be done after data is known CHECK(tparam_.dsplit != DataSplitMode::kAuto)
void ConfigurationWithKnownData(DMatrix* dmat) { << "Precondition violated; dsplit cannot be 'auto' in distributed mode";
CHECK(ModelInitialized()) if (tparam_.dsplit == DataSplitMode::kCol) {
<< " Internal Error: Always call InitModel or Load before any evaluation."; // 'distcol' updater hidden until it becomes functional again
this->ValidateDMatrix(dmat); // See discussion at https://github.com/dmlc/xgboost/issues/1832
CHECK(this->gbm_) << " Internal: GBM is not set"; LOG(FATAL) << "Column-wise data split is currently not supported.";
if (this->gbm_->UseGPU()) {
if (cfg_.find("n_gpus") == cfg_.cend()) {
tparam_.n_gpus = 1;
}
if (tparam_.n_gpus != 1) {
LOG(WARNING) << "Multi-GPU training is deprecated. "
"Please use distributed GPU training with one process per GPU.";
} }
} }
} }
void Load(dmlc::Stream* fi) override { void Load(dmlc::Stream* fi) override {
tparam_ = LearnerTrainParam(); generic_param_.InitAllowUnknown(Args{});
tparam_.Init(std::vector<std::pair<std::string, std::string>>{}); tparam_.Init(std::vector<std::pair<std::string, std::string>>{});
// TODO(tqchen) mark deprecation of old format. // TODO(tqchen) mark deprecation of old format.
common::PeekableInStream fp(fi); common::PeekableInStream fp(fi);
@ -244,15 +222,15 @@ class LearnerImpl : public Learner {
len = len >> static_cast<uint64_t>(32UL); len = len >> static_cast<uint64_t>(32UL);
} }
if (len != 0) { if (len != 0) {
name_obj_.resize(len); tparam_.objective.resize(len);
CHECK_EQ(fi->Read(&name_obj_[0], len), len) CHECK_EQ(fi->Read(&tparam_.objective[0], len), len)
<< "BoostLearner: wrong model format"; << "BoostLearner: wrong model format";
} }
} }
CHECK(fi->Read(&name_gbm_)) << "BoostLearner: wrong model format"; CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format";
// duplicated code with LazyInitModel // duplicated code with LazyInitModel
obj_.reset(ObjFunction::Create(name_obj_, &tparam_)); obj_.reset(ObjFunction::Create(tparam_.objective, &generic_param_));
gbm_.reset(GradientBooster::Create(name_gbm_, &tparam_, gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_param_,
cache_, mparam_.base_score)); cache_, mparam_.base_score));
gbm_->Load(fi); gbm_->Load(fi);
if (mparam_.contain_extra_attrs != 0) { if (mparam_.contain_extra_attrs != 0) {
@ -287,17 +265,17 @@ class LearnerImpl : public Learner {
kv.second = "cpu_predictor"; kv.second = "cpu_predictor";
} }
#endif // XGBOOST_USE_CUDA #endif // XGBOOST_USE_CUDA
// NO visiable GPU on current environment // NO visible GPU in current environment
if (is_gpu_predictor && GPUSet::AllVisible().Size() == 0) { if (is_gpu_predictor && GPUSet::AllVisible().Size() == 0) {
cfg_["predictor"] = "cpu_predictor"; cfg_["predictor"] = "cpu_predictor";
kv.second = "cpu_predictor"; kv.second = "cpu_predictor";
LOG(INFO) << "Switch gpu_predictor to cpu_predictor.";
} }
} }
} }
attributes_ = attributes_ = std::map<std::string, std::string>(attr.begin(), attr.end());
std::map<std::string, std::string>(attr.begin(), attr.end());
} }
if (name_obj_ == "count:poisson") { if (tparam_.objective == "count:poisson") {
std::string max_delta_step; std::string max_delta_step;
fi->Read(&max_delta_step); fi->Read(&max_delta_step);
cfg_["max_delta_step"] = max_delta_step; cfg_["max_delta_step"] = max_delta_step;
@ -306,26 +284,41 @@ class LearnerImpl : public Learner {
std::vector<std::string> metr; std::vector<std::string> metr;
fi->Read(&metr); fi->Read(&metr);
for (auto name : metr) { for (auto name : metr) {
metrics_.emplace_back( metrics_.emplace_back(Metric::Create(name, &generic_param_));
Metric::Create(name, &tparam_));
} }
} }
cfg_["num_class"] = common::ToString(mparam_.num_class); cfg_["num_class"] = common::ToString(mparam_.num_class);
cfg_["num_feature"] = common::ToString(mparam_.num_feature); cfg_["num_feature"] = common::ToString(mparam_.num_feature);
obj_->Configure(cfg_.begin(), cfg_.end());
gbm_->Configure({cfg_.cbegin(), cfg_.cend()});
obj_->Configure({cfg_.begin(), cfg_.end()});
for (auto& p_metric : metrics_) { for (auto& p_metric : metrics_) {
p_metric->Configure(cfg_.begin(), cfg_.end()); p_metric->Configure({cfg_.begin(), cfg_.end()});
} }
this->configured_ = true;
} }
// rabit save model to rabit checkpoint // rabit save model to rabit checkpoint
void Save(dmlc::Stream* fo) const override { void Save(dmlc::Stream* fo) const override {
if (!this->configured_) {
// Save empty model. Calling Configure in a dummy LearnerImpl avoids violating
// constness.
LearnerImpl empty(std::move(this->cache_));
empty.SetParams({this->cfg_.cbegin(), this->cfg_.cend()});
for (auto const& kv : attributes_) {
empty.SetAttr(kv.first, kv.second);
}
empty.Configure();
empty.Save(fo);
return;
}
LearnerModelParam mparam = mparam_; // make a copy to potentially modify LearnerModelParam mparam = mparam_; // make a copy to potentially modify
std::vector<std::pair<std::string, std::string> > extra_attr; std::vector<std::pair<std::string, std::string> > extra_attr;
// extra attributed to be added just before saving // extra attributed to be added just before saving
if (name_obj_ == "count:poisson") { if (tparam_.objective == "count:poisson") {
auto it = cfg_.find("max_delta_step"); auto it = cfg_.find("max_delta_step");
if (it != cfg_.end()) { if (it != cfg_.end()) {
// write `max_delta_step` parameter as extra attribute of booster // write `max_delta_step` parameter as extra attribute of booster
@ -336,7 +329,7 @@ class LearnerImpl : public Learner {
{ {
// Write `predictor`, `n_gpus`, `gpu_id` parameters as extra attributes // Write `predictor`, `n_gpus`, `gpu_id` parameters as extra attributes
for (const auto& key : std::vector<std::string>{ for (const auto& key : std::vector<std::string>{
"predictor", "n_gpus", "gpu_id"}) { "predictor", "n_gpus", "gpu_id"}) {
auto it = cfg_.find(key); auto it = cfg_.find(key);
if (it != cfg_.end()) { if (it != cfg_.end()) {
mparam.contain_extra_attrs = 1; mparam.contain_extra_attrs = 1;
@ -345,8 +338,8 @@ class LearnerImpl : public Learner {
} }
} }
fo->Write(&mparam, sizeof(LearnerModelParam)); fo->Write(&mparam, sizeof(LearnerModelParam));
fo->Write(name_obj_); fo->Write(tparam_.objective);
fo->Write(name_gbm_); fo->Write(tparam_.booster);
gbm_->Save(fo); gbm_->Save(fo);
if (mparam.contain_extra_attrs != 0) { if (mparam.contain_extra_attrs != 0) {
std::map<std::string, std::string> attr(attributes_); std::map<std::string, std::string> attr(attributes_);
@ -356,7 +349,7 @@ class LearnerImpl : public Learner {
fo->Write(std::vector<std::pair<std::string, std::string>>( fo->Write(std::vector<std::pair<std::string, std::string>>(
attr.begin(), attr.end())); attr.begin(), attr.end()));
} }
if (name_obj_ == "count:poisson") { if (tparam_.objective == "count:poisson") {
auto it = cfg_.find("max_delta_step"); auto it = cfg_.find("max_delta_step");
if (it != cfg_.end()) { if (it != cfg_.end()) {
fo->Write(it->second); fo->Write(it->second);
@ -377,27 +370,21 @@ class LearnerImpl : public Learner {
} }
} }
void CheckDataSplitMode() { std::vector<std::string> DumpModel(const FeatureMap& fmap,
if (rabit::IsDistributed()) { bool with_stats,
CHECK(tparam_.dsplit != DataSplitMode::kAuto) std::string format) const override {
<< "Precondition violated; dsplit cannot be 'auto' in distributed mode"; return gbm_->DumpModel(fmap, with_stats, format);
if (tparam_.dsplit == DataSplitMode::kCol) {
// 'distcol' updater hidden until it becomes functional again
// See discussion at https://github.com/dmlc/xgboost/issues/1832
LOG(FATAL) << "Column-wise data split is currently not supported.";
}
}
} }
void UpdateOneIter(int iter, DMatrix* train) override { void UpdateOneIter(int iter, DMatrix* train) override {
monitor_.Start("UpdateOneIter"); monitor_.Start("UpdateOneIter");
if (tparam_.seed_per_iteration || rabit::IsDistributed()) { if (generic_param_.seed_per_iteration || rabit::IsDistributed()) {
common::GlobalRandom().seed(tparam_.seed * kRandSeedMagic + iter); common::GlobalRandom().seed(generic_param_.seed * kRandSeedMagic + iter);
} }
// this->PerformTreeMethodHeuristic(train); this->Configure();
this->CheckDataSplitMode(); this->CheckDataSplitMode();
this->ConfigurationWithKnownData(train); this->ValidateDMatrix(train);
monitor_.Start("PredictRaw"); monitor_.Start("PredictRaw");
this->PredictRaw(train, &preds_[train]); this->PredictRaw(train, &preds_[train]);
@ -412,12 +399,12 @@ class LearnerImpl : public Learner {
void BoostOneIter(int iter, DMatrix* train, void BoostOneIter(int iter, DMatrix* train,
HostDeviceVector<GradientPair>* in_gpair) override { HostDeviceVector<GradientPair>* in_gpair) override {
monitor_.Start("BoostOneIter"); monitor_.Start("BoostOneIter");
if (tparam_.seed_per_iteration || rabit::IsDistributed()) { if (generic_param_.seed_per_iteration || rabit::IsDistributed()) {
common::GlobalRandom().seed(tparam_.seed * kRandSeedMagic + iter); common::GlobalRandom().seed(generic_param_.seed * kRandSeedMagic + iter);
} }
this->Configure();
this->CheckDataSplitMode(); this->CheckDataSplitMode();
// this->PerformTreeMethodHeuristic(train); this->ValidateDMatrix(train);
this->ConfigurationWithKnownData(train);
gbm_->DoBoost(train, in_gpair); gbm_->DoBoost(train, in_gpair);
monitor_.Stop("BoostOneIter"); monitor_.Stop("BoostOneIter");
@ -426,16 +413,17 @@ class LearnerImpl : public Learner {
std::string EvalOneIter(int iter, const std::vector<DMatrix*>& data_sets, std::string EvalOneIter(int iter, const std::vector<DMatrix*>& data_sets,
const std::vector<std::string>& data_names) override { const std::vector<std::string>& data_names) override {
monitor_.Start("EvalOneIter"); monitor_.Start("EvalOneIter");
this->Configure();
std::ostringstream os; std::ostringstream os;
os << '[' << iter << ']' << std::setiosflags(std::ios::fixed); os << '[' << iter << ']' << std::setiosflags(std::ios::fixed);
if (metrics_.size() == 0 && tparam_.disable_default_eval_metric <= 0) { if (metrics_.size() == 0 && tparam_.disable_default_eval_metric <= 0) {
metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric(), &tparam_)); metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric(), &generic_param_));
metrics_.back()->Configure(cfg_.begin(), cfg_.end()); metrics_.back()->Configure({cfg_.begin(), cfg_.end()});
} }
for (size_t i = 0; i < data_sets.size(); ++i) { for (size_t i = 0; i < data_sets.size(); ++i) {
DMatrix * dmat = data_sets[i]; DMatrix * dmat = data_sets[i];
this->ConfigurationWithKnownData(dmat); this->ValidateDMatrix(dmat);
this->PredictRaw(data_sets[i], &preds_[dmat]); this->PredictRaw(data_sets[i], &preds_[dmat]);
obj_->EvalTransform(&preds_[dmat]); obj_->EvalTransform(&preds_[dmat]);
for (auto& ev : metrics_) { for (auto& ev : metrics_) {
@ -449,6 +437,25 @@ class LearnerImpl : public Learner {
return os.str(); return os.str();
} }
void SetParam(const std::string& key, const std::string& value) override {
configured_ = false;
if (key == kEvalMetric) {
if (std::find(metric_names_.cbegin(), metric_names_.cend(),
value) == metric_names_.cend()) {
metric_names_.emplace_back(value);
}
} else {
cfg_[key] = value;
}
}
// Short hand for setting multiple parameters
void SetParams(std::vector<std::pair<std::string, std::string>> const& args) override {
configured_ = false;
for (auto const& kv : args) {
this->SetParam(kv.first, kv.second);
}
}
void SetAttr(const std::string& key, const std::string& value) override { void SetAttr(const std::string& key, const std::string& value) override {
attributes_[key] = value; attributes_[key] = value;
mparam_.contain_extra_attrs = 1; mparam_.contain_extra_attrs = 1;
@ -463,34 +470,21 @@ class LearnerImpl : public Learner {
bool DelAttr(const std::string& key) override { bool DelAttr(const std::string& key) override {
auto it = attributes_.find(key); auto it = attributes_.find(key);
if (it == attributes_.end()) return false; if (it == attributes_.end()) { return false; }
attributes_.erase(it); attributes_.erase(it);
return true; return true;
} }
std::vector<std::string> GetAttrNames() const override { std::vector<std::string> GetAttrNames() const override {
std::vector<std::string> out; std::vector<std::string> out;
out.reserve(attributes_.size()); for (auto const& kv : attributes_) {
for (auto& p : attributes_) { out.emplace_back(kv.first);
out.push_back(p.first);
} }
return out; return out;
} }
LearnerTrainParam const& GetLearnerTrainParameter() const override { GenericParameter const& GetGenericParameter() const override {
return tparam_; return generic_param_;
}
std::pair<std::string, bst_float> Evaluate(DMatrix* data,
std::string metric) {
if (metric == "auto") metric = obj_->DefaultEvalMetric();
std::unique_ptr<Metric> ev(Metric::Create(metric.c_str(), &tparam_));
this->ConfigurationWithKnownData(data);
this->PredictRaw(data, &preds_[data]);
obj_->EvalTransform(&preds_[data]);
return std::make_pair(metric,
ev->Eval(preds_[data], data->Info(),
tparam_.dsplit == DataSplitMode::kRow));
} }
void Predict(DMatrix* data, bool output_margin, void Predict(DMatrix* data, bool output_margin,
@ -500,6 +494,7 @@ class LearnerImpl : public Learner {
int multiple_predictions = static_cast<int>(pred_leaf) + int multiple_predictions = static_cast<int>(pred_leaf) +
static_cast<int>(pred_interactions) + static_cast<int>(pred_interactions) +
static_cast<int>(pred_contribs); static_cast<int>(pred_contribs);
this->Configure();
CHECK_LE(multiple_predictions, 1) << "Perform one kind of prediction at a time."; CHECK_LE(multiple_predictions, 1) << "Perform one kind of prediction at a time.";
if (pred_contribs) { if (pred_contribs) {
gbm_->PredictContribution(data, &out_preds->HostVector(), ntree_limit, approx_contribs); gbm_->PredictContribution(data, &out_preds->HostVector(), ntree_limit, approx_contribs);
@ -521,11 +516,76 @@ class LearnerImpl : public Learner {
} }
protected: protected:
/*!
* \brief get un-transformed prediction
* \param data training data matrix
* \param out_preds output vector that stores the prediction
* \param ntree_limit limit number of trees used for boosted tree
* predictor, when it equals 0, this means we are using all the trees
*/
void PredictRaw(DMatrix* data, HostDeviceVector<bst_float>* out_preds,
unsigned ntree_limit = 0) const {
CHECK(gbm_ != nullptr)
<< "Predict must happen after Load or InitModel";
this->ValidateDMatrix(data);
gbm_->PredictBatch(data, out_preds, ntree_limit);
}
// return whether model is already initialized. // return whether model is already initialized.
inline bool ModelInitialized() const { return gbm_ != nullptr; } bool ModelInitialized() const { return configured_; }
// lazily initialize the model based on configuration if it haven't yet been initialized.
inline void LazyInitModel() { void ConfigureObjective(LearnerTrainParam const& old, Args* p_args) {
if (this->ModelInitialized()) return; if (cfg_.find("num_class") != cfg_.cend() && cfg_.at("num_class") != "0") {
cfg_["num_output_group"] = cfg_["num_class"];
if (atoi(cfg_["num_class"].c_str()) > 1 && cfg_.count("objective") == 0) {
tparam_.objective = "multi:softmax";
}
}
if (cfg_.find("max_delta_step") == cfg_.cend() &&
cfg_.find("objective") != cfg_.cend() &&
tparam_.objective == "count:poisson") {
cfg_["max_delta_step"] = kMaxDeltaStepDefaultValue;
}
if (obj_ == nullptr || tparam_.objective != old.objective) {
obj_.reset(ObjFunction::Create(tparam_.objective, &generic_param_));
}
// reset the base score
mparam_.base_score = obj_->ProbToMargin(mparam_.base_score);
auto& args = *p_args;
args = {cfg_.cbegin(), cfg_.cend()}; // renew
obj_->Configure(args);
}
void ConfigureMetrics(Args const& args) {
for (auto const& name : metric_names_) {
auto DupCheck = [&name](std::unique_ptr<Metric> const& m) {
return m->Name() != name;
};
if (std::all_of(metrics_.begin(), metrics_.end(), DupCheck)) {
metrics_.emplace_back(std::unique_ptr<Metric>(Metric::Create(name, &generic_param_)));
mparam_.contain_eval_metrics = 1;
}
}
for (auto& p_metric : metrics_) {
p_metric->Configure(args);
}
}
void ConfigureGBM(LearnerTrainParam const& old, Args const& args) {
if (gbm_ == nullptr || old.booster != tparam_.booster) {
gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_param_,
cache_, mparam_.base_score));
}
gbm_->Configure(args);
if (this->gbm_->UseGPU() && cfg_.find("n_gpus") == cfg_.cend()) {
generic_param_.n_gpus = 1;
}
}
// set number of features correctly.
void ConfigureNumFeatures() {
// estimate feature bound // estimate feature bound
// TODO(hcho3): Change num_feature to 64-bit integer // TODO(hcho3): Change num_feature to 64-bit integer
unsigned num_feature = 0; unsigned num_feature = 0;
@ -533,8 +593,8 @@ class LearnerImpl : public Learner {
CHECK(matrix != nullptr); CHECK(matrix != nullptr);
const uint64_t num_col = matrix->Info().num_col_; const uint64_t num_col = matrix->Info().num_col_;
CHECK_LE(num_col, static_cast<uint64_t>(std::numeric_limits<unsigned>::max())) CHECK_LE(num_col, static_cast<uint64_t>(std::numeric_limits<unsigned>::max()))
<< "Unfortunately, XGBoost does not support data matrices with " << "Unfortunately, XGBoost does not support data matrices with "
<< std::numeric_limits<unsigned>::max() << " features or greater"; << std::numeric_limits<unsigned>::max() << " features or greater";
num_feature = std::max(num_feature, static_cast<unsigned>(num_col)); num_feature = std::max(num_feature, static_cast<unsigned>(num_col));
} }
// run allreduce on num_feature to find the maximum value // run allreduce on num_feature to find the maximum value
@ -546,32 +606,10 @@ class LearnerImpl : public Learner {
<< "0 feature is supplied. Are you using raw Booster interface?"; << "0 feature is supplied. Are you using raw Booster interface?";
// setup // setup
cfg_["num_feature"] = common::ToString(mparam_.num_feature); cfg_["num_feature"] = common::ToString(mparam_.num_feature);
CHECK(obj_ == nullptr && gbm_ == nullptr); cfg_["num_class"] = common::ToString(mparam_.num_class);
obj_.reset(ObjFunction::Create(name_obj_, &tparam_));
obj_->Configure(cfg_.begin(), cfg_.end());
// reset the base score
mparam_.base_score = obj_->ProbToMargin(mparam_.base_score);
gbm_.reset(GradientBooster::Create(name_gbm_, &tparam_,
cache_, mparam_.base_score));
gbm_->Configure(cfg_.begin(), cfg_.end());
} }
/*! void ValidateDMatrix(DMatrix* p_fmat) const {
* \brief get un-transformed prediction
* \param data training data matrix
* \param out_preds output vector that stores the prediction
* \param ntree_limit limit number of trees used for boosted tree
* predictor, when it equals 0, this means we are using all the trees
*/
void PredictRaw(DMatrix* data, HostDeviceVector<bst_float>* out_preds,
unsigned ntree_limit = 0) {
CHECK(gbm_ != nullptr)
<< "Predict must happen after Load or InitModel";
ConfigurationWithKnownData(data);
gbm_->PredictBatch(data, out_preds, ntree_limit);
}
void ValidateDMatrix(DMatrix* p_fmat) {
MetaInfo const& info = p_fmat->Info(); MetaInfo const& info = p_fmat->Info();
auto const& weights = info.weights_.HostVector(); auto const& weights = info.weights_.HostVector();
if (info.group_ptr_.size() != 0 && weights.size() != 0) { if (info.group_ptr_.size() != 0 && weights.size() != 0) {
@ -586,28 +624,33 @@ class LearnerImpl : public Learner {
// model parameter // model parameter
LearnerModelParam mparam_; LearnerModelParam mparam_;
LearnerTrainParam tparam_;
// configurations // configurations
std::map<std::string, std::string> cfg_; std::map<std::string, std::string> cfg_;
// attributes // FIXME(trivialfis): Legacy field used to store extra attributes into binary model.
std::map<std::string, std::string> attributes_; std::map<std::string, std::string> attributes_;
// name of gbm std::vector<std::string> metric_names_;
std::string name_gbm_; static std::string const kEvalMetric; // NOLINT
// name of objective function
std::string name_obj_;
// temporal storages for prediction // temporal storages for prediction
std::map<DMatrix*, HostDeviceVector<bst_float>> preds_; std::map<DMatrix*, HostDeviceVector<bst_float>> preds_;
// gradient pairs // gradient pairs
HostDeviceVector<GradientPair> gpair_; HostDeviceVector<GradientPair> gpair_;
bool configured_;
private: private:
/*! \brief random number transformation seed. */ /*! \brief random number transformation seed. */
static const int kRandSeedMagic = 127; static int32_t constexpr kRandSeedMagic = 127;
// internal cached dmatrix // internal cached dmatrix
std::vector<std::shared_ptr<DMatrix> > cache_; std::vector<std::shared_ptr<DMatrix> > cache_;
common::Monitor monitor_; common::Monitor monitor_;
}; };
std::string const LearnerImpl::kEvalMetric {"eval_metric"}; // NOLINT
constexpr int32_t LearnerImpl::kRandSeedMagic;
Learner* Learner::Create( Learner* Learner::Create(
const std::vector<std::shared_ptr<DMatrix> >& cache_data) { const std::vector<std::shared_ptr<DMatrix> >& cache_data) {
return new LearnerImpl(cache_data); return new LearnerImpl(cache_data);
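
The learner.cc hunks above all feed one lazy path: `SetParam`/`SetParams` only record key/value pairs (metric names go into `metric_names_`) and clear `configured_`, while `Configure()` is re-run at the top of `UpdateOneIter`, `BoostOneIter`, `EvalOneIter` and `Predict` and returns immediately once `configured_` is set. A minimal caller-side sketch of that flow follows; the `DMatrix` handle and the parameter values are assumptions for illustration, not part of this commit:

    // learner_flow_sketch.cc -- illustration only, not part of this commit.
    #include <iostream>
    #include <memory>
    #include <vector>
    #include <xgboost/data.h>
    #include <xgboost/learner.h>

    // `p_train` is assumed to be a DMatrix created elsewhere (e.g. via the C API).
    void TrainSketch(std::shared_ptr<xgboost::DMatrix> p_train) {
      std::vector<std::shared_ptr<xgboost::DMatrix>> cache {p_train};
      std::unique_ptr<xgboost::Learner> learner {xgboost::Learner::Create(cache)};

      // SetParams only records the arguments and resets `configured_`.
      learner->SetParams({{"booster", "gbtree"},
                          {"objective", "reg:squarederror"},
                          {"eval_metric", "rmse"}});

      for (int iter = 0; iter < 10; ++iter) {
        // UpdateOneIter and EvalOneIter call Configure() first, so the objective,
        // booster and metrics are (re)built lazily whenever parameters changed.
        learner->UpdateOneIter(iter, p_train.get());
        std::cout << learner->EvalOneIter(iter, {p_train.get()}, {"train"}) << '\n';
      }
    }

Since `Configure()` bails out early while `configured_` stays true, calling it once per iteration is cheap after the first pass.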


@ -11,7 +11,7 @@ DMLC_REGISTRY_ENABLE(::xgboost::LinearUpdaterReg);
namespace xgboost { namespace xgboost {
LinearUpdater* LinearUpdater::Create(const std::string& name, LearnerTrainParam const* lparam) { LinearUpdater* LinearUpdater::Create(const std::string& name, GenericParameter const* lparam) {
auto *e = ::dmlc::Registry< ::xgboost::LinearUpdaterReg>::Get()->Find(name); auto *e = ::dmlc::Registry< ::xgboost::LinearUpdaterReg>::Get()->Find(name);
if (e == nullptr) { if (e == nullptr) {
LOG(FATAL) << "Unknown linear updater " << name; LOG(FATAL) << "Unknown linear updater " << name;


@ -24,8 +24,7 @@ DMLC_REGISTRY_FILE_TAG(updater_coordinate);
class CoordinateUpdater : public LinearUpdater { class CoordinateUpdater : public LinearUpdater {
public: public:
// set training parameter // set training parameter
void Init( void Configure(Args const& args) override {
const std::vector<std::pair<std::string, std::string> > &args) override {
const std::vector<std::pair<std::string, std::string> > rest { const std::vector<std::pair<std::string, std::string> > rest {
tparam_.InitAllowUnknown(args) tparam_.InitAllowUnknown(args)
}; };


@ -157,8 +157,7 @@ class DeviceShard {
class GPUCoordinateUpdater : public LinearUpdater { class GPUCoordinateUpdater : public LinearUpdater {
public: public:
// set training parameter // set training parameter
void Init( void Configure(Args const& args) override {
const std::vector<std::pair<std::string, std::string>> &args) override {
tparam_.InitAllowUnknown(args); tparam_.InitAllowUnknown(args);
selector_.reset(FeatureSelector::Create(tparam_.feature_selector)); selector_.reset(FeatureSelector::Create(tparam_.feature_selector));
monitor_.Init("GPUCoordinateUpdater"); monitor_.Init("GPUCoordinateUpdater");


@ -14,7 +14,7 @@ DMLC_REGISTRY_FILE_TAG(updater_shotgun);
class ShotgunUpdater : public LinearUpdater { class ShotgunUpdater : public LinearUpdater {
public: public:
// set training parameter // set training parameter
void Init(const std::vector<std::pair<std::string, std::string> > &args) override { void Configure(Args const& args) override {
param_.InitAllowUnknown(args); param_.InitAllowUnknown(args);
if (param_.feature_selector != kCyclic && if (param_.feature_selector != kCyclic &&
param_.feature_selector != kShuffle) { param_.feature_selector != kShuffle) {


@ -50,7 +50,7 @@ bool ConsoleLogger::ShouldLog(LogVerbosity verbosity) {
return verbosity <= global_verbosity_ || verbosity == LV::kIgnore; return verbosity <= global_verbosity_ || verbosity == LV::kIgnore;
} }
void ConsoleLogger::Configure(const std::map<std::string, std::string>& args) { void ConsoleLogger::Configure(Args const& args) {
param_.InitAllowUnknown(args); param_.InitAllowUnknown(args);
// Deprecated, but when trying to display deprecation message some R // Deprecated, but when trying to display deprecation message some R
// tests trying to catch stdout will fail. // tests trying to catch stdout will fail.


@ -104,7 +104,7 @@ class ElementWiseMetricsReduction {
#endif // XGBOOST_USE_CUDA #endif // XGBOOST_USE_CUDA
PackedReduceResult Reduce( PackedReduceResult Reduce(
const LearnerTrainParam &tparam, const GenericParameter &tparam,
GPUSet devices, GPUSet devices,
const HostDeviceVector<bst_float>& weights, const HostDeviceVector<bst_float>& weights,
const HostDeviceVector<bst_float>& labels, const HostDeviceVector<bst_float>& labels,


@ -12,7 +12,7 @@ DMLC_REGISTRY_ENABLE(::xgboost::MetricReg);
} }
namespace xgboost { namespace xgboost {
Metric* Metric::Create(const std::string& name, LearnerTrainParam const* tparam) { Metric* Metric::Create(const std::string& name, GenericParameter const* tparam) {
std::string buf = name; std::string buf = name;
std::string prefix = name; std::string prefix = name;
const char* param; const char* param;


@ -126,7 +126,7 @@ class MultiClassMetricsReduction {
#endif // XGBOOST_USE_CUDA #endif // XGBOOST_USE_CUDA
PackedReduceResult Reduce( PackedReduceResult Reduce(
const LearnerTrainParam &tparam, const GenericParameter &tparam,
GPUSet devices, GPUSet devices,
size_t n_class, size_t n_class,
const HostDeviceVector<bst_float>& weights, const HostDeviceVector<bst_float>& weights,


@ -50,7 +50,11 @@ class SoftmaxMultiClassObj : public ObjFunction {
HostDeviceVector<GradientPair>* out_gpair) override { HostDeviceVector<GradientPair>* out_gpair) override {
CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty"; CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
CHECK(preds.Size() == (static_cast<size_t>(param_.num_class) * info.labels_.Size())) CHECK(preds.Size() == (static_cast<size_t>(param_.num_class) * info.labels_.Size()))
<< "SoftmaxMultiClassObj: label size and pred size does not match"; << "SoftmaxMultiClassObj: label size and pred size does not match.\n"
<< "label.Size() * num_class: "
<< info.labels_.Size() * static_cast<size_t>(param_.num_class) << "\n"
<< "num_class: " << param_.num_class << "\n"
<< "preds.Size(): " << preds.Size();
const int nclass = param_.num_class; const int nclass = param_.num_class;
const auto ndata = static_cast<int64_t>(preds.Size() / nclass); const auto ndata = static_cast<int64_t>(preds.Size() / nclass);


@ -14,7 +14,7 @@ DMLC_REGISTRY_ENABLE(::xgboost::ObjFunctionReg);
namespace xgboost { namespace xgboost {
// implement factory functions // implement factory functions
ObjFunction* ObjFunction::Create(const std::string& name, LearnerTrainParam const* tparam) { ObjFunction* ObjFunction::Create(const std::string& name, GenericParameter const* tparam) {
auto *e = ::dmlc::Registry< ::xgboost::ObjFunctionReg>::Get()->Find(name); auto *e = ::dmlc::Registry< ::xgboost::ObjFunctionReg>::Get()->Find(name);
if (e == nullptr) { if (e == nullptr) {
for (const auto& entry : ::dmlc::Registry< ::xgboost::ObjFunctionReg>::List()) { for (const auto& entry : ::dmlc::Registry< ::xgboost::ObjFunctionReg>::List()) {


@ -377,8 +377,7 @@ class GPUPredictor : public xgboost::Predictor {
} }
public: public:
GPUPredictor() // NOLINT GPUPredictor() = default;
: cpu_predictor_(Predictor::Create("cpu_predictor", learner_param_)) {}
void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds, void PredictBatch(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model, int tree_begin, const gbm::GBTreeModel& model, int tree_begin,
@ -468,12 +467,15 @@ class GPUPredictor : public xgboost::Predictor {
std::vector<bst_float>* out_preds, std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model, unsigned ntree_limit, const gbm::GBTreeModel& model, unsigned ntree_limit,
unsigned root_index) override { unsigned root_index) override {
cpu_predictor_->PredictInstance(inst, out_preds, model, root_index); LOG(FATAL) << "Internal error: " << __func__
<< " is not implemented in GPU Predictor.";
} }
void PredictLeaf(DMatrix* p_fmat, std::vector<bst_float>* out_preds, void PredictLeaf(DMatrix* p_fmat, std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model, const gbm::GBTreeModel& model,
unsigned ntree_limit) override { unsigned ntree_limit) override {
cpu_predictor_->PredictLeaf(p_fmat, out_preds, model, ntree_limit); LOG(FATAL) << "Internal error: " << __func__
<< " is not implemented in GPU Predictor.";
} }
void PredictContribution(DMatrix* p_fmat, void PredictContribution(DMatrix* p_fmat,
@ -481,9 +483,8 @@ class GPUPredictor : public xgboost::Predictor {
const gbm::GBTreeModel& model, unsigned ntree_limit, const gbm::GBTreeModel& model, unsigned ntree_limit,
bool approximate, int condition, bool approximate, int condition,
unsigned condition_feature) override { unsigned condition_feature) override {
cpu_predictor_->PredictContribution(p_fmat, out_contribs, model, ntree_limit, LOG(FATAL) << "Internal error: " << __func__
approximate, condition, << " is not implemented in GPU Predictor.";
condition_feature);
} }
void PredictInteractionContributions(DMatrix* p_fmat, void PredictInteractionContributions(DMatrix* p_fmat,
@ -491,14 +492,13 @@ class GPUPredictor : public xgboost::Predictor {
const gbm::GBTreeModel& model, const gbm::GBTreeModel& model,
unsigned ntree_limit, unsigned ntree_limit,
bool approximate) override { bool approximate) override {
cpu_predictor_->PredictInteractionContributions(p_fmat, out_contribs, model, LOG(FATAL) << "Internal error: " << __func__
ntree_limit, approximate); << " is not implemented in GPU Predictor.";
} }
void Init(const std::vector<std::pair<std::string, std::string>>& cfg, void Configure(const std::vector<std::pair<std::string, std::string>>& cfg,
const std::vector<std::shared_ptr<DMatrix>>& cache) override { const std::vector<std::shared_ptr<DMatrix>>& cache) override {
Predictor::Init(cfg, cache); Predictor::Configure(cfg, cache);
cpu_predictor_->Init(cfg, cache);
GPUSet devices = GPUSet::All(learner_param_->gpu_id, learner_param_->n_gpus); GPUSet devices = GPUSet::All(learner_param_->gpu_id, learner_param_->n_gpus);
ConfigureShards(devices); ConfigureShards(devices);
@ -517,7 +517,6 @@ class GPUPredictor : public xgboost::Predictor {
}); });
} }
std::unique_ptr<Predictor> cpu_predictor_;
std::vector<DeviceShard> shards_; std::vector<DeviceShard> shards_;
GPUSet devices_; GPUSet devices_;
common::Monitor monitor_; common::Monitor monitor_;


@ -8,14 +8,14 @@ namespace dmlc {
DMLC_REGISTRY_ENABLE(::xgboost::PredictorReg); DMLC_REGISTRY_ENABLE(::xgboost::PredictorReg);
} // namespace dmlc } // namespace dmlc
namespace xgboost { namespace xgboost {
void Predictor::Init( void Predictor::Configure(
const std::vector<std::pair<std::string, std::string>>& cfg, const std::vector<std::pair<std::string, std::string>>& cfg,
const std::vector<std::shared_ptr<DMatrix>>& cache) { const std::vector<std::shared_ptr<DMatrix>>& cache) {
for (const std::shared_ptr<DMatrix>& d : cache) { for (const std::shared_ptr<DMatrix>& d : cache) {
cache_[d.get()].data = d; cache_[d.get()].data = d;
} }
} }
Predictor* Predictor::Create(std::string const& name, LearnerTrainParam const* learner_param) { Predictor* Predictor::Create(std::string const& name, GenericParameter const* learner_param) {
auto* e = ::dmlc::Registry<PredictorReg>::Get()->Find(name); auto* e = ::dmlc::Registry<PredictorReg>::Get()->Find(name);
if (e == nullptr) { if (e == nullptr) {
LOG(FATAL) << "Unknown predictor type " << name; LOG(FATAL) << "Unknown predictor type " << name;
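
For reference, a minimal sketch of driving the renamed factory directly; the header providing `GenericParameter` and the parameter values here are assumptions, not taken from this commit:

    // predictor_factory_sketch.cc -- illustration only.
    #include <memory>
    #include <xgboost/generic_parameters.h>  // assumed location of GenericParameter
    #include <xgboost/predictor.h>

    void PredictorSketch() {
      xgboost::GenericParameter gparam;
      gparam.InitAllowUnknown(xgboost::Args{{"gpu_id", "0"}, {"n_gpus", "0"}});

      std::unique_ptr<xgboost::Predictor> cpu{
          xgboost::Predictor::Create("cpu_predictor", &gparam)};
      // Configure replaces Init: it takes the config pairs and the DMatrix cache.
      cpu->Configure({}, {});
    }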


@ -43,8 +43,7 @@ SplitEvaluator* SplitEvaluator::Create(const std::string& name) {
} }
// Default implementations of some virtual methods that aren't always needed // Default implementations of some virtual methods that aren't always needed
void SplitEvaluator::Init( void SplitEvaluator::Init(const Args& args) {}
const std::vector<std::pair<std::string, std::string> >& args) {}
void SplitEvaluator::Reset() {} void SplitEvaluator::Reset() {}
void SplitEvaluator::AddSplit(bst_uint nodeid, void SplitEvaluator::AddSplit(bst_uint nodeid,
bst_uint leftid, bst_uint leftid,
@ -104,8 +103,7 @@ class ElasticNet final : public SplitEvaluator {
LOG(FATAL) << "ElasticNet does not accept an inner SplitEvaluator"; LOG(FATAL) << "ElasticNet does not accept an inner SplitEvaluator";
} }
} }
void Init( void Init(const Args& args) override {
const std::vector<std::pair<std::string, std::string> >& args) override {
params_.InitAllowUnknown(args); params_.InitAllowUnknown(args);
} }
@ -210,7 +208,7 @@ class MonotonicConstraint final : public SplitEvaluator {
inner_ = std::move(inner); inner_ = std::move(inner);
} }
void Init(const std::vector<std::pair<std::string, std::string> >& args) void Init(const Args& args)
override { override {
inner_->Init(args); inner_->Init(args);
params_.InitAllowUnknown(args); params_.InitAllowUnknown(args);
@ -369,7 +367,7 @@ class InteractionConstraint final : public SplitEvaluator {
inner_ = std::move(inner); inner_ = std::move(inner);
} }
void Init(const std::vector<std::pair<std::string, std::string> >& args) void Init(const Args& args)
override { override {
inner_->Init(args); inner_->Init(args);
params_.InitAllowUnknown(args); params_.InitAllowUnknown(args);


@ -32,8 +32,7 @@ class SplitEvaluator {
virtual ~SplitEvaluator() = default; virtual ~SplitEvaluator() = default;
// Used to initialise any regularisation hyperparameters provided by the user // Used to initialise any regularisation hyperparameters provided by the user
virtual void Init( virtual void Init(const Args& args);
const std::vector<std::pair<std::string, std::string> >& args);
// Resets the SplitEvaluator to the state it was in after the Init was called // Resets the SplitEvaluator to the state it was in after the Init was called
virtual void Reset(); virtual void Reset();


@ -14,7 +14,7 @@ DMLC_REGISTRY_ENABLE(::xgboost::TreeUpdaterReg);
namespace xgboost { namespace xgboost {
TreeUpdater* TreeUpdater::Create(const std::string& name, LearnerTrainParam const* tparam) { TreeUpdater* TreeUpdater::Create(const std::string& name, GenericParameter const* tparam) {
auto *e = ::dmlc::Registry< ::xgboost::TreeUpdaterReg>::Get()->Find(name); auto *e = ::dmlc::Registry< ::xgboost::TreeUpdaterReg>::Get()->Find(name);
if (e == nullptr) { if (e == nullptr) {
LOG(FATAL) << "Unknown tree updater " << name; LOG(FATAL) << "Unknown tree updater " << name;
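
With `Init` renamed to `Configure(Args const&)` and `Name()` added across the updaters below, a conforming updater looks roughly like this hypothetical skeleton (the class is an illustration, not part of the commit):

    // noop_updater_sketch.cc -- hypothetical skeleton for illustration only.
    #include <vector>
    #include <xgboost/tree_updater.h>

    namespace xgboost {
    namespace tree {

    class NoopUpdater : public TreeUpdater {
     public:
      // Configure replaces the old Init(std::vector<std::pair<...>>) entry point.
      void Configure(const Args& args) override {
        // A real updater would parse its TrainParam here, e.g.
        // param_.InitAllowUnknown(args);
      }
      // Name() identifies the updater instance.
      char const* Name() const override { return "noop"; }
      void Update(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
                  const std::vector<RegTree*>& trees) override {
        // Intentionally does nothing; a real updater grows or refreshes `trees`.
      }
    };

    }  // namespace tree
    }  // namespace xgboost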


@ -30,7 +30,7 @@ namespace tree {
*/ */
class BaseMaker: public TreeUpdater { class BaseMaker: public TreeUpdater {
public: public:
void Init(const std::vector<std::pair<std::string, std::string> >& args) override { void Configure(const Args& args) override {
param_.InitAllowUnknown(args); param_.InitAllowUnknown(args);
} }


@ -25,12 +25,16 @@ DMLC_REGISTRY_FILE_TAG(updater_colmaker);
/*! \brief column-wise update to construct a tree */ /*! \brief column-wise update to construct a tree */
class ColMaker: public TreeUpdater { class ColMaker: public TreeUpdater {
public: public:
void Init(const std::vector<std::pair<std::string, std::string> >& args) override { void Configure(const Args& args) override {
param_.InitAllowUnknown(args); param_.InitAllowUnknown(args);
spliteval_.reset(SplitEvaluator::Create(param_.split_evaluator)); spliteval_.reset(SplitEvaluator::Create(param_.split_evaluator));
spliteval_->Init(args); spliteval_->Init(args);
} }
char const* Name() const override {
return "grow_colmaker";
}
void Update(HostDeviceVector<GradientPair> *gpair, void Update(HostDeviceVector<GradientPair> *gpair,
DMatrix* dmat, DMatrix* dmat,
const std::vector<RegTree*> &trees) override { const std::vector<RegTree*> &trees) override {
@ -768,13 +772,18 @@ class ColMaker: public TreeUpdater {
// distributed column maker // distributed column maker
class DistColMaker : public ColMaker { class DistColMaker : public ColMaker {
public: public:
void Init(const std::vector<std::pair<std::string, std::string> >& args) override { void Configure(const Args& args) override {
param_.InitAllowUnknown(args); param_.InitAllowUnknown(args);
pruner_.reset(TreeUpdater::Create("prune", tparam_)); pruner_.reset(TreeUpdater::Create("prune", tparam_));
pruner_->Init(args); pruner_->Configure(args);
spliteval_.reset(SplitEvaluator::Create(param_.split_evaluator)); spliteval_.reset(SplitEvaluator::Create(param_.split_evaluator));
spliteval_->Init(args); spliteval_->Init(args);
} }
char const* Name() const override {
return "distcol";
}
void Update(HostDeviceVector<GradientPair> *gpair, void Update(HostDeviceVector<GradientPair> *gpair,
DMatrix* dmat, DMatrix* dmat,
const std::vector<RegTree*> &trees) override { const std::vector<RegTree*> &trees) override {


@ -581,7 +581,11 @@ class GPUMaker : public TreeUpdater {
GPUMaker() : allocated_{false} {} GPUMaker() : allocated_{false} {}
~GPUMaker() override = default; ~GPUMaker() override = default;
void Init(const std::vector<std::pair<std::string, std::string>> &args) override { char const* Name() const override {
return "gpu_exact";
}
void Configure(const Args &args) override {
param_.InitAllowUnknown(args); param_.InitAllowUnknown(args);
maxNodes_ = (1 << (param_.max_depth + 1)) - 1; maxNodes_ = (1 << (param_.max_depth + 1)) - 1;
maxLeaves_ = 1 << param_.max_depth; maxLeaves_ = 1 << param_.max_depth;


@ -1,5 +1,5 @@
/*! /*!
* Copyright 2017 XGBoost contributors * Copyright 2017-2019 XGBoost contributors
*/ */
#pragma once #pragma once
#include <thrust/random.h> #include <thrust/random.h>


@ -1290,13 +1290,12 @@ template <typename GradientSumT>
class GPUHistMakerSpecialised { class GPUHistMakerSpecialised {
public: public:
GPUHistMakerSpecialised() : initialised_{false}, p_last_fmat_{nullptr} {} GPUHistMakerSpecialised() : initialised_{false}, p_last_fmat_{nullptr} {}
void Init(const std::vector<std::pair<std::string, std::string>>& args, void Configure(const Args& args, GenericParameter const* generic_param) {
LearnerTrainParam const* lparam) {
param_.InitAllowUnknown(args); param_.InitAllowUnknown(args);
learner_param_ = lparam; generic_param_ = generic_param;
hist_maker_param_.InitAllowUnknown(args); hist_maker_param_.InitAllowUnknown(args);
auto devices = GPUSet::All(learner_param_->gpu_id, auto devices = GPUSet::All(generic_param_->gpu_id,
learner_param_->n_gpus); generic_param_->n_gpus);
n_devices_ = devices.Size(); n_devices_ = devices.Size();
CHECK(n_devices_ != 0) << "Must have at least one device"; CHECK(n_devices_ != 0) << "Must have at least one device";
dist_ = GPUDistribution::Block(devices); dist_ = GPUDistribution::Block(devices);
@ -1362,7 +1361,7 @@ class GPUHistMakerSpecialised {
monitor_.StartCuda("Quantiles"); monitor_.StartCuda("Quantiles");
// Create the quantile sketches for the dmatrix and initialize HistogramCuts // Create the quantile sketches for the dmatrix and initialize HistogramCuts
size_t row_stride = common::DeviceSketch(param_, *learner_param_, size_t row_stride = common::DeviceSketch(param_, *generic_param_,
hist_maker_param_.gpu_batch_nrows, hist_maker_param_.gpu_batch_nrows,
dmat, &hmat_); dmat, &hmat_);
monitor_.StopCuda("Quantiles"); monitor_.StopCuda("Quantiles");
@ -1488,7 +1487,7 @@ class GPUHistMakerSpecialised {
int n_bins_; int n_bins_;
GPUHistMakerTrainParam hist_maker_param_; GPUHistMakerTrainParam hist_maker_param_;
LearnerTrainParam const* learner_param_; GenericParameter const* generic_param_;
dh::AllReducer reducer_; dh::AllReducer reducer_;
@ -1502,17 +1501,16 @@ class GPUHistMakerSpecialised {
class GPUHistMaker : public TreeUpdater { class GPUHistMaker : public TreeUpdater {
public: public:
void Init( void Configure(const Args& args) override {
const std::vector<std::pair<std::string, std::string>>& args) override {
hist_maker_param_.InitAllowUnknown(args); hist_maker_param_.InitAllowUnknown(args);
float_maker_.reset(); float_maker_.reset();
double_maker_.reset(); double_maker_.reset();
if (hist_maker_param_.single_precision_histogram) { if (hist_maker_param_.single_precision_histogram) {
float_maker_.reset(new GPUHistMakerSpecialised<GradientPair>()); float_maker_.reset(new GPUHistMakerSpecialised<GradientPair>());
float_maker_->Init(args, tparam_); float_maker_->Configure(args, tparam_);
} else { } else {
double_maker_.reset(new GPUHistMakerSpecialised<GradientPairPrecise>()); double_maker_.reset(new GPUHistMakerSpecialised<GradientPairPrecise>());
double_maker_->Init(args, tparam_); double_maker_->Configure(args, tparam_);
} }
} }
@ -1534,6 +1532,10 @@ class GPUHistMaker : public TreeUpdater {
} }
} }
char const* Name() const override {
return "gpu_hist";
}
private: private:
GPUHistMakerTrainParam hist_maker_param_; GPUHistMakerTrainParam hist_maker_param_;
std::unique_ptr<GPUHistMakerSpecialised<GradientPair>> float_maker_; std::unique_ptr<GPUHistMakerSpecialised<GradientPair>> float_maker_;


@ -33,6 +33,9 @@ class HistMaker: public BaseMaker {
} }
param_.learning_rate = lr; param_.learning_rate = lr;
} }
char const* Name() const override {
return "grow_histmaker";
}
protected: protected:
/*! \brief a single histogram */ /*! \brief a single histogram */
@ -83,7 +86,7 @@ class HistMaker: public BaseMaker {
// per thread histset // per thread histset
std::vector<HistSet> hset; std::vector<HistSet> hset;
// initialize the hist set // initialize the hist set
inline void Init(const TrainParam &param, int nthread) { inline void Configure(const TrainParam &param, int nthread) {
hset.resize(nthread); hset.resize(nthread);
// cleanup statistics // cleanup statistics
for (int tid = 0; tid < nthread; ++tid) { for (int tid = 0; tid < nthread; ++tid) {
@ -274,6 +277,9 @@ class HistMaker: public BaseMaker {
class CQHistMaker: public HistMaker { class CQHistMaker: public HistMaker {
public: public:
CQHistMaker() = default; CQHistMaker() = default;
char const* Name() const override {
return "grow_local_histmaker";
}
protected: protected:
struct HistEntry { struct HistEntry {
@ -339,7 +345,7 @@ class CQHistMaker: public HistMaker {
feat2workindex_[fset[i]] = static_cast<int>(i); feat2workindex_[fset[i]] = static_cast<int>(i);
} }
// start to work // start to work
this->wspace_.Init(this->param_, 1); this->wspace_.Configure(this->param_, 1);
// if it is C++11, use lazy evaluation for Allreduce, // if it is C++11, use lazy evaluation for Allreduce,
// to gain speedup in recovery // to gain speedup in recovery
auto lazy_get_hist = [&]() { auto lazy_get_hist = [&]() {
@ -637,6 +643,11 @@ class CQHistMaker: public HistMaker {
// global proposal // global proposal
class GlobalProposalHistMaker: public CQHistMaker { class GlobalProposalHistMaker: public CQHistMaker {
public:
char const* Name() const override {
return "grow_global_histmaker";
}
protected: protected:
void ResetPosAndPropose(const std::vector<GradientPair> &gpair, void ResetPosAndPropose(const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
@ -682,7 +693,7 @@ class GlobalProposalHistMaker: public CQHistMaker {
this->feat2workindex_[fset[i]] = static_cast<int>(i); this->feat2workindex_[fset[i]] = static_cast<int>(i);
} }
// start to work // start to work
this->wspace_.Init(this->param_, 1); this->wspace_.Configure(this->param_, 1);
// to gain speedup in recovery // to gain speedup in recovery
{ {
this->thread_hist_.resize(omp_get_max_threads()); this->thread_hist_.resize(omp_get_max_threads());


@ -24,10 +24,14 @@ class TreePruner: public TreeUpdater {
TreePruner() { TreePruner() {
syncher_.reset(TreeUpdater::Create("sync", tparam_)); syncher_.reset(TreeUpdater::Create("sync", tparam_));
} }
char const* Name() const override {
return "prune";
}
// set training parameter // set training parameter
void Init(const std::vector<std::pair<std::string, std::string> >& args) override { void Configure(const Args& args) override {
param_.InitAllowUnknown(args); param_.InitAllowUnknown(args);
syncher_->Init(args); syncher_->Configure(args);
} }
// update the tree, do pruning // update the tree, do pruning
void Update(HostDeviceVector<GradientPair> *gpair, void Update(HostDeviceVector<GradientPair> *gpair,


@ -32,12 +32,12 @@ namespace tree {
DMLC_REGISTRY_FILE_TAG(updater_quantile_hist); DMLC_REGISTRY_FILE_TAG(updater_quantile_hist);
void QuantileHistMaker::Init(const std::vector<std::pair<std::string, std::string> >& args) { void QuantileHistMaker::Configure(const Args& args) {
// initialize pruner // initialize pruner
if (!pruner_) { if (!pruner_) {
pruner_.reset(TreeUpdater::Create("prune", tparam_)); pruner_.reset(TreeUpdater::Create("prune", tparam_));
} }
pruner_->Init(args); pruner_->Configure(args);
param_.InitAllowUnknown(args); param_.InitAllowUnknown(args);
is_gmat_initialized_ = false; is_gmat_initialized_ = false;


@ -46,7 +46,7 @@ using xgboost::common::Column;
/*! \brief construct a tree using quantized feature values */ /*! \brief construct a tree using quantized feature values */
class QuantileHistMaker: public TreeUpdater { class QuantileHistMaker: public TreeUpdater {
public: public:
void Init(const std::vector<std::pair<std::string, std::string> >& args) override; void Configure(const Args& args) override;
void Update(HostDeviceVector<GradientPair>* gpair, void Update(HostDeviceVector<GradientPair>* gpair,
DMatrix* dmat, DMatrix* dmat,
@ -55,6 +55,9 @@ class QuantileHistMaker: public TreeUpdater {
bool UpdatePredictionCache(const DMatrix* data, bool UpdatePredictionCache(const DMatrix* data,
HostDeviceVector<bst_float>* out_preds) override; HostDeviceVector<bst_float>* out_preds) override;
char const* Name() const override {
return "grow_quantile_histmaker";
}
protected: protected:
// training parameter // training parameter


@ -21,9 +21,12 @@ DMLC_REGISTRY_FILE_TAG(updater_refresh);
/*! \brief pruner that prunes a tree after growing finishs */ /*! \brief pruner that prunes a tree after growing finishs */
class TreeRefresher: public TreeUpdater { class TreeRefresher: public TreeUpdater {
public: public:
void Init(const std::vector<std::pair<std::string, std::string> >& args) override { void Configure(const Args& args) override {
param_.InitAllowUnknown(args); param_.InitAllowUnknown(args);
} }
char const* Name() const override {
return "refresh";
}
// update the tree, do pruning // update the tree, do pruning
void Update(HostDeviceVector<GradientPair> *gpair, void Update(HostDeviceVector<GradientPair> *gpair,
DMatrix *p_fmat, DMatrix *p_fmat,


@ -22,6 +22,9 @@ DMLC_REGISTRY_FILE_TAG(updater_skmaker);
class SketchMaker: public BaseMaker { class SketchMaker: public BaseMaker {
public: public:
char const* Name() const override {
return "grow_skmaker";
}
void Update(HostDeviceVector<GradientPair> *gpair, void Update(HostDeviceVector<GradientPair> *gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
const std::vector<RegTree*> &trees) override { const std::vector<RegTree*> &trees) override {


@ -20,7 +20,11 @@ DMLC_REGISTRY_FILE_TAG(updater_sync);
*/ */
class TreeSyncher: public TreeUpdater { class TreeSyncher: public TreeUpdater {
public: public:
void Init(const std::vector<std::pair<std::string, std::string> >& args) override {} void Configure(const Args& args) override {}
char const* Name() const override {
return "prune";
}
void Update(HostDeviceVector<GradientPair> *gpair, void Update(HostDeviceVector<GradientPair> *gpair,
DMatrix* dmat, DMatrix* dmat,


@ -46,14 +46,14 @@ TEST(GPUSet, Verbose) {
args["verbosity"] = "3"; // LOG INFO args["verbosity"] = "3"; // LOG INFO
testing::internal::CaptureStderr(); testing::internal::CaptureStderr();
ConsoleLogger::Configure(args.cbegin(), args.cend()); ConsoleLogger::Configure({args.cbegin(), args.cend()});
GPUSet::All(0, 1); GPUSet::All(0, 1);
std::string output = testing::internal::GetCapturedStderr(); std::string output = testing::internal::GetCapturedStderr();
ASSERT_NE(output.find("GPU ID: 0"), std::string::npos); ASSERT_NE(output.find("GPU ID: 0"), std::string::npos);
ASSERT_NE(output.find("GPUs: 1"), std::string::npos); ASSERT_NE(output.find("GPUs: 1"), std::string::npos);
args["verbosity"] = "1"; // restore args["verbosity"] = "1"; // restore
ConsoleLogger::Configure(args.cbegin(), args.cend()); ConsoleLogger::Configure({args.cbegin(), args.cend()});
} }
} }


@ -14,8 +14,8 @@ TEST(Monitor, Logging) {
monitor_.Stop("basic"); monitor_.Stop("basic");
}; };
std::map<std::string, std::string> args = {std::make_pair("verbosity", "3")}; Args args = {std::make_pair("verbosity", "3")};
ConsoleLogger::Configure(args.cbegin(), args.cend()); ConsoleLogger::Configure(args);
testing::internal::CaptureStderr(); testing::internal::CaptureStderr();
run_monitor(); run_monitor();
std::string output = testing::internal::GetCapturedStderr(); std::string output = testing::internal::GetCapturedStderr();
@ -23,7 +23,7 @@ TEST(Monitor, Logging) {
// Monitor only prints messages when set to DEBUG. // Monitor only prints messages when set to DEBUG.
args = {std::make_pair("verbosity", "2")}; args = {std::make_pair("verbosity", "2")};
ConsoleLogger::Configure(args.cbegin(), args.cend()); ConsoleLogger::Configure(args);
testing::internal::CaptureStderr(); testing::internal::CaptureStderr();
run_monitor(); run_monitor();
output = testing::internal::GetCapturedStderr(); output = testing::internal::GetCapturedStderr();


@ -8,42 +8,47 @@ TEST(GBTree, SelectTreeMethod) {
using Arg = std::pair<std::string, std::string>; using Arg = std::pair<std::string, std::string>;
size_t constexpr kRows = 10; size_t constexpr kRows = 10;
size_t constexpr kCols = 10; size_t constexpr kCols = 10;
auto mat_ptr = CreateDMatrix(kRows, kCols, 0); auto p_shared_ptr_dmat = CreateDMatrix(kRows, kCols, 0);
std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {*mat_ptr}; auto p_dmat {(*p_shared_ptr_dmat).get()};
LearnerTrainParam learner_param; GenericParameter generic_param;
learner_param.InitAllowUnknown(std::vector<Arg>{Arg("n_gpus", "0")}); generic_param.InitAllowUnknown(std::vector<Arg>{Arg("n_gpus", "0")});
std::unique_ptr<GradientBooster> p_gbm{ std::unique_ptr<GradientBooster> p_gbm{
GradientBooster::Create("gbtree", &learner_param, {}, 0)}; GradientBooster::Create("gbtree", &generic_param, {}, 0)};
auto& gbtree = dynamic_cast<gbm::GBTree&> (*p_gbm); auto& gbtree = dynamic_cast<gbm::GBTree&> (*p_gbm);
// Test if `tree_method` can be set // Test if `tree_method` can be set
std::string n_feat = std::to_string(kCols); std::string n_feat = std::to_string(kCols);
gbtree.Configure({Arg{"tree_method", "approx"}, Arg{"num_feature", n_feat}}); std::map<std::string, std::string> args {Arg{"tree_method", "approx"}, Arg{"num_feature", n_feat}};
gbtree.Configure({args.cbegin(), args.cend()});
gbtree.ConfigureWithKnownData(args, p_dmat);
auto const& tparam = gbtree.GetTrainParam(); auto const& tparam = gbtree.GetTrainParam();
gbtree.ConfigureWithKnownData({Arg{"tree_method", "approx"}, Arg{"num_feature", n_feat}}, p_dmat);
ASSERT_EQ(tparam.updater_seq, "grow_histmaker,prune"); ASSERT_EQ(tparam.updater_seq, "grow_histmaker,prune");
gbtree.Configure({Arg("tree_method", "exact"), Arg("num_feature", n_feat)}); gbtree.ConfigureWithKnownData({Arg("tree_method", "exact"), Arg("num_feature", n_feat)}, p_dmat);
ASSERT_EQ(tparam.updater_seq, "grow_colmaker,prune"); ASSERT_EQ(tparam.updater_seq, "grow_colmaker,prune");
gbtree.Configure({Arg("tree_method", "hist"), Arg("num_feature", n_feat)}); gbtree.ConfigureWithKnownData({Arg("tree_method", "hist"), Arg("num_feature", n_feat)}, p_dmat);
ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker"); ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker");
ASSERT_EQ(tparam.predictor, "cpu_predictor"); ASSERT_EQ(tparam.predictor, "cpu_predictor");
gbtree.Configure({Arg{"booster", "dart"}, Arg{"tree_method", "hist"}, gbtree.ConfigureWithKnownData({Arg{"booster", "dart"}, Arg{"tree_method", "hist"},
Arg{"num_feature", n_feat}}); Arg{"num_feature", n_feat}}, p_dmat);
ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker"); ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker");
#ifdef XGBOOST_USE_CUDA #ifdef XGBOOST_USE_CUDA
learner_param.InitAllowUnknown(std::vector<Arg>{Arg{"n_gpus", "1"}}); generic_param.InitAllowUnknown(std::vector<Arg>{Arg{"n_gpus", "1"}});
gbtree.Configure({Arg("tree_method", "gpu_exact"), gbtree.ConfigureWithKnownData({Arg("tree_method", "gpu_exact"),
Arg("num_feature", n_feat)}); Arg("num_feature", n_feat)}, p_dmat);
ASSERT_EQ(tparam.updater_seq, "grow_gpu,prune"); ASSERT_EQ(tparam.updater_seq, "grow_gpu,prune");
ASSERT_EQ(tparam.predictor, "gpu_predictor"); ASSERT_EQ(tparam.predictor, "gpu_predictor");
gbtree.Configure({Arg("tree_method", "gpu_hist"), Arg("num_feature", n_feat)}); gbtree.ConfigureWithKnownData({Arg("tree_method", "gpu_hist"), Arg("num_feature", n_feat)},
p_dmat);
ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist"); ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist");
ASSERT_EQ(tparam.predictor, "gpu_predictor"); ASSERT_EQ(tparam.predictor, "gpu_predictor");
gbtree.Configure({Arg{"booster", "dart"}, Arg{"tree_method", "gpu_hist"}, gbtree.ConfigureWithKnownData({Arg{"booster", "dart"}, Arg{"tree_method", "gpu_hist"},
Arg{"num_feature", n_feat}}); Arg{"num_feature", n_feat}}, p_dmat);
ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist"); ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist");
#endif #endif
delete mat_ptr; delete p_shared_ptr_dmat;
} }
} // namespace xgboost } // namespace xgboost


@ -189,8 +189,8 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrixWithRC(size_t n_rows, size_t n_c
gbm::GBTreeModel CreateTestModel(); gbm::GBTreeModel CreateTestModel();
inline LearnerTrainParam CreateEmptyGenericParam(int gpu_id, int n_gpus) { inline GenericParameter CreateEmptyGenericParam(int gpu_id, int n_gpus) {
xgboost::LearnerTrainParam tparam; xgboost::GenericParameter tparam;
std::vector<std::pair<std::string, std::string>> args { std::vector<std::pair<std::string, std::string>> args {
{"gpu_id", std::to_string(gpu_id)}, {"gpu_id", std::to_string(gpu_id)},
{"n_gpus", std::to_string(n_gpus)}}; {"n_gpus", std::to_string(n_gpus)}};


@ -1,5 +1,5 @@
/*! /*!
* Copyright 2018 by Contributors * Copyright 2018-2019 by Contributors
*/ */
#include <xgboost/linear_updater.h> #include <xgboost/linear_updater.h>
#include "../helpers.h" #include "../helpers.h"
@ -11,7 +11,7 @@ TEST(Linear, shotgun) {
{ {
auto updater = std::unique_ptr<xgboost::LinearUpdater>( auto updater = std::unique_ptr<xgboost::LinearUpdater>(
xgboost::LinearUpdater::Create("shotgun", &lparam)); xgboost::LinearUpdater::Create("shotgun", &lparam));
updater->Init({{"eta", "1."}}); updater->Configure({{"eta", "1."}});
xgboost::HostDeviceVector<xgboost::GradientPair> gpair( xgboost::HostDeviceVector<xgboost::GradientPair> gpair(
(*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0)); (*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0));
xgboost::gbm::GBLinearModel model; xgboost::gbm::GBLinearModel model;
@ -26,7 +26,7 @@ TEST(Linear, shotgun) {
{ {
auto updater = std::unique_ptr<xgboost::LinearUpdater>( auto updater = std::unique_ptr<xgboost::LinearUpdater>(
xgboost::LinearUpdater::Create("shotgun", &lparam)); xgboost::LinearUpdater::Create("shotgun", &lparam));
EXPECT_ANY_THROW(updater->Init({{"feature_selector", "random"}})); EXPECT_ANY_THROW(updater->Configure({{"feature_selector", "random"}}));
} }
delete mat; delete mat;
} }
@ -36,7 +36,7 @@ TEST(Linear, coordinate) {
auto lparam = xgboost::CreateEmptyGenericParam(0, 0); auto lparam = xgboost::CreateEmptyGenericParam(0, 0);
auto updater = std::unique_ptr<xgboost::LinearUpdater>( auto updater = std::unique_ptr<xgboost::LinearUpdater>(
xgboost::LinearUpdater::Create("coord_descent", &lparam)); xgboost::LinearUpdater::Create("coord_descent", &lparam));
updater->Init({{"eta", "1."}}); updater->Configure({{"eta", "1."}});
xgboost::HostDeviceVector<xgboost::GradientPair> gpair( xgboost::HostDeviceVector<xgboost::GradientPair> gpair(
(*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0)); (*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0));
xgboost::gbm::GBLinearModel model; xgboost::gbm::GBLinearModel model;


@ -11,7 +11,7 @@ TEST(Linear, GPUCoordinate) {
lparam.n_gpus = 1; lparam.n_gpus = 1;
auto updater = std::unique_ptr<xgboost::LinearUpdater>( auto updater = std::unique_ptr<xgboost::LinearUpdater>(
xgboost::LinearUpdater::Create("gpu_coord_descent", &lparam)); xgboost::LinearUpdater::Create("gpu_coord_descent", &lparam));
updater->Init({{"eta", "1."}}); updater->Configure({{"eta", "1."}});
xgboost::HostDeviceVector<xgboost::GradientPair> gpair( xgboost::HostDeviceVector<xgboost::GradientPair> gpair(
(*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0)); (*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0));
xgboost::gbm::GBLinearModel model; xgboost::gbm::GBLinearModel model;
@ -33,7 +33,7 @@ TEST(Linear, MGPU_GPUCoordinate) {
lparam.n_gpus = -1; lparam.n_gpus = -1;
auto updater = std::unique_ptr<xgboost::LinearUpdater>( auto updater = std::unique_ptr<xgboost::LinearUpdater>(
xgboost::LinearUpdater::Create("gpu_coord_descent", &lparam)); xgboost::LinearUpdater::Create("gpu_coord_descent", &lparam));
updater->Init({{"eta", "1."}}); updater->Configure({{"eta", "1."}});
xgboost::HostDeviceVector<xgboost::GradientPair> gpair( xgboost::HostDeviceVector<xgboost::GradientPair> gpair(
(*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0)); (*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0));
xgboost::gbm::GBLinearModel model; xgboost::gbm::GBLinearModel model;
@ -52,7 +52,7 @@ TEST(Linear, MGPU_GPUCoordinate) {
auto mat = xgboost::CreateDMatrix(10, 10, 0); auto mat = xgboost::CreateDMatrix(10, 10, 0);
auto updater = std::unique_ptr<xgboost::LinearUpdater>( auto updater = std::unique_ptr<xgboost::LinearUpdater>(
xgboost::LinearUpdater::Create("gpu_coord_descent", &lparam)); xgboost::LinearUpdater::Create("gpu_coord_descent", &lparam));
updater->Init({{"eta", "1."}}); updater->Configure({{"eta", "1."}});
xgboost::HostDeviceVector<xgboost::GradientPair> gpair( xgboost::HostDeviceVector<xgboost::GradientPair> gpair(
(*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0)); (*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0));
xgboost::gbm::GBLinearModel model; xgboost::gbm::GBLinearModel model;


@ -6,7 +6,7 @@
#include "../helpers.h" #include "../helpers.h"
TEST(Objective, DeclareUnifiedTest(HingeObj)) { TEST(Objective, DeclareUnifiedTest(HingeObj)) {
xgboost::LearnerTrainParam tparam = xgboost::CreateEmptyGenericParam(0, NGPUS); xgboost::GenericParameter tparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("binary:hinge", &tparam); xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("binary:hinge", &tparam);
xgboost::bst_float eps = std::numeric_limits<xgboost::bst_float>::min(); xgboost::bst_float eps = std::numeric_limits<xgboost::bst_float>::min();


@ -7,7 +7,7 @@
#include "../helpers.h" #include "../helpers.h"
TEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassObjGPair)) { TEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassObjGPair)) {
xgboost::LearnerTrainParam lparam = xgboost::CreateEmptyGenericParam(0, NGPUS); xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args {{"num_class", "3"}}; std::vector<std::pair<std::string, std::string>> args {{"num_class", "3"}};
xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("multi:softmax", &lparam); xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("multi:softmax", &lparam);
@ -47,7 +47,7 @@ TEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassBasic)) {
} }
TEST(Objective, DeclareUnifiedTest(SoftprobMultiClassBasic)) { TEST(Objective, DeclareUnifiedTest(SoftprobMultiClassBasic)) {
xgboost::LearnerTrainParam lparam = xgboost::CreateEmptyGenericParam(0, NGPUS); xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args { std::vector<std::pair<std::string, std::string>> args {
std::pair<std::string, std::string>("num_class", "3")}; std::pair<std::string, std::string>("num_class", "3")};


@ -7,7 +7,7 @@
TEST(Objective, UnknownFunction) { TEST(Objective, UnknownFunction) {
xgboost::ObjFunction* obj = nullptr; xgboost::ObjFunction* obj = nullptr;
xgboost::LearnerTrainParam tparam; xgboost::GenericParameter tparam;
std::vector<std::pair<std::string, std::string>> args; std::vector<std::pair<std::string, std::string>> args;
tparam.InitAllowUnknown(args); tparam.InitAllowUnknown(args);


@ -4,7 +4,7 @@
#include "../helpers.h" #include "../helpers.h"
TEST(Objective, PairwiseRankingGPair) { TEST(Objective, PairwiseRankingGPair) {
xgboost::LearnerTrainParam tparam; xgboost::GenericParameter tparam;
std::vector<std::pair<std::string, std::string>> args; std::vector<std::pair<std::string, std::string>> args;
tparam.InitAllowUnknown(args); tparam.InitAllowUnknown(args);


@ -7,7 +7,7 @@
#include "../helpers.h" #include "../helpers.h"
TEST(Objective, DeclareUnifiedTest(LinearRegressionGPair)) { TEST(Objective, DeclareUnifiedTest(LinearRegressionGPair)) {
xgboost::LearnerTrainParam tparam = xgboost::CreateEmptyGenericParam(0, NGPUS); xgboost::GenericParameter tparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args; std::vector<std::pair<std::string, std::string>> args;
xgboost::ObjFunction * obj = xgboost::ObjFunction * obj =
@ -32,7 +32,7 @@ TEST(Objective, DeclareUnifiedTest(LinearRegressionGPair)) {
} }
TEST(Objective, DeclareUnifiedTest(SquaredLog)) { TEST(Objective, DeclareUnifiedTest(SquaredLog)) {
xgboost::LearnerTrainParam tparam = xgboost::CreateEmptyGenericParam(0, NGPUS); xgboost::GenericParameter tparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args; std::vector<std::pair<std::string, std::string>> args;
xgboost::ObjFunction * obj = xgboost::ObjFunction * obj =
@ -56,7 +56,7 @@ TEST(Objective, DeclareUnifiedTest(SquaredLog)) {
} }
TEST(Objective, DeclareUnifiedTest(LogisticRegressionGPair)) { TEST(Objective, DeclareUnifiedTest(LogisticRegressionGPair)) {
xgboost::LearnerTrainParam tparam = xgboost::CreateEmptyGenericParam(0, NGPUS); xgboost::GenericParameter tparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args; std::vector<std::pair<std::string, std::string>> args;
xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("reg:logistic", &tparam); xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("reg:logistic", &tparam);
@ -72,7 +72,7 @@ TEST(Objective, DeclareUnifiedTest(LogisticRegressionGPair)) {
} }
TEST(Objective, DeclareUnifiedTest(LogisticRegressionBasic)) { TEST(Objective, DeclareUnifiedTest(LogisticRegressionBasic)) {
xgboost::LearnerTrainParam lparam = xgboost::CreateEmptyGenericParam(0, NGPUS); xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args; std::vector<std::pair<std::string, std::string>> args;
xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("reg:logistic", &lparam); xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("reg:logistic", &lparam);
@ -102,7 +102,7 @@ TEST(Objective, DeclareUnifiedTest(LogisticRegressionBasic)) {
} }
TEST(Objective, DeclareUnifiedTest(LogisticRawGPair)) { TEST(Objective, DeclareUnifiedTest(LogisticRawGPair)) {
xgboost::LearnerTrainParam lparam = xgboost::CreateEmptyGenericParam(0, NGPUS); xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args; std::vector<std::pair<std::string, std::string>> args;
xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("binary:logitraw", &lparam); xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("binary:logitraw", &lparam);
@ -118,7 +118,7 @@ TEST(Objective, DeclareUnifiedTest(LogisticRawGPair)) {
} }
TEST(Objective, DeclareUnifiedTest(PoissonRegressionGPair)) { TEST(Objective, DeclareUnifiedTest(PoissonRegressionGPair)) {
xgboost::LearnerTrainParam lparam = xgboost::CreateEmptyGenericParam(0, NGPUS); xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args; std::vector<std::pair<std::string, std::string>> args;
xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("count:poisson", &lparam); xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("count:poisson", &lparam);
@ -140,7 +140,7 @@ TEST(Objective, DeclareUnifiedTest(PoissonRegressionGPair)) {
} }
TEST(Objective, DeclareUnifiedTest(PoissonRegressionBasic)) { TEST(Objective, DeclareUnifiedTest(PoissonRegressionBasic)) {
xgboost::LearnerTrainParam lparam = xgboost::CreateEmptyGenericParam(0, NGPUS); xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args; std::vector<std::pair<std::string, std::string>> args;
xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("count:poisson", &lparam); xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("count:poisson", &lparam);
@ -168,7 +168,7 @@ TEST(Objective, DeclareUnifiedTest(PoissonRegressionBasic)) {
} }
TEST(Objective, DeclareUnifiedTest(GammaRegressionGPair)) { TEST(Objective, DeclareUnifiedTest(GammaRegressionGPair)) {
xgboost::LearnerTrainParam lparam = xgboost::CreateEmptyGenericParam(0, NGPUS); xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args; std::vector<std::pair<std::string, std::string>> args;
xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("reg:gamma", &lparam); xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("reg:gamma", &lparam);
@ -189,7 +189,7 @@ TEST(Objective, DeclareUnifiedTest(GammaRegressionGPair)) {
} }
TEST(Objective, DeclareUnifiedTest(GammaRegressionBasic)) { TEST(Objective, DeclareUnifiedTest(GammaRegressionBasic)) {
xgboost::LearnerTrainParam lparam = xgboost::CreateEmptyGenericParam(0, NGPUS); xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args; std::vector<std::pair<std::string, std::string>> args;
xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("reg:gamma", &lparam); xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("reg:gamma", &lparam);
@ -217,7 +217,7 @@ TEST(Objective, DeclareUnifiedTest(GammaRegressionBasic)) {
} }
TEST(Objective, DeclareUnifiedTest(TweedieRegressionGPair)) { TEST(Objective, DeclareUnifiedTest(TweedieRegressionGPair)) {
xgboost::LearnerTrainParam lparam = xgboost::CreateEmptyGenericParam(0, NGPUS); xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args; std::vector<std::pair<std::string, std::string>> args;
xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("reg:tweedie", &lparam); xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("reg:tweedie", &lparam);
@ -241,7 +241,7 @@ TEST(Objective, DeclareUnifiedTest(TweedieRegressionGPair)) {
#if defined(__CUDACC__) #if defined(__CUDACC__)
TEST(Objective, CPU_vs_CUDA) { TEST(Objective, CPU_vs_CUDA) {
xgboost::LearnerTrainParam lparam = xgboost::CreateEmptyGenericParam(0, 1); xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, 1);
xgboost::ObjFunction * obj = xgboost::ObjFunction * obj =
xgboost::ObjFunction::Create("reg:squarederror", &lparam); xgboost::ObjFunction::Create("reg:squarederror", &lparam);
@ -294,7 +294,7 @@ TEST(Objective, CPU_vs_CUDA) {
#endif #endif
TEST(Objective, DeclareUnifiedTest(TweedieRegressionBasic)) { TEST(Objective, DeclareUnifiedTest(TweedieRegressionBasic)) {
xgboost::LearnerTrainParam lparam = xgboost::CreateEmptyGenericParam(0, NGPUS); xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
std::vector<std::pair<std::string, std::string>> args; std::vector<std::pair<std::string, std::string>> args;
xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("reg:tweedie", &lparam); xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("reg:tweedie", &lparam);
@ -325,7 +325,7 @@ TEST(Objective, DeclareUnifiedTest(TweedieRegressionBasic)) {
// CoxRegression not implemented in GPU code, no need for testing. // CoxRegression not implemented in GPU code, no need for testing.
#if !defined(__CUDACC__) #if !defined(__CUDACC__)
TEST(Objective, CoxRegressionGPair) { TEST(Objective, CoxRegressionGPair) {
xgboost::LearnerTrainParam lparam = xgboost::CreateEmptyGenericParam(0, 0); xgboost::GenericParameter lparam = xgboost::CreateEmptyGenericParam(0, 0);
std::vector<std::pair<std::string, std::string>> args; std::vector<std::pair<std::string, std::string>> args;
xgboost::ObjFunction * obj = xgboost::ObjFunction * obj =
xgboost::ObjFunction::Create("survival:cox", &lparam); xgboost::ObjFunction::Create("survival:cox", &lparam);


@ -2,10 +2,12 @@
/*! /*!
* Copyright 2017-2019 XGBoost contributors * Copyright 2017-2019 XGBoost contributors
*/ */
#include <dmlc/logging.h>
#include <dmlc/filesystem.h> #include <dmlc/filesystem.h>
#include <xgboost/c_api.h> #include <xgboost/c_api.h>
#include <xgboost/predictor.h> #include <xgboost/predictor.h>
#include <xgboost/logging.h>
#include <xgboost/learner.h>
#include <string> #include <string>
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "../helpers.h" #include "../helpers.h"
@ -20,8 +22,14 @@ inline void CheckCAPICall(int ret) {
} // namespace anonymous } // namespace anonymous
#endif #endif
extern const std::map<std::string, std::string>& const std::map<std::string, std::string>&
QueryBoosterConfigurationArguments(BoosterHandle handle); QueryBoosterConfigurationArguments(BoosterHandle handle) {
CHECK_NE(handle, static_cast<void*>(nullptr));
auto* bst = static_cast<xgboost::Learner*>(handle);
bst->Configure();
return bst->GetConfigurationArguments();
}
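Editor's note: a rough usage sketch for the helper above, assuming a booster created through the C API. The DMatrix contents, the "hist" value, and the DISABLED_ test name are illustrative only; XGDMatrixCreateFromMat, XGBoosterCreate, XGBoosterSetParam, and the corresponding free functions are the usual C API entry points.

// Hypothetical sketch, not part of this commit: set a parameter through the
// C API and read it back from the learner's stored configuration.
TEST(CApi, DISABLED_QueryConfigurationSketch) {
  bst_float data[2] = {1.0f, 2.0f};
  DMatrixHandle dmat;
  CheckCAPICall(XGDMatrixCreateFromMat(data, 1, 2, -1.0f, &dmat));
  BoosterHandle booster;
  CheckCAPICall(XGBoosterCreate(&dmat, 1, &booster));
  CheckCAPICall(XGBoosterSetParam(booster, "tree_method", "hist"));
  auto const& cfg = QueryBoosterConfigurationArguments(booster);
  ASSERT_EQ(cfg.at("tree_method"), "hist");
  CheckCAPICall(XGBoosterFree(booster));
  CheckCAPICall(XGDMatrixFree(dmat));
}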
namespace xgboost { namespace xgboost {
namespace predictor { namespace predictor {
@ -35,8 +43,8 @@ TEST(gpu_predictor, Test) {
std::unique_ptr<Predictor> cpu_predictor = std::unique_ptr<Predictor> cpu_predictor =
std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &cpu_lparam)); std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &cpu_lparam));
gpu_predictor->Init({}, {}); gpu_predictor->Configure({}, {});
cpu_predictor->Init({}, {}); cpu_predictor->Configure({}, {});
int n_row = 5; int n_row = 5;
int n_col = 5; int n_col = 5;
@ -56,35 +64,6 @@ TEST(gpu_predictor, Test) {
for (int i = 0; i < gpu_out_predictions.Size(); i++) { for (int i = 0; i < gpu_out_predictions.Size(); i++) {
ASSERT_NEAR(gpu_out_predictions_h[i], cpu_out_predictions_h[i], abs_tolerance); ASSERT_NEAR(gpu_out_predictions_h[i], cpu_out_predictions_h[i], abs_tolerance);
} }
// Test predict instance
const auto &batch = *(*dmat)->GetRowBatches().begin();
for (int i = 0; i < batch.Size(); i++) {
std::vector<float> gpu_instance_out_predictions;
std::vector<float> cpu_instance_out_predictions;
cpu_predictor->PredictInstance(batch[i], &cpu_instance_out_predictions,
model);
gpu_predictor->PredictInstance(batch[i], &gpu_instance_out_predictions,
model);
ASSERT_EQ(gpu_instance_out_predictions[0], cpu_instance_out_predictions[0]);
}
// Test predict leaf
std::vector<float> gpu_leaf_out_predictions;
std::vector<float> cpu_leaf_out_predictions;
cpu_predictor->PredictLeaf((*dmat).get(), &cpu_leaf_out_predictions, model);
gpu_predictor->PredictLeaf((*dmat).get(), &gpu_leaf_out_predictions, model);
for (int i = 0; i < gpu_leaf_out_predictions.size(); i++) {
ASSERT_EQ(gpu_leaf_out_predictions[i], cpu_leaf_out_predictions[i]);
}
// Test predict contribution
std::vector<float> gpu_out_contribution;
std::vector<float> cpu_out_contribution;
cpu_predictor->PredictContribution((*dmat).get(), &cpu_out_contribution, model);
gpu_predictor->PredictContribution((*dmat).get(), &gpu_out_contribution, model);
for (int i = 0; i < gpu_out_contribution.size(); i++) {
ASSERT_EQ(gpu_out_contribution[i], cpu_out_contribution[i]);
}
delete dmat; delete dmat;
} }
@ -93,7 +72,7 @@ TEST(gpu_predictor, ExternalMemoryTest) {
auto lparam = CreateEmptyGenericParam(0, 1); auto lparam = CreateEmptyGenericParam(0, 1);
std::unique_ptr<Predictor> gpu_predictor = std::unique_ptr<Predictor> gpu_predictor =
std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam)); std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam));
gpu_predictor->Init({}, {}); gpu_predictor->Configure({}, {});
gbm::GBTreeModel model = CreateTestModel(); gbm::GBTreeModel model = CreateTestModel();
int n_col = 3; int n_col = 3;
model.param.num_feature = n_col; model.param.num_feature = n_col;
@ -108,38 +87,6 @@ TEST(gpu_predictor, ExternalMemoryTest) {
for (const auto& v : out_predictions.HostVector()) { for (const auto& v : out_predictions.HostVector()) {
ASSERT_EQ(v, 1.5); ASSERT_EQ(v, 1.5);
} }
// Test predict leaf
std::vector<float> leaf_out_predictions;
gpu_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);
EXPECT_EQ(leaf_out_predictions.size(), dmat->Info().num_row_);
for (const auto& v : leaf_out_predictions) {
ASSERT_EQ(v, 0);
}
// Test predict contribution
std::vector<float> out_contribution;
gpu_predictor->PredictContribution(dmat.get(), &out_contribution, model);
EXPECT_EQ(out_contribution.size(), dmat->Info().num_row_ * (n_col + 1));
for (int i = 0; i < out_contribution.size(); i++) {
if (i % (n_col + 1) == n_col) {
ASSERT_EQ(out_contribution[i], 1.5);
} else {
ASSERT_EQ(out_contribution[i], 0);
}
}
// Test predict contribution (approximate method)
std::vector<float> out_contribution_approximate;
gpu_predictor->PredictContribution(dmat.get(), &out_contribution_approximate, model, true);
EXPECT_EQ(out_contribution.size(), dmat->Info().num_row_ * (n_col + 1));
for (int i = 0; i < out_contribution.size(); i++) {
if (i % (n_col + 1) == n_col) {
ASSERT_EQ(out_contribution[i], 1.5);
} else {
ASSERT_EQ(out_contribution[i], 0);
}
}
} }
#if defined(XGBOOST_USE_NCCL) #if defined(XGBOOST_USE_NCCL)
@ -231,7 +178,7 @@ TEST(gpu_predictor, MGPU_Test) {
std::unique_ptr<Predictor> cpu_predictor = std::unique_ptr<Predictor> cpu_predictor =
std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &cpu_lparam)); std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &cpu_lparam));
cpu_predictor->Init({}, {}); cpu_predictor->Configure({}, {});
for (size_t i = 1; i < 33; i *= 2) { for (size_t i = 1; i < 33; i *= 2) {
int n_row = i, n_col = i; int n_row = i, n_col = i;
@ -263,7 +210,7 @@ TEST(gpu_predictor, MGPU_ExternalMemoryTest) {
std::unique_ptr<Predictor> gpu_predictor = std::unique_ptr<Predictor> gpu_predictor =
std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &gpu_lparam)); std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &gpu_lparam));
gpu_predictor->Init({}, {}); gpu_predictor->Configure({}, {});
gbm::GBTreeModel model = CreateTestModel(); gbm::GBTreeModel model = CreateTestModel();
model.param.num_feature = 3; model.param.num_feature = 3;


@ -14,7 +14,7 @@ TEST(Learner, Basic) {
auto mat_ptr = CreateDMatrix(10, 10, 0); auto mat_ptr = CreateDMatrix(10, 10, 0);
std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {*mat_ptr}; std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {*mat_ptr};
auto learner = std::unique_ptr<Learner>(Learner::Create(mat)); auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
learner->Configure(args); learner->SetParams(args);
delete mat_ptr; delete mat_ptr;
} }
@ -46,9 +46,7 @@ TEST(Learner, CheckGroup) {
std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {p_mat}; std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {p_mat};
auto learner = std::unique_ptr<Learner>(Learner::Create(mat)); auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
learner->Configure({Arg{"objective", "rank:pairwise"}}); learner->SetParams({Arg{"objective", "rank:pairwise"}});
learner->InitModel();
EXPECT_NO_THROW(learner->UpdateOneIter(0, p_mat.get())); EXPECT_NO_THROW(learner->UpdateOneIter(0, p_mat.get()));
group.resize(kNumGroups+1); group.resize(kNumGroups+1);
@ -77,11 +75,34 @@ TEST(Learner, SLOW_CheckMultiBatch) {
dmat->Info().SetInfo("label", labels.data(), DataType::kFloat32, num_row); dmat->Info().SetInfo("label", labels.data(), DataType::kFloat32, num_row);
std::vector<std::shared_ptr<DMatrix>> mat{dmat}; std::vector<std::shared_ptr<DMatrix>> mat{dmat};
auto learner = std::unique_ptr<Learner>(Learner::Create(mat)); auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
learner->Configure({Arg{"objective", "binary:logistic"}}); learner->SetParams({Arg{"objective", "binary:logistic"}, Arg{"verbosity", "3"}});
learner->InitModel();
learner->UpdateOneIter(0, dmat.get()); learner->UpdateOneIter(0, dmat.get());
} }
TEST(Learner, Configuration) {
std::string const emetric = "eval_metric";
{
std::unique_ptr<Learner> learner { Learner::Create({nullptr}) };
learner->SetParam(emetric, "auc");
learner->SetParam(emetric, "rmsle");
learner->SetParam("foo", "bar");
// eval_metric is not part of configuration
auto attr_names = learner->GetConfigurationArguments();
ASSERT_EQ(attr_names.size(), 1);
ASSERT_EQ(attr_names.find(emetric), attr_names.cend());
ASSERT_EQ(attr_names.at("foo"), "bar");
}
{
std::unique_ptr<Learner> learner { Learner::Create({nullptr}) };
learner->SetParams({{"foo", "bar"}, {emetric, "auc"}, {emetric, "entropy"}, {emetric, "KL"}});
auto attr_names = learner->GetConfigurationArguments();
ASSERT_EQ(attr_names.size(), 1);
ASSERT_EQ(attr_names.at("foo"), "bar");
}
}
#if defined(XGBOOST_USE_CUDA) #if defined(XGBOOST_USE_CUDA)
TEST(Learner, IO) { TEST(Learner, IO) {
@ -98,13 +119,12 @@ TEST(Learner, IO) {
std::vector<std::shared_ptr<DMatrix>> mat {p_dmat}; std::vector<std::shared_ptr<DMatrix>> mat {p_dmat};
std::unique_ptr<Learner> learner {Learner::Create(mat)}; std::unique_ptr<Learner> learner {Learner::Create(mat)};
learner->Configure({Arg{"tree_method", "auto"}, learner->SetParams({Arg{"tree_method", "auto"},
Arg{"predictor", "gpu_predictor"}, Arg{"predictor", "gpu_predictor"},
Arg{"n_gpus", "-1"}}); Arg{"n_gpus", "-1"}});
learner->InitModel();
learner->UpdateOneIter(0, p_dmat.get()); learner->UpdateOneIter(0, p_dmat.get());
ASSERT_EQ(learner->GetLearnerTrainParameter().gpu_id, 0); ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
ASSERT_EQ(learner->GetLearnerTrainParameter().n_gpus, -1); ASSERT_EQ(learner->GetGenericParameter().n_gpus, -1);
dmlc::TemporaryDirectory tempdir; dmlc::TemporaryDirectory tempdir;
const std::string fname = tempdir.path + "/model.bst"; const std::string fname = tempdir.path + "/model.bst";
@ -117,8 +137,8 @@ TEST(Learner, IO) {
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r")); std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r"));
learner->Load(fi.get()); learner->Load(fi.get());
ASSERT_EQ(learner->GetLearnerTrainParameter().gpu_id, 0); ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
ASSERT_EQ(learner->GetLearnerTrainParameter().n_gpus, 0); ASSERT_EQ(learner->GetGenericParameter().n_gpus, 0);
delete pp_dmat; delete pp_dmat;
} }
@ -137,59 +157,53 @@ TEST(Learner, GPUConfiguration) {
p_dmat->Info().labels_.HostVector() = labels; p_dmat->Info().labels_.HostVector() = labels;
{ {
std::unique_ptr<Learner> learner {Learner::Create(mat)}; std::unique_ptr<Learner> learner {Learner::Create(mat)};
learner->Configure({Arg{"booster", "gblinear"}, learner->SetParams({Arg{"booster", "gblinear"},
Arg{"updater", "gpu_coord_descent"}}); Arg{"updater", "gpu_coord_descent"}});
learner->InitModel();
learner->UpdateOneIter(0, p_dmat.get()); learner->UpdateOneIter(0, p_dmat.get());
ASSERT_EQ(learner->GetLearnerTrainParameter().gpu_id, 0); ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
ASSERT_EQ(learner->GetLearnerTrainParameter().n_gpus, 1); ASSERT_EQ(learner->GetGenericParameter().n_gpus, 1);
} }
{ {
std::unique_ptr<Learner> learner {Learner::Create(mat)}; std::unique_ptr<Learner> learner {Learner::Create(mat)};
learner->Configure({Arg{"tree_method", "gpu_exact"}}); learner->SetParams({Arg{"tree_method", "gpu_exact"}});
learner->InitModel();
learner->UpdateOneIter(0, p_dmat.get()); learner->UpdateOneIter(0, p_dmat.get());
ASSERT_EQ(learner->GetLearnerTrainParameter().gpu_id, 0); ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
ASSERT_EQ(learner->GetLearnerTrainParameter().n_gpus, 1); ASSERT_EQ(learner->GetGenericParameter().n_gpus, 1);
} }
{ {
std::unique_ptr<Learner> learner {Learner::Create(mat)}; std::unique_ptr<Learner> learner {Learner::Create(mat)};
learner->Configure({Arg{"tree_method", "gpu_hist"}}); learner->SetParams({Arg{"tree_method", "gpu_hist"}});
learner->InitModel();
learner->UpdateOneIter(0, p_dmat.get()); learner->UpdateOneIter(0, p_dmat.get());
ASSERT_EQ(learner->GetLearnerTrainParameter().gpu_id, 0); ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
ASSERT_EQ(learner->GetLearnerTrainParameter().n_gpus, 1); ASSERT_EQ(learner->GetGenericParameter().n_gpus, 1);
} }
{ {
// with CPU algorithm // with CPU algorithm
std::unique_ptr<Learner> learner {Learner::Create(mat)}; std::unique_ptr<Learner> learner {Learner::Create(mat)};
learner->Configure({Arg{"tree_method", "hist"}}); learner->SetParams({Arg{"tree_method", "hist"}});
learner->InitModel();
learner->UpdateOneIter(0, p_dmat.get()); learner->UpdateOneIter(0, p_dmat.get());
ASSERT_EQ(learner->GetLearnerTrainParameter().gpu_id, 0); ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
ASSERT_EQ(learner->GetLearnerTrainParameter().n_gpus, 0); ASSERT_EQ(learner->GetGenericParameter().n_gpus, 0);
} }
{ {
// with CPU algorithm, but `n_gpus` takes priority // with CPU algorithm, but `n_gpus` takes priority
std::unique_ptr<Learner> learner {Learner::Create(mat)}; std::unique_ptr<Learner> learner {Learner::Create(mat)};
learner->Configure({Arg{"tree_method", "hist"}, learner->SetParams({Arg{"tree_method", "hist"},
Arg{"n_gpus", "1"}}); Arg{"n_gpus", "1"}});
learner->InitModel();
learner->UpdateOneIter(0, p_dmat.get()); learner->UpdateOneIter(0, p_dmat.get());
ASSERT_EQ(learner->GetLearnerTrainParameter().gpu_id, 0); ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
ASSERT_EQ(learner->GetLearnerTrainParameter().n_gpus, 1); ASSERT_EQ(learner->GetGenericParameter().n_gpus, 1);
} }
{ {
// With CPU algorithm but GPU Predictor, this is to simulate when // With CPU algorithm but GPU Predictor, this is to simulate when
// XGBoost is only used for prediction, so tree method is not // XGBoost is only used for prediction, so tree method is not
// specified. // specified.
std::unique_ptr<Learner> learner {Learner::Create(mat)}; std::unique_ptr<Learner> learner {Learner::Create(mat)};
learner->Configure({Arg{"tree_method", "hist"}, learner->SetParams({Arg{"tree_method", "hist"},
Arg{"predictor", "gpu_predictor"}}); Arg{"predictor", "gpu_predictor"}});
learner->InitModel();
learner->UpdateOneIter(0, p_dmat.get()); learner->UpdateOneIter(0, p_dmat.get());
ASSERT_EQ(learner->GetLearnerTrainParameter().gpu_id, 0); ASSERT_EQ(learner->GetGenericParameter().gpu_id, 0);
ASSERT_EQ(learner->GetLearnerTrainParameter().n_gpus, 1); ASSERT_EQ(learner->GetGenericParameter().n_gpus, 1);
} }
delete pp_dmat; delete pp_dmat;


@ -10,14 +10,14 @@ TEST(Logging, Basic) {
std::string output; std::string output;
args["verbosity"] = "0"; // silent args["verbosity"] = "0"; // silent
ConsoleLogger::Configure(args.cbegin(), args.cend()); ConsoleLogger::Configure({args.cbegin(), args.cend()});
testing::internal::CaptureStderr(); testing::internal::CaptureStderr();
LOG(DEBUG) << "Test silent."; LOG(DEBUG) << "Test silent.";
output = testing::internal::GetCapturedStderr(); output = testing::internal::GetCapturedStderr();
ASSERT_EQ(output.length(), 0); ASSERT_EQ(output.length(), 0);
args["verbosity"] = "3"; // debug args["verbosity"] = "3"; // debug
ConsoleLogger::Configure(args.cbegin(), args.cend()); ConsoleLogger::Configure({args.cbegin(), args.cend()});
testing::internal::CaptureStderr(); testing::internal::CaptureStderr();
LOG(WARNING) << "Test Log Warning."; LOG(WARNING) << "Test Log Warning.";
@ -35,14 +35,14 @@ TEST(Logging, Basic) {
ASSERT_NE(output.find("DEBUG"), std::string::npos); ASSERT_NE(output.find("DEBUG"), std::string::npos);
args["verbosity"] = "1"; // warning args["verbosity"] = "1"; // warning
ConsoleLogger::Configure(args.cbegin(), args.cend()); ConsoleLogger::Configure({args.cbegin(), args.cend()});
testing::internal::CaptureStderr(); testing::internal::CaptureStderr();
LOG(INFO) << "INFO should not be displayed when set to warning."; LOG(INFO) << "INFO should not be displayed when set to warning.";
output = testing::internal::GetCapturedStderr(); output = testing::internal::GetCapturedStderr();
ASSERT_EQ(output.size(), 0); ASSERT_EQ(output.size(), 0);
args["silent"] = "True"; args["silent"] = "True";
ConsoleLogger::Configure(args.cbegin(), args.cend()); ConsoleLogger::Configure({args.cbegin(), args.cend()});
testing::internal::CaptureStderr(); testing::internal::CaptureStderr();
LOG(INFO) << "Test silent parameter."; LOG(INFO) << "Test silent parameter.";
output = testing::internal::GetCapturedStderr(); output = testing::internal::GetCapturedStderr();
@ -54,7 +54,7 @@ TEST(Logging, Basic) {
ASSERT_NE(output.find("Test Log Console"), std::string::npos); ASSERT_NE(output.find("Test Log Console"), std::string::npos);
args["verbosity"] = "1"; // restore args["verbosity"] = "1"; // restore
ConsoleLogger::Configure(args.cbegin(), args.cend()); ConsoleLogger::Configure({args.cbegin(), args.cend()});
} }
} // namespace xgboost } // namespace xgboost


@ -1,12 +1,13 @@
// Copyright by Contributors // Copyright by Contributors
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <xgboost/base.h>
#include <xgboost/logging.h> #include <xgboost/logging.h>
#include <string> #include <string>
#include <vector> #include <vector>
int main(int argc, char ** argv) { int main(int argc, char ** argv) {
std::vector<std::pair<std::string, std::string>> args {{"verbosity", "2"}}; xgboost::Args args {{"verbosity", "2"}};
xgboost::ConsoleLogger::Configure(args.begin(), args.end()); xgboost::ConsoleLogger::Configure(args);
testing::InitGoogleTest(&argc, argv); testing::InitGoogleTest(&argc, argv);
testing::FLAGS_gtest_death_test_style = "threadsafe"; testing::FLAGS_gtest_death_test_style = "threadsafe";
return RUN_ALL_TESTS(); return RUN_ALL_TESTS();
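Editor's note: with the `Args` typedef, call sites can hand the logger a braced pair list directly instead of an iterator range. A minimal sketch under that assumption (the verbosity value is arbitrary):

#include <xgboost/base.h>
#include <xgboost/logging.h>

int main() {
  // Old style: Configure(args.begin(), args.end());
  // New style: pass the pair list (xgboost::Args) directly.
  xgboost::ConsoleLogger::Configure({{"verbosity", "2"}});
  LOG(WARNING) << "logger configured";  // warnings are shown at verbosity >= 1
  return 0;
}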


@ -16,7 +16,7 @@ TEST(GPUExact, Update) {
std::vector<Arg> args{{"max_depth", "1"}}; std::vector<Arg> args{{"max_depth", "1"}};
auto* p_gpuexact_maker = TreeUpdater::Create("grow_gpu", &lparam); auto* p_gpuexact_maker = TreeUpdater::Create("grow_gpu", &lparam);
p_gpuexact_maker->Init(args); p_gpuexact_maker->Configure(args);
size_t constexpr kNRows = 4; size_t constexpr kNRows = 4;
size_t constexpr kNCols = 8; size_t constexpr kNCols = 8;


@ -384,10 +384,11 @@ void TestHistogramIndexImpl(int n_gpus) {
{"max_leaves", "0"} {"max_leaves", "0"}
}; };
LearnerTrainParam learner_param(CreateEmptyGenericParam(0, n_gpus)); GenericParameter generic_param(CreateEmptyGenericParam(0, n_gpus));
hist_maker.Init(training_params, &learner_param); hist_maker.Configure(training_params, &generic_param);
hist_maker.InitDataOnce(hist_maker_dmat.get()); hist_maker.InitDataOnce(hist_maker_dmat.get());
hist_maker_ext.Init(training_params, &learner_param); hist_maker_ext.Configure(training_params, &generic_param);
hist_maker_ext.InitDataOnce(hist_maker_ext_dmat.get()); hist_maker_ext.InitDataOnce(hist_maker_ext_dmat.get());
ASSERT_EQ(hist_maker.shards_.size(), hist_maker_ext.shards_.size()); ASSERT_EQ(hist_maker.shards_.size(), hist_maker_ext.shards_.size());


@ -37,7 +37,7 @@ TEST(Updater, Prune) {
std::vector<RegTree*> trees {&tree}; std::vector<RegTree*> trees {&tree};
// prepare pruner // prepare pruner
std::unique_ptr<TreeUpdater> pruner(TreeUpdater::Create("prune", &lparam)); std::unique_ptr<TreeUpdater> pruner(TreeUpdater::Create("prune", &lparam));
pruner->Init(cfg); pruner->Configure(cfg);
// loss_chg < min_split_loss; // loss_chg < min_split_loss;
tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 0.0f, 0.0f); tree.ExpandNode(0, 0, 0, true, 0.0f, 0.3f, 0.4f, 0.0f, 0.0f);


@ -236,7 +236,7 @@ class QuantileHistMock : public QuantileHistMaker {
explicit QuantileHistMock( explicit QuantileHistMock(
const std::vector<std::pair<std::string, std::string> >& args) : const std::vector<std::pair<std::string, std::string> >& args) :
cfg_{args} { cfg_{args} {
QuantileHistMaker::Init(args); QuantileHistMaker::Configure(args);
builder_.reset( builder_.reset(
new BuilderMock( new BuilderMock(
param_, param_,


@ -37,7 +37,7 @@ TEST(Updater, Refresh) {
tree.Stat(cleft).base_weight = 1.2; tree.Stat(cleft).base_weight = 1.2;
tree.Stat(cright).base_weight = 1.3; tree.Stat(cright).base_weight = 1.3;
refresher->Init(cfg); refresher->Configure(cfg);
refresher->Update(&gpair, dmat->get(), trees); refresher->Update(&gpair, dmat->get(), trees);
bst_float constexpr kEps = 1e-6; bst_float constexpr kEps = 1e-6;


@ -32,7 +32,7 @@ TEST(SplitEvaluator, Interaction) {
} }
std::vector<int32_t> solutions{4, 5}; std::vector<int32_t> solutions{4, 5};
ASSERT_EQ(accepted_features.size(), solutions.size()); ASSERT_EQ(accepted_features.size(), solutions.size());
for (int32_t f = 0; f < accepted_features.size(); ++f) { for (size_t f = 0; f < accepted_features.size(); ++f) {
ASSERT_EQ(accepted_features[f], solutions[f]); ASSERT_EQ(accepted_features[f], solutions[f]);
} }
} }
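Editor's note: the index-type change above is only about comparing like with like; std::vector::size() returns an unsigned type, so a signed int32_t counter draws a -Wsign-compare warning. A standalone illustration (names are placeholders, not from the sources):

#include <cstddef>
#include <vector>

// size() is unsigned; using size_t for the index keeps the loop comparison
// within a single type and avoids -Wsign-compare.
void VisitAll(std::vector<int> const& values) {
  for (size_t i = 0; i < values.size(); ++i) {
    (void)values[i];  // placeholder for per-element work
  }
}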


@ -154,7 +154,7 @@ class TestModels(unittest.TestCase):
def test_multi_eval_metric(self): def test_multi_eval_metric(self):
watchlist = [(dtest, 'eval'), (dtrain, 'train')] watchlist = [(dtest, 'eval'), (dtrain, 'train')]
param = {'max_depth': 2, 'eta': 0.2, 'verbosity': 0, param = {'max_depth': 2, 'eta': 0.2, 'verbosity': 1,
'objective': 'binary:logistic'} 'objective': 'binary:logistic'}
param['eval_metric'] = ["auc", "logloss", 'error'] param['eval_metric'] = ["auc", "logloss", 'error']
evals_result = {} evals_result = {}