Offload some configurations into GBM. (#4553)
This is part 1 of refactoring configuration.
* Move tree heuristic configurations.
* Split up declarations and definitions for GBTree.
* Implement UseGPU in gbm.
This commit is contained in:
parent
a2042b685a
commit
c5719cc457
@@ -146,6 +146,10 @@ class GradientBooster {
  virtual std::vector<std::string> DumpModel(const FeatureMap& fmap,
                                             bool with_stats,
                                             std::string format) const = 0;
  /*!
   * \brief Whether the current booster uses GPU.
   */
  virtual bool UseGPU() const = 0;
  /*!
   * \brief create a gradient booster from given name
   * \param name name of gradient booster
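The new pure-virtual UseGPU() makes every booster state for itself whether it needs a GPU, instead of the Learner inspecting tree_method, updater and predictor. A minimal self-contained sketch of that dispatch (the class and member names below are illustrative stand-ins, not the actual xgboost headers):

    #include <string>
    #include <utility>

    // Illustrative stand-in for the GradientBooster interface; not xgboost's real class.
    class BoosterIface {
     public:
      virtual ~BoosterIface() = default;
      // Report whether this booster is configured to run on the GPU.
      virtual bool UseGPU() const = 0;
    };

    // Hypothetical linear booster: GPU use follows from the chosen updater name.
    class LinearBoosterSketch : public BoosterIface {
     public:
      explicit LinearBoosterSketch(std::string updater) : updater_(std::move(updater)) {}
      bool UseGPU() const override { return updater_ == "gpu_coord_descent"; }
     private:
      std::string updater_;
    };

    int main() {
      LinearBoosterSketch cpu("coord_descent"), gpu("gpu_coord_descent");
      return (!cpu.UseGPU() && gpu.UseGPU()) ? 0 : 1;
    }

This mirrors how GBLinear and GBTree answer the question from their own parameters later in this diff.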
@@ -8,18 +8,15 @@
#include <dmlc/parameter.h>
#include <xgboost/enum_class_param.h>

#include <string>

namespace xgboost {
enum class TreeMethod : int {
  kAuto = 0, kApprox = 1, kExact = 2, kHist = 3,
  kGPUExact = 4, kGPUHist = 5
};

enum class DataSplitMode : int {
  kAuto = 0, kCol = 1, kRow = 2
};
}  // namespace xgboost

DECLARE_FIELD_ENUM_CLASS(xgboost::TreeMethod);
DECLARE_FIELD_ENUM_CLASS(xgboost::DataSplitMode);

namespace xgboost {
@@ -30,8 +27,6 @@ struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
  bool seed_per_iteration;
  // data split mode, can be row, col, or none.
  DataSplitMode dsplit;
  // tree construction method
  TreeMethod tree_method;
  // number of threads to use if OpenMP is enabled
  // if equals 0, use system default
  int nthread;
@@ -42,6 +37,8 @@ struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
  // number of devices to use, -1 implies using all available devices.
  int n_gpus;

  std::string booster;

  // declare parameters
  DMLC_DECLARE_PARAMETER(LearnerTrainParam) {
    DMLC_DECLARE_FIELD(seed).set_default(0).describe(
@@ -58,15 +55,6 @@ struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
        .add_enum("col", DataSplitMode::kCol)
        .add_enum("row", DataSplitMode::kRow)
        .describe("Data split mode for distributed training.");
    DMLC_DECLARE_FIELD(tree_method)
        .set_default(TreeMethod::kAuto)
        .add_enum("auto", TreeMethod::kAuto)
        .add_enum("approx", TreeMethod::kApprox)
        .add_enum("exact", TreeMethod::kExact)
        .add_enum("hist", TreeMethod::kHist)
        .add_enum("gpu_exact", TreeMethod::kGPUExact)
        .add_enum("gpu_hist", TreeMethod::kGPUHist)
        .describe("Choice of tree construction method.");
    DMLC_DECLARE_FIELD(nthread).set_default(0).describe(
        "Number of threads to use.");
    DMLC_DECLARE_FIELD(disable_default_eval_metric)
@@ -79,6 +67,9 @@ struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
        .set_default(0)
        .set_lower_bound(-1)
        .describe("Number of GPUs to use for multi-gpu algorithms.");
    DMLC_DECLARE_FIELD(booster)
        .set_default("gbtree")
        .describe("Gradient booster used for training.");
  }
};
}  // namespace xgboost
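TreeMethod and DataSplitMode are scoped enums registered with DECLARE_FIELD_ENUM_CLASS, so the dmlc parameter machinery can turn a textual setting such as tree_method=gpu_hist straight into an enum value. A self-contained sketch of roughly what that string-to-enum step amounts to (plain std::map instead of the real dmlc code generation; names are illustrative):

    #include <iostream>
    #include <map>
    #include <stdexcept>
    #include <string>

    enum class TreeMethod : int {
      kAuto = 0, kApprox = 1, kExact = 2, kHist = 3,
      kGPUExact = 4, kGPUHist = 5
    };

    // Look the token up and fail loudly on anything unknown, much like the
    // generated field-enum parser would.
    TreeMethod ParseTreeMethod(const std::string& token) {
      static const std::map<std::string, TreeMethod> table{
          {"auto", TreeMethod::kAuto},          {"approx", TreeMethod::kApprox},
          {"exact", TreeMethod::kExact},        {"hist", TreeMethod::kHist},
          {"gpu_exact", TreeMethod::kGPUExact}, {"gpu_hist", TreeMethod::kGPUHist}};
      auto it = table.find(token);
      if (it == table.end()) {
        throw std::invalid_argument("Unknown tree_method: " + token);
      }
      return it->second;
    }

    int main() {
      std::cout << static_cast<int>(ParseTreeMethod("gpu_hist")) << "\n";  // prints 5
    }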
@@ -37,6 +37,7 @@ GPUSet GPUSet::All(GpuIdType gpu_id, GpuIdType n_gpus, int32_t n_rows) {
  CHECK_GE(n_gpus, -1) << "n_gpus must be >= -1.";

  GpuIdType const n_devices_visible = AllVisible().Size();
  CHECK_LE(n_gpus, n_devices_visible);
  if (n_devices_visible == 0 || n_gpus == 0 || n_rows == 0) {
    LOG(DEBUG) << "Running on CPU.";
    return Empty();
@@ -360,7 +360,9 @@ struct HostDeviceVectorImpl {

  void Shard(const GPUDistribution& distribution) {
    if (distribution_ == distribution) { return; }
    CHECK(distribution_.IsEmpty());
    CHECK(distribution_.IsEmpty())
        << "This: " << distribution_.Devices().Size() << ", "
        << "Others: " << distribution.Devices().Size();
    distribution_ = distribution;
    InitShards();
  }
@@ -180,6 +180,14 @@ class GBLinear : public GradientBooster {
    return model_.DumpModel(fmap, with_stats, format);
  }

  bool UseGPU() const override {
    if (param_.updater == "gpu_coord_descent") {
      return true;
    } else {
      return false;
    }
  }

 protected:
  void PredictBatchInternal(DMatrix *p_fmat,
                            std::vector<bst_float> *out_preds) {
@@ -257,6 +265,7 @@ class GBLinear : public GradientBooster {
    }
    preds[gid] = psum;
  }

  // bias margin score
  bst_float base_margin_;
  // model field
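The if/else in GBLinear::UseGPU is equivalent to returning the comparison directly; a compressed form with the same behavior (purely illustrative, not part of the diff) would be:

    bool UseGPU() const override {
      return param_.updater == "gpu_coord_descent";
    }

Either way, the linear booster reports GPU usage only when the gpu_coord_descent updater is selected.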
@@ -1,5 +1,5 @@
/*!
 * Copyright 2014 by Contributors
 * Copyright 2014-2019 by Contributors
 * \file gbtree.cc
 * \brief gradient boosted tree implementation.
 * \author Tianqi Chen
@@ -11,309 +11,280 @@
#include <xgboost/gbm.h>
#include <xgboost/predictor.h>
#include <xgboost/tree_updater.h>

#include <vector>
#include <memory>
#include <utility>
#include <string>
#include <limits>
#include <algorithm>

#include "../common/common.h"
#include "../common/host_device_vector.h"
#include "../common/random.h"
#include "gbtree.h"
#include "gbtree_model.h"
#include "../common/timer.h"

namespace xgboost {
namespace gbm {

DMLC_REGISTRY_FILE_TAG(gbtree);

// boosting process types
enum TreeProcessType {
  kDefault,
  kUpdate
};
void GBTree::Configure(const std::vector<std::pair<std::string, std::string> >& cfg) {
  this->cfg_ = cfg;
  tparam_.InitAllowUnknown(cfg);
  std::string updater_seq = tparam_.updater_seq;

/*! \brief training parameters */
struct GBTreeTrainParam : public dmlc::Parameter<GBTreeTrainParam> {
  /*!
   * \brief number of parallel trees constructed each iteration
   *  use this option to support boosted random forest
   */
  int num_parallel_tree;
  /*! \brief tree updater sequence */
  std::string updater_seq;
  /*! \brief type of boosting process to run */
  int process_type;
  std::string predictor;
  // declare parameters
  DMLC_DECLARE_PARAMETER(GBTreeTrainParam) {
    DMLC_DECLARE_FIELD(num_parallel_tree)
        .set_default(1)
        .set_lower_bound(1)
        .describe("Number of parallel trees constructed during each iteration."\
                  " This option is used to support boosted random forest.");
    DMLC_DECLARE_FIELD(updater_seq)
        .set_default("grow_colmaker,prune")
        .describe("Tree updater sequence.");
    DMLC_DECLARE_FIELD(process_type)
        .set_default(kDefault)
        .add_enum("default", kDefault)
        .add_enum("update", kUpdate)
        .describe("Whether to run the normal boosting process that creates new trees,"\
                  " or to update the trees in an existing model.");
    // add alias
    DMLC_DECLARE_ALIAS(updater_seq, updater);
    DMLC_DECLARE_FIELD(predictor)
        .set_default("cpu_predictor")
        .describe("Predictor algorithm type");
  }
};
  ConfigureUpdaters({cfg.begin(), cfg.cend()});

/*! \brief training parameters */
struct DartTrainParam : public dmlc::Parameter<DartTrainParam> {
  /*! \brief type of sampling algorithm */
  int sample_type;
  /*! \brief type of normalization algorithm */
  int normalize_type;
  /*! \brief fraction of trees to drop during the dropout */
  float rate_drop;
  /*! \brief whether at least one tree should always be dropped during the dropout */
  bool one_drop;
  /*! \brief probability of skipping the dropout during an iteration */
  float skip_drop;
  /*! \brief learning step size for a time */
  float learning_rate;
  // declare parameters
  DMLC_DECLARE_PARAMETER(DartTrainParam) {
    DMLC_DECLARE_FIELD(sample_type)
        .set_default(0)
        .add_enum("uniform", 0)
        .add_enum("weighted", 1)
        .describe("Different types of sampling algorithm.");
    DMLC_DECLARE_FIELD(normalize_type)
        .set_default(0)
        .add_enum("tree", 0)
        .add_enum("forest", 1)
        .describe("Different types of normalization algorithm.");
    DMLC_DECLARE_FIELD(rate_drop)
        .set_range(0.0f, 1.0f)
        .set_default(0.0f)
        .describe("Fraction of trees to drop during the dropout.");
    DMLC_DECLARE_FIELD(one_drop)
        .set_default(false)
        .describe("Whether at least one tree should always be dropped during the dropout.");
    DMLC_DECLARE_FIELD(skip_drop)
        .set_range(0.0f, 1.0f)
        .set_default(0.0f)
        .describe("Probability of skipping the dropout during a boosting iteration.");
    DMLC_DECLARE_FIELD(learning_rate)
        .set_lower_bound(0.0f)
        .set_default(0.3f)
        .describe("Learning rate(step size) of update.");
    DMLC_DECLARE_ALIAS(learning_rate, eta);
  }
};
  model_.Configure(cfg);
// cache entry
struct CacheEntry {
  std::shared_ptr<DMatrix> data;
  std::vector<bst_float> predictions;
};

// gradient boosted trees
class GBTree : public GradientBooster {
 public:
  explicit GBTree(bst_float base_margin) : model_(base_margin) {}

  void InitCache(const std::vector<std::shared_ptr<DMatrix> > &cache) {
    cache_ = cache;
  // for the 'update' process_type, move trees into trees_to_update
  if (tparam_.process_type == TreeProcessType::kUpdate) {
    model_.InitTreesToUpdate();
  }

  void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) override {
    this->cfg_ = cfg;
    model_.Configure(cfg);
    // initialize the updaters only when needed.
    std::string updater_seq = tparam_.updater_seq;
    tparam_.InitAllowUnknown(cfg);
    if (updater_seq != tparam_.updater_seq) updaters_.clear();
    for (const auto& up : updaters_) {
      up->Init(cfg);
  // configure predictor
  predictor_ = std::unique_ptr<Predictor>(
      Predictor::Create(tparam_.predictor, this->learner_param_));
  predictor_->Init(cfg, cache_);
  monitor_.Init("GBTree");
  }
void GBTree::PerformTreeMethodHeuristic(DMatrix* p_train,
                                        std::map<std::string, std::string> cfg) {
  if (cfg.find("updater") != cfg.cend()) {
    // This method is disabled when `updater` parameter is explicitly
    // set, since only experts are expected to do so.
    return;
  }

  const TreeMethod current_tree_method = tparam_.tree_method;

  if (rabit::IsDistributed()) {
    switch (current_tree_method) {
      case TreeMethod::kAuto:
        LOG(WARNING) <<
            "Tree method is automatically selected to be 'approx' "
            "for distributed training.";
        break;
      case TreeMethod::kApprox:
      case TreeMethod::kHist:
        // things are okay, do nothing
        break;
      case TreeMethod::kExact:
        LOG(WARNING) << "Tree method was set to be 'exact', "
                        "but only 'approx' and 'hist' are available for distributed "
                        "training. The `tree_method` parameter is now being "
                        "changed to 'approx'";
        break;
      case TreeMethod::kGPUExact:
        // FIXME(trivialfis): Remove this line once GPU Exact is removed.
        LOG(FATAL) << "Distributed training is not available with GPU Exact algorithm.";
        break;
      case TreeMethod::kGPUHist:
        break;
      default:
        LOG(FATAL) << "Unknown tree_method ("
                   << static_cast<int>(current_tree_method) << ") detected";
    }
  // for the 'update' process_type, move trees into trees_to_update
  if (tparam_.process_type == kUpdate) {
    model_.InitTreesToUpdate();
  }

  // configure predictor
  predictor_ = std::unique_ptr<Predictor>(Predictor::Create(tparam_.predictor, learner_param_));
  predictor_->Init(cfg, cache_);
  monitor_.Init("GBTree");
  }

  void Load(dmlc::Stream* fi) override {
    model_.Load(fi);

    this->cfg_.clear();
    this->cfg_.emplace_back(std::string("num_feature"),
                            common::ToString(model_.param.num_feature));
  }

  void Save(dmlc::Stream* fo) const override {
    model_.Save(fo);
  }

  bool AllowLazyCheckPoint() const override {
    return model_.param.num_output_group == 1 ||
        tparam_.updater_seq.find("distcol") != std::string::npos;
  }

  void DoBoost(DMatrix* p_fmat,
               HostDeviceVector<GradientPair>* in_gpair,
               ObjFunction* obj) override {
    std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees;
    const int ngroup = model_.param.num_output_group;
    monitor_.Start("BoostNewTrees");
    if (ngroup == 1) {
      std::vector<std::unique_ptr<RegTree> > ret;
      BoostNewTrees(in_gpair, p_fmat, 0, &ret);
      new_trees.push_back(std::move(ret));
    if (current_tree_method != TreeMethod::kHist) {
      LOG(WARNING) << "Tree method is automatically selected to be 'approx'"
                      " for distributed training.";
      tparam_.tree_method = TreeMethod::kApprox;
    } else {
      CHECK_EQ(in_gpair->Size() % ngroup, 0U)
          << "must have exactly ngroup*nrow gpairs";
      // TODO(canonizer): perform this on GPU if HostDeviceVector has device set.
      HostDeviceVector<GradientPair> tmp
      LOG(WARNING) << "Tree method is specified to be 'hist'"
                      " for distributed training.";
      tparam_.tree_method = TreeMethod::kHist;
    }
  } else if (!p_train->SingleColBlock()) {
    /* Some tree methods are not available for external-memory DMatrix */
    switch (current_tree_method) {
      case TreeMethod::kAuto:
        LOG(WARNING) << "Tree method is automatically set to 'approx' "
                        "since external-memory data matrix is used.";
        break;
      case TreeMethod::kApprox:
        // things are okay, do nothing
        break;
      case TreeMethod::kExact:
        LOG(WARNING) << "Tree method was set to be 'exact', "
                        "but currently we are only able to proceed with "
                        "approximate algorithm ('approx') because external-"
                        "memory data matrix is used.";
        break;
      case TreeMethod::kHist:
        // things are okay, do nothing
        break;
      case TreeMethod::kGPUExact:
      case TreeMethod::kGPUHist:
        LOG(FATAL)
            << "External-memory data matrix is not available with GPU algorithms";
        break;
      default:
        LOG(FATAL) << "Unknown tree_method ("
                   << static_cast<int>(current_tree_method) << ") detected";
    }
    tparam_.tree_method = TreeMethod::kApprox;
  } else if (p_train->Info().num_row_ >= (4UL << 20UL)
             && current_tree_method == TreeMethod::kAuto) {
    /* Choose tree_method='approx' automatically for large data matrix */
    LOG(WARNING) << "Tree method is automatically selected to be "
                    "'approx' for faster speed. To use old behavior "
                    "(exact greedy algorithm on single machine), "
                    "set tree_method to 'exact'.";
    tparam_.tree_method = TreeMethod::kApprox;
  }
  LOG(DEBUG) << "Using predictor: " << tparam_.predictor;
}
void GBTree::ConfigureUpdaters(const std::map<std::string, std::string>& cfg) {
  // `updater` parameter was manually specified
  if (cfg.find("updater") != cfg.cend()) {
    LOG(WARNING) << "DANGER AHEAD: You have manually specified `updater` "
                    "parameter. The `tree_method` parameter will be ignored. "
                    "Incorrect sequence of updaters will produce undefined "
                    "behavior. For common uses, we recommend using "
                    "`tree_method` parameter instead.";
    return;
  }

  /* Choose updaters according to tree_method parameters */
  switch (tparam_.tree_method) {
    case TreeMethod::kAuto:
      // Use heuristic to choose between 'exact' and 'approx'
      // This choice is deferred to PerformTreeMethodHeuristic().
      break;
    case TreeMethod::kApprox:
      tparam_.updater_seq = "grow_histmaker,prune";
      break;
    case TreeMethod::kExact:
      tparam_.updater_seq = "grow_colmaker,prune";
      break;
    case TreeMethod::kHist:
      LOG(INFO) <<
          "Tree method is selected to be 'hist', which uses a "
          "single updater grow_quantile_histmaker.";
      tparam_.updater_seq = "grow_quantile_histmaker";
      break;
    case TreeMethod::kGPUExact:
      this->AssertGPUSupport();
      tparam_.updater_seq = "grow_gpu,prune";
      if (cfg.find("predictor") == cfg.cend()) {
        tparam_.predictor = "gpu_predictor";
      }
      break;
    case TreeMethod::kGPUHist:
      this->AssertGPUSupport();
      tparam_.updater_seq = "grow_gpu_hist";
      if (cfg.find("predictor") == cfg.cend()) {
        tparam_.predictor = "gpu_predictor";
      }
      break;
    default:
      LOG(FATAL) << "Unknown tree_method ("
                 << static_cast<int>(tparam_.tree_method) << ") detected";
  }
}
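ConfigureUpdaters is essentially a lookup from tree_method to an updater sequence string (plus a GPU predictor default). A self-contained sketch of that mapping, detached from the GBTree class so it can be compiled and tested on its own (function and enum names here are illustrative, not the xgboost API):

    #include <cassert>
    #include <string>

    enum class TreeMethod { kAuto, kApprox, kExact, kHist, kGPUExact, kGPUHist };

    // Map a tree method onto the comma-separated updater sequence it implies.
    std::string UpdaterSeqFor(TreeMethod m) {
      switch (m) {
        case TreeMethod::kApprox:   return "grow_histmaker,prune";
        case TreeMethod::kExact:    return "grow_colmaker,prune";
        case TreeMethod::kHist:     return "grow_quantile_histmaker";
        case TreeMethod::kGPUExact: return "grow_gpu,prune";
        case TreeMethod::kGPUHist:  return "grow_gpu_hist";
        case TreeMethod::kAuto:     return "";  // deferred to the heuristic
      }
      return "";
    }

    int main() {
      assert(UpdaterSeqFor(TreeMethod::kHist) == "grow_quantile_histmaker");
      assert(UpdaterSeqFor(TreeMethod::kGPUExact) == "grow_gpu,prune");
      return 0;
    }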
void GBTree::DoBoost(DMatrix* p_fmat,
                     HostDeviceVector<GradientPair>* in_gpair,
                     ObjFunction* obj) {
  std::string updater_seq = tparam_.updater_seq;
  this->PerformTreeMethodHeuristic(p_fmat, {this->cfg_.begin(), this->cfg_.end()});
  this->ConfigureUpdaters({this->cfg_.begin(), this->cfg_.end()});
  LOG(DEBUG) << "Using updaters: " << tparam_.updater_seq;
  // initialize the updaters only when needed.
  if (updater_seq != tparam_.updater_seq) {
    this->updaters_.clear();
  }

  std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees;
  const int ngroup = model_.param.num_output_group;
  monitor_.Start("BoostNewTrees");
  if (ngroup == 1) {
    std::vector<std::unique_ptr<RegTree> > ret;
    BoostNewTrees(in_gpair, p_fmat, 0, &ret);
    new_trees.push_back(std::move(ret));
  } else {
    CHECK_EQ(in_gpair->Size() % ngroup, 0U)
        << "must have exactly ngroup*nrow gpairs";
    // TODO(canonizer): perform this on GPU if HostDeviceVector has device set.
    HostDeviceVector<GradientPair> tmp
        (in_gpair->Size() / ngroup, GradientPair(),
         GPUDistribution::Block(in_gpair->Distribution().Devices()));
    const auto& gpair_h = in_gpair->ConstHostVector();
    auto nsize = static_cast<bst_omp_uint>(tmp.Size());
    for (int gid = 0; gid < ngroup; ++gid) {
      std::vector<GradientPair>& tmp_h = tmp.HostVector();
#pragma omp parallel for schedule(static)
      for (bst_omp_uint i = 0; i < nsize; ++i) {
        tmp_h[i] = gpair_h[i * ngroup + gid];
      }
      std::vector<std::unique_ptr<RegTree> > ret;
      BoostNewTrees(&tmp, p_fmat, gid, &ret);
      new_trees.push_back(std::move(ret));
    }
  }
  monitor_.Stop("BoostNewTrees");
  monitor_.Start("CommitModel");
  this->CommitModel(std::move(new_trees));
  monitor_.Stop("CommitModel");
}
  void PredictBatch(DMatrix* p_fmat,
                    HostDeviceVector<bst_float>* out_preds,
                    unsigned ntree_limit) override {
    predictor_->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
  }

  void PredictInstance(const SparsePage::Inst& inst,
                       std::vector<bst_float>* out_preds,
                       unsigned ntree_limit,
                       unsigned root_index) override {
    predictor_->PredictInstance(inst, out_preds, model_,
                                ntree_limit, root_index);
  }

  void PredictLeaf(DMatrix* p_fmat,
                   std::vector<bst_float>* out_preds,
                   unsigned ntree_limit) override {
    predictor_->PredictLeaf(p_fmat, out_preds, model_, ntree_limit);
  }

  void PredictContribution(DMatrix* p_fmat,
                           std::vector<bst_float>* out_contribs,
                           unsigned ntree_limit, bool approximate, int condition,
                           unsigned condition_feature) override {
    predictor_->PredictContribution(p_fmat, out_contribs, model_, ntree_limit, approximate);
  }

  void PredictInteractionContributions(DMatrix* p_fmat,
                                       std::vector<bst_float>* out_contribs,
                                       unsigned ntree_limit, bool approximate) override {
    predictor_->PredictInteractionContributions(p_fmat, out_contribs, model_,
                                                ntree_limit, approximate);
  }

  std::vector<std::string> DumpModel(const FeatureMap& fmap,
                                     bool with_stats,
                                     std::string format) const override {
    return model_.DumpModel(fmap, with_stats, format);
  }

 protected:
  // initialize updater before using them
  inline void InitUpdater() {
    if (updaters_.size() != 0) return;
    std::string tval = tparam_.updater_seq;
    std::vector<std::string> ups = common::Split(tval, ',');
    for (const std::string& pstr : ups) {
      std::unique_ptr<TreeUpdater> up(TreeUpdater::Create(pstr.c_str(), learner_param_));
      up->Init(this->cfg_);
      updaters_.push_back(std::move(up));
    }
  }
  // do group specific group
  inline void BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
                            DMatrix *p_fmat,
                            int bst_group,
                            std::vector<std::unique_ptr<RegTree> >* ret) {
    this->InitUpdater();
    std::vector<RegTree*> new_trees;
    ret->clear();
    // create the trees
    for (int i = 0; i < tparam_.num_parallel_tree; ++i) {
      if (tparam_.process_type == kDefault) {
        // create new tree
        std::unique_ptr<RegTree> ptr(new RegTree());
        ptr->param.InitAllowUnknown(this->cfg_);
        new_trees.push_back(ptr.get());
        ret->push_back(std::move(ptr));
      } else if (tparam_.process_type == kUpdate) {
        CHECK_LT(model_.trees.size(), model_.trees_to_update.size());
        // move an existing tree from trees_to_update
        auto t = std::move(model_.trees_to_update[model_.trees.size() +
                                                  bst_group * tparam_.num_parallel_tree + i]);
        new_trees.push_back(t.get());
        ret->push_back(std::move(t));
      }
    }
    // update the trees
    for (auto& up : updaters_) {
      up->Update(gpair, p_fmat, new_trees);
    monitor_.Stop("BoostNewTrees");
    monitor_.Start("CommitModel");
    this->CommitModel(std::move(new_trees));
    monitor_.Stop("CommitModel");
    }
  }

  // commit new trees all at once
  virtual void
  CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) {
    int num_new_trees = 0;
    for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
      num_new_trees += new_trees[gid].size();
      model_.CommitModel(std::move(new_trees[gid]), gid);

void GBTree::InitUpdater() {
  if (updaters_.size() != 0) return;
  std::string tval = tparam_.updater_seq;
  std::vector<std::string> ups = common::Split(tval, ',');
  for (const std::string& pstr : ups) {
    std::unique_ptr<TreeUpdater> up(TreeUpdater::Create(pstr.c_str(), learner_param_));
    up->Init(this->cfg_);
    updaters_.push_back(std::move(up));
  }
}

void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
                           DMatrix *p_fmat,
                           int bst_group,
                           std::vector<std::unique_ptr<RegTree> >* ret) {
  this->InitUpdater();
  std::vector<RegTree*> new_trees;
  ret->clear();
  // create the trees
  for (int i = 0; i < tparam_.num_parallel_tree; ++i) {
    if (tparam_.process_type == TreeProcessType::kDefault) {
      // create new tree
      std::unique_ptr<RegTree> ptr(new RegTree());
      ptr->param.InitAllowUnknown(this->cfg_);
      new_trees.push_back(ptr.get());
      ret->push_back(std::move(ptr));
    } else if (tparam_.process_type == TreeProcessType::kUpdate) {
      CHECK_LT(model_.trees.size(), model_.trees_to_update.size());
      // move an existing tree from trees_to_update
      auto t = std::move(model_.trees_to_update[model_.trees.size() +
                                                bst_group * tparam_.num_parallel_tree + i]);
      new_trees.push_back(t.get());
      ret->push_back(std::move(t));
    }
    predictor_->UpdatePredictionCache(model_, &updaters_, num_new_trees);
  }
  // update the trees
  for (auto& up : updaters_) {
    up->Update(gpair, p_fmat, new_trees);
  }
}

void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) {
  int num_new_trees = 0;
  for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
    num_new_trees += new_trees[gid].size();
    model_.CommitModel(std::move(new_trees[gid]), gid);
  }
  predictor_->UpdatePredictionCache(model_, &updaters_, num_new_trees);
}
  // --- data structure ---
  GBTreeModel model_;
  // training parameter
  GBTreeTrainParam tparam_;
  // ----training fields----
  // configurations for tree
  std::vector<std::pair<std::string, std::string> > cfg_;
  // the updaters that can be applied to each of tree
  std::vector<std::unique_ptr<TreeUpdater>> updaters_;
  // Cached matrices
  std::vector<std::shared_ptr<DMatrix>> cache_;
  std::unique_ptr<Predictor> predictor_;
  common::Monitor monitor_;
};

// dart
class Dart : public GBTree {
@@ -372,6 +343,10 @@ class Dart : public GBTree {
    }
  }

  bool UseGPU() const override {
    return false;
  }

 protected:
  friend class GBTree;
  // internal prediction loop
src/gbm/gbtree.h (new file, 269 lines)
@@ -0,0 +1,269 @@
/*!
 * Copyright 2014-2019 by Contributors
 * \file gbtree.h
 * \brief gradient boosted tree implementation.
 * \author Tianqi Chen
 */
#ifndef XGBOOST_GBM_GBTREE_H_
#define XGBOOST_GBM_GBTREE_H_

#include <dmlc/omp.h>
#include <dmlc/parameter.h>

#include <xgboost/logging.h>
#include <xgboost/gbm.h>
#include <xgboost/predictor.h>
#include <xgboost/tree_updater.h>
#include <xgboost/enum_class_param.h>

#include <vector>
#include <map>
#include <memory>
#include <utility>
#include <string>

#include "gbtree_model.h"
#include "../common/common.h"
#include "../common/host_device_vector.h"
#include "../common/timer.h"

namespace xgboost {
enum class TreeMethod : int {
  kAuto = 0, kApprox = 1, kExact = 2, kHist = 3,
  kGPUExact = 4, kGPUHist = 5
};

// boosting process types
enum class TreeProcessType : int {
  kDefault = 0,
  kUpdate = 1
};
}  // namespace xgboost

DECLARE_FIELD_ENUM_CLASS(xgboost::TreeMethod);
DECLARE_FIELD_ENUM_CLASS(xgboost::TreeProcessType);

namespace xgboost {
namespace gbm {
/*! \brief training parameters */
struct GBTreeTrainParam : public dmlc::Parameter<GBTreeTrainParam> {
  /*!
   * \brief number of parallel trees constructed each iteration
   *  use this option to support boosted random forest
   */
  int num_parallel_tree;
  /*! \brief tree updater sequence */
  std::string updater_seq;
  /*! \brief type of boosting process to run */
  TreeProcessType process_type;
  // predictor name
  std::string predictor;
  // tree construction method
  TreeMethod tree_method;
  // declare parameters
  DMLC_DECLARE_PARAMETER(GBTreeTrainParam) {
    DMLC_DECLARE_FIELD(num_parallel_tree)
        .set_default(1)
        .set_lower_bound(1)
        .describe("Number of parallel trees constructed during each iteration."\
                  " This option is used to support boosted random forest.");
    DMLC_DECLARE_FIELD(updater_seq)
        .set_default("grow_colmaker,prune")
        .describe("Tree updater sequence.");
    DMLC_DECLARE_FIELD(process_type)
        .set_default(TreeProcessType::kDefault)
        .add_enum("default", TreeProcessType::kDefault)
        .add_enum("update", TreeProcessType::kUpdate)
        .describe("Whether to run the normal boosting process that creates new trees,"\
                  " or to update the trees in an existing model.");
    // add alias
    DMLC_DECLARE_ALIAS(updater_seq, updater);
    DMLC_DECLARE_FIELD(predictor)
        .set_default("cpu_predictor")
        .describe("Predictor algorithm type");
    DMLC_DECLARE_FIELD(tree_method)
        .set_default(TreeMethod::kAuto)
        .add_enum("auto", TreeMethod::kAuto)
        .add_enum("approx", TreeMethod::kApprox)
        .add_enum("exact", TreeMethod::kExact)
        .add_enum("hist", TreeMethod::kHist)
        .add_enum("gpu_exact", TreeMethod::kGPUExact)
        .add_enum("gpu_hist", TreeMethod::kGPUHist)
        .describe("Choice of tree construction method.");
  }
};
/*! \brief training parameters */
struct DartTrainParam : public dmlc::Parameter<DartTrainParam> {
  /*! \brief type of sampling algorithm */
  int sample_type;
  /*! \brief type of normalization algorithm */
  int normalize_type;
  /*! \brief fraction of trees to drop during the dropout */
  float rate_drop;
  /*! \brief whether at least one tree should always be dropped during the dropout */
  bool one_drop;
  /*! \brief probability of skipping the dropout during an iteration */
  float skip_drop;
  /*! \brief learning step size for a time */
  float learning_rate;
  // declare parameters
  DMLC_DECLARE_PARAMETER(DartTrainParam) {
    DMLC_DECLARE_FIELD(sample_type)
        .set_default(0)
        .add_enum("uniform", 0)
        .add_enum("weighted", 1)
        .describe("Different types of sampling algorithm.");
    DMLC_DECLARE_FIELD(normalize_type)
        .set_default(0)
        .add_enum("tree", 0)
        .add_enum("forest", 1)
        .describe("Different types of normalization algorithm.");
    DMLC_DECLARE_FIELD(rate_drop)
        .set_range(0.0f, 1.0f)
        .set_default(0.0f)
        .describe("Fraction of trees to drop during the dropout.");
    DMLC_DECLARE_FIELD(one_drop)
        .set_default(false)
        .describe("Whether at least one tree should always be dropped during the dropout.");
    DMLC_DECLARE_FIELD(skip_drop)
        .set_range(0.0f, 1.0f)
        .set_default(0.0f)
        .describe("Probability of skipping the dropout during a boosting iteration.");
    DMLC_DECLARE_FIELD(learning_rate)
        .set_lower_bound(0.0f)
        .set_default(0.3f)
        .describe("Learning rate(step size) of update.");
    DMLC_DECLARE_ALIAS(learning_rate, eta);
  }
};
// gradient boosted trees
class GBTree : public GradientBooster {
 public:
  explicit GBTree(bst_float base_margin) : model_(base_margin) {}

  void InitCache(const std::vector<std::shared_ptr<DMatrix> > &cache) {
    cache_ = cache;
  }

  static void AssertGPUSupport() {
#ifndef XGBOOST_USE_CUDA
    LOG(FATAL) << "XGBoost version not compiled with GPU support.";
#endif  // XGBOOST_USE_CUDA
  }

  void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) override;
  // Revise `tree_method` and `updater` parameters after seeing the training
  // data matrix
  void PerformTreeMethodHeuristic(DMatrix* p_train,
                                  std::map<std::string, std::string> cfg);
  /*! \brief Map `tree_method` parameter to `updater` parameter */
  void ConfigureUpdaters(const std::map<std::string, std::string>& cfg);
  /*! \brief Carry out one iteration of boosting */
  void DoBoost(DMatrix* p_fmat,
               HostDeviceVector<GradientPair>* in_gpair,
               ObjFunction* obj) override;

  bool UseGPU() const override {
    return
        tparam_.predictor == "gpu_predictor" ||
        tparam_.tree_method == TreeMethod::kGPUHist ||
        tparam_.tree_method == TreeMethod::kGPUExact;
  }

  void Load(dmlc::Stream* fi) override {
    model_.Load(fi);

    this->cfg_.clear();
    this->cfg_.emplace_back(std::string("num_feature"),
                            common::ToString(model_.param.num_feature));
  }

  GBTreeTrainParam const& GetTrainParam() const {
    return tparam_;
  }

  void Save(dmlc::Stream* fo) const override {
    model_.Save(fo);
  }

  bool AllowLazyCheckPoint() const override {
    return model_.param.num_output_group == 1 ||
        tparam_.updater_seq.find("distcol") != std::string::npos;
  }

  void PredictBatch(DMatrix* p_fmat,
                    HostDeviceVector<bst_float>* out_preds,
                    unsigned ntree_limit) override {
    predictor_->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
  }

  void PredictInstance(const SparsePage::Inst& inst,
                       std::vector<bst_float>* out_preds,
                       unsigned ntree_limit,
                       unsigned root_index) override {
    predictor_->PredictInstance(inst, out_preds, model_,
                                ntree_limit, root_index);
  }

  void PredictLeaf(DMatrix* p_fmat,
                   std::vector<bst_float>* out_preds,
                   unsigned ntree_limit) override {
    predictor_->PredictLeaf(p_fmat, out_preds, model_, ntree_limit);
  }

  void PredictContribution(DMatrix* p_fmat,
                           std::vector<bst_float>* out_contribs,
                           unsigned ntree_limit, bool approximate, int condition,
                           unsigned condition_feature) override {
    predictor_->PredictContribution(p_fmat, out_contribs, model_, ntree_limit, approximate);
  }

  void PredictInteractionContributions(DMatrix* p_fmat,
                                       std::vector<bst_float>* out_contribs,
                                       unsigned ntree_limit, bool approximate) override {
    predictor_->PredictInteractionContributions(p_fmat, out_contribs, model_,
                                                ntree_limit, approximate);
  }

  std::vector<std::string> DumpModel(const FeatureMap& fmap,
                                     bool with_stats,
                                     std::string format) const override {
    return model_.DumpModel(fmap, with_stats, format);
  }

 protected:
  // initialize updater before using them
  void InitUpdater();

  // do group specific group
  void BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
                     DMatrix *p_fmat,
                     int bst_group,
                     std::vector<std::unique_ptr<RegTree> >* ret);

  // commit new trees all at once
  virtual void CommitModel(
      std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees);

  // --- data structure ---
  GBTreeModel model_;
  // training parameter
  GBTreeTrainParam tparam_;
  // ----training fields----
  // configurations for tree
  std::vector<std::pair<std::string, std::string> > cfg_;
  // the updaters that can be applied to each of tree
  std::vector<std::unique_ptr<TreeUpdater>> updaters_;
  // Cached matrices
  std::vector<std::shared_ptr<DMatrix>> cache_;
  std::unique_ptr<Predictor> predictor_;
  common::Monitor monitor_;
};

}  // namespace gbm
}  // namespace xgboost

#endif  // XGBOOST_GBM_GBTREE_H_
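With GBTree::UseGPU derived from predictor and tree_method, a caller such as the Learner can apply GPU-related defaults without re-deriving the condition itself. A hedged sketch of that call site (all names below are illustrative stand-ins; the real learner code appears in the next hunk):

    #include <memory>

    // Minimal stand-ins, only to show the control flow.
    struct BoosterSketch {
      bool use_gpu = false;
      bool UseGPU() const { return use_gpu; }
    };

    struct LearnerSketch {
      std::unique_ptr<BoosterSketch> gbm;
      int n_gpus = 0;              // 0 means "not requested"
      bool n_gpus_explicit = false;

      // If the booster says it needs a GPU and the user did not pick a device
      // count, fall back to a single GPU -- the same default the diff installs.
      void ConfigureWithKnownData() {
        if (gbm->UseGPU() && !n_gpus_explicit) {
          n_gpus = 1;
        }
      }
    };

    int main() {
      LearnerSketch learner;
      learner.gbm = std::make_unique<BoosterSketch>();
      learner.gbm->use_gpu = true;
      learner.ConfigureWithKnownData();
      return learner.n_gpus == 1 ? 0 : 1;
    }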
src/learner.cc
@@ -113,68 +113,6 @@ class LearnerImpl : public Learner {
    name_gbm_ = "gbtree";
  }

  static void AssertGPUSupport() {
#ifndef XGBOOST_USE_CUDA
    LOG(FATAL) << "XGBoost version not compiled with GPU support.";
#endif  // XGBOOST_USE_CUDA
  }

  /*! \brief Map `tree_method` parameter to `updater` parameter */
  void ConfigureUpdaters() {
    // This method is not applicable to non-tree learners
    if (cfg_.find("booster") != cfg_.cend() &&
        (cfg_.at("booster") != "gbtree" && cfg_.at("booster") != "dart")) {
      return;
    }
    // `updater` parameter was manually specified
    if (cfg_.count("updater") > 0) {
      LOG(WARNING) << "DANGER AHEAD: You have manually specified `updater` "
                      "parameter. The `tree_method` parameter will be ignored. "
                      "Incorrect sequence of updaters will produce undefined "
                      "behavior. For common uses, we recommend using "
                      "`tree_method` parameter instead.";
      return;
    }

    /* Choose updaters according to tree_method parameters */
    switch (tparam_.tree_method) {
      case TreeMethod::kAuto:
        // Use heuristic to choose between 'exact' and 'approx'
        // This choice is deferred to PerformTreeMethodHeuristic().
        break;
      case TreeMethod::kApprox:
        cfg_["updater"] = "grow_histmaker,prune";
        break;
      case TreeMethod::kExact:
        cfg_["updater"] = "grow_colmaker,prune";
        break;
      case TreeMethod::kHist:
        LOG(INFO) <<
            "Tree method is selected to be 'hist', which uses a "
            "single updater grow_quantile_histmaker.";
        cfg_["updater"] = "grow_quantile_histmaker";
        break;
      case TreeMethod::kGPUExact:
        this->AssertGPUSupport();
        cfg_["updater"] = "grow_gpu,prune";
        if (cfg_.count("predictor") == 0) {
          cfg_["predictor"] = "gpu_predictor";
        }
        break;
      case TreeMethod::kGPUHist:
        this->AssertGPUSupport();
        cfg_["updater"] = "grow_gpu_hist";
        if (cfg_.count("predictor") == 0) {
          cfg_["predictor"] = "gpu_predictor";
        }
        break;
      default:
        LOG(FATAL) << "Unknown tree_method ("
                   << static_cast<int>(tparam_.tree_method) << ") detected";
    }
  }

  void ConfigureObjective() {
    if (cfg_.count("num_class") != 0) {
      cfg_["num_output_group"] = cfg_["num_class"];
@@ -192,9 +130,6 @@ class LearnerImpl : public Learner {
    if (cfg_.count("objective") == 0) {
      cfg_["objective"] = "reg:squarederror";
    }
    if (cfg_.count("booster") == 0) {
      cfg_["booster"] = "gbtree";
    }
  }

  // Configuration before data is known.
@ -231,13 +166,12 @@ class LearnerImpl : public Learner {
|
||||
}
|
||||
|
||||
ConfigureObjective();
|
||||
ConfigureUpdaters();
|
||||
name_gbm_ = tparam_.booster;
|
||||
|
||||
// FIXME(trivialfis): So which one should go first? Init or Configure?
|
||||
if (!this->ModelInitialized()) {
|
||||
mparam_.InitAllowUnknown(args);
|
||||
name_obj_ = cfg_["objective"];
|
||||
name_gbm_ = cfg_["booster"];
|
||||
// set seed only before the model is initialized
|
||||
common::GlobalRandom().seed(tparam_.seed);
|
||||
}
|
||||
@ -263,18 +197,11 @@ class LearnerImpl : public Learner {
|
||||
// Configuration can only be done after data is known
|
||||
void ConfigurationWithKnownData(DMatrix* dmat) {
|
||||
CHECK(ModelInitialized())
|
||||
<< "Always call InitModel or Load before any evaluation.";
|
||||
<< " Internal Error: Always call InitModel or Load before any evaluation.";
|
||||
this->ValidateDMatrix(dmat);
|
||||
// Configure GPU parameters
|
||||
// FIXME(trivialfis): How do we know dependent parameters are all set?
|
||||
if (tparam_.tree_method == TreeMethod::kGPUHist ||
|
||||
tparam_.tree_method == TreeMethod::kGPUExact ||
|
||||
(cfg_.find("updater") != cfg_.cend() && cfg_.at("updater") == "gpu_coord_descent") ||
|
||||
(cfg_.find("predictor") != cfg_.cend() &&
|
||||
cfg_.at("predictor") == "gpu_predictor")) {
|
||||
if (cfg_.find("n_gpus") == cfg_.cend()) {
|
||||
tparam_.n_gpus = 1;
|
||||
}
|
||||
CHECK(this->gbm_) << " Internal: GBM is not set";
|
||||
if (this->gbm_->UseGPU() && cfg_.find("n_gpus") == cfg_.cend()) {
|
||||
tparam_.n_gpus = 1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -443,13 +370,26 @@ class LearnerImpl : public Learner {
    }
  }

  void CheckDataSplitMode() {
    if (rabit::IsDistributed()) {
      CHECK(tparam_.dsplit != DataSplitMode::kAuto)
          << "Precondition violated; dsplit cannot be 'auto' in distributed mode";
      if (tparam_.dsplit == DataSplitMode::kCol) {
        // 'distcol' updater hidden until it becomes functional again
        // See discussion at https://github.com/dmlc/xgboost/issues/1832
        LOG(FATAL) << "Column-wise data split is currently not supported.";
      }
    }
  }

  void UpdateOneIter(int iter, DMatrix* train) override {
    monitor_.Start("UpdateOneIter");

    if (tparam_.seed_per_iteration || rabit::IsDistributed()) {
      common::GlobalRandom().seed(tparam_.seed * kRandSeedMagic + iter);
    }
    this->PerformTreeMethodHeuristic(train);
    // this->PerformTreeMethodHeuristic(train);
    this->CheckDataSplitMode();
    this->ConfigurationWithKnownData(train);

    monitor_.Start("PredictRaw");
@@ -468,7 +408,8 @@ class LearnerImpl : public Learner {
    if (tparam_.seed_per_iteration || rabit::IsDistributed()) {
      common::GlobalRandom().seed(tparam_.seed * kRandSeedMagic + iter);
    }
    this->PerformTreeMethodHeuristic(train);
    this->CheckDataSplitMode();
    // this->PerformTreeMethodHeuristic(train);
    this->ConfigurationWithKnownData(train);

    gbm_->DoBoost(train, in_gpair);
@@ -573,108 +514,6 @@ class LearnerImpl : public Learner {
  }

 protected:
  // Revise `tree_method` and `updater` parameters after seeing the training
  // data matrix
  inline void PerformTreeMethodHeuristic(DMatrix* p_train) {
    if (name_gbm_ != "gbtree" || cfg_.count("updater") > 0) {
      // 1. This method is not applicable for non-tree learners
      // 2. This method is disabled when `updater` parameter is explicitly
      //    set, since only experts are expected to do so.
      return;
    }

    const TreeMethod current_tree_method = tparam_.tree_method;

    if (rabit::IsDistributed()) {
      CHECK(tparam_.dsplit != DataSplitMode::kAuto)
          << "Precondition violated; dsplit cannot be 'auto' in distributed mode";
      if (tparam_.dsplit == DataSplitMode::kCol) {
        // 'distcol' updater hidden until it becomes functional again
        // See discussion at https://github.com/dmlc/xgboost/issues/1832
        LOG(FATAL) << "Column-wise data split is currently not supported.";
      }
      switch (current_tree_method) {
        case TreeMethod::kAuto:
          LOG(WARNING) <<
              "Tree method is automatically selected to be 'approx' "
              "for distributed training.";
          break;
        case TreeMethod::kApprox:
        case TreeMethod::kHist:
          // things are okay, do nothing
          break;
        case TreeMethod::kExact:
          LOG(WARNING) << "Tree method was set to be 'exact', "
                          "but only 'approx' and 'hist' are available for distributed "
                          "training. The `tree_method` parameter is now being "
                          "changed to 'approx'";
          break;
        case TreeMethod::kGPUExact:
        case TreeMethod::kGPUHist:
          LOG(FATAL) << "Distributed training is not available with GPU algorithms";
          break;
        default:
          LOG(FATAL) << "Unknown tree_method ("
                     << static_cast<int>(current_tree_method) << ") detected";
      }
      if (current_tree_method != TreeMethod::kHist) {
        LOG(WARNING) << "Tree method is automatically selected to be 'approx'"
                        " for distributed training.";
        tparam_.tree_method = TreeMethod::kApprox;
      } else {
        LOG(WARNING) << "Tree method is specified to be 'hist'"
                        " for distributed training.";
        tparam_.tree_method = TreeMethod::kHist;
      }
    } else if (!p_train->SingleColBlock()) {
      /* Some tree methods are not available for external-memory DMatrix */
      switch (current_tree_method) {
        case TreeMethod::kAuto:
          LOG(WARNING) << "Tree method is automatically set to 'approx' "
                          "since external-memory data matrix is used.";
          break;
        case TreeMethod::kApprox:
          // things are okay, do nothing
          break;
        case TreeMethod::kExact:
          LOG(WARNING) << "Tree method was set to be 'exact', "
                          "but currently we are only able to proceed with "
                          "approximate algorithm ('approx') because external-"
                          "memory data matrix is used.";
          break;
        case TreeMethod::kHist:
          // things are okay, do nothing
          break;
        case TreeMethod::kGPUExact:
        case TreeMethod::kGPUHist:
          LOG(FATAL)
              << "External-memory data matrix is not available with GPU algorithms";
          break;
        default:
          LOG(FATAL) << "Unknown tree_method ("
                     << static_cast<int>(current_tree_method) << ") detected";
      }
      tparam_.tree_method = TreeMethod::kApprox;
    } else if (p_train->Info().num_row_ >= (4UL << 20UL)
               && current_tree_method == TreeMethod::kAuto) {
      /* Choose tree_method='approx' automatically for large data matrix */
      LOG(WARNING) << "Tree method is automatically selected to be "
                      "'approx' for faster speed. To use old behavior "
                      "(exact greedy algorithm on single machine), "
                      "set tree_method to 'exact'.";
      tparam_.tree_method = TreeMethod::kApprox;
    }

    /* If tree_method was changed, re-configure updaters and gradient boosters */
    if (tparam_.tree_method != current_tree_method) {
      ConfigureUpdaters();
      if (gbm_ != nullptr) {
        gbm_->Configure(cfg_.begin(), cfg_.end());
      }
    }
  }

  // return whether model is already initialized.
  inline bool ModelInitialized() const { return gbm_ != nullptr; }
  // lazily initialize the model based on configuration if it hasn't yet been initialized.
@@ -6,12 +6,13 @@
 */
#include <rabit/rabit.h>
#include <xgboost/tree_updater.h>
#include <xgboost/logging.h>
#include <memory>
#include <vector>
#include <cmath>
#include <algorithm>

#include "./param.h"
#include "param.h"
#include "../common/random.h"
#include "../common/bitmap.h"
#include "split_evaluator.h"
@@ -603,7 +604,7 @@ class ColMaker: public TreeUpdater {
      poption = static_cast<int>(num_features) * 2 < this->nthread_ ? 1 : 0;
    }
    if (poption == 0) {
#pragma omp parallel for schedule(dynamic, batch_size)
#pragma omp parallel for schedule(dynamic, batch_size)
      for (bst_omp_uint i = 0; i < num_features; ++i) {
        int fid = feat_set[i];
        const int tid = omp_get_thread_num();
tests/cpp/gbm/test_gbtree.cc (new file, 49 lines)
@@ -0,0 +1,49 @@
#include <gtest/gtest.h>
#include <xgboost/generic_parameters.h>
#include "../helpers.h"
#include "../../../src/gbm/gbtree.h"

namespace xgboost {
TEST(GBTree, SelectTreeMethod) {
  using Arg = std::pair<std::string, std::string>;
  size_t constexpr kRows = 10;
  size_t constexpr kCols = 10;
  auto mat_ptr = CreateDMatrix(kRows, kCols, 0);
  std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {*mat_ptr};

  LearnerTrainParam learner_param;
  learner_param.InitAllowUnknown(std::vector<Arg>{Arg("n_gpus", "0")});
  std::unique_ptr<GradientBooster> p_gbm{
      GradientBooster::Create("gbtree", &learner_param, {}, 0)};
  auto& gbtree = dynamic_cast<gbm::GBTree&>(*p_gbm);

  // Test if `tree_method` can be set
  std::string n_feat = std::to_string(kCols);
  gbtree.Configure({Arg{"tree_method", "approx"}, Arg{"num_feature", n_feat}});
  auto const& tparam = gbtree.GetTrainParam();
  ASSERT_EQ(tparam.updater_seq, "grow_histmaker,prune");
  gbtree.Configure({Arg("tree_method", "exact"), Arg("num_feature", n_feat)});
  ASSERT_EQ(tparam.updater_seq, "grow_colmaker,prune");
  gbtree.Configure({Arg("tree_method", "hist"), Arg("num_feature", n_feat)});
  ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker");
  ASSERT_EQ(tparam.predictor, "cpu_predictor");
  gbtree.Configure({Arg{"booster", "dart"}, Arg{"tree_method", "hist"},
                    Arg{"num_feature", n_feat}});
  ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker");
#ifdef XGBOOST_USE_CUDA
  learner_param.InitAllowUnknown(std::vector<Arg>{Arg{"n_gpus", "1"}});
  gbtree.Configure({Arg("tree_method", "gpu_exact"),
                    Arg("num_feature", n_feat)});
  ASSERT_EQ(tparam.updater_seq, "grow_gpu,prune");
  ASSERT_EQ(tparam.predictor, "gpu_predictor");
  gbtree.Configure({Arg("tree_method", "gpu_hist"), Arg("num_feature", n_feat)});
  ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist");
  ASSERT_EQ(tparam.predictor, "gpu_predictor");
  gbtree.Configure({Arg{"booster", "dart"}, Arg{"tree_method", "gpu_hist"},
                    Arg{"num_feature", n_feat}});
  ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist");
#endif

  delete mat_ptr;
}
}  // namespace xgboost
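To run just this case locally, one would typically build the C++ test target and pass a gtest filter, for example (the binary name is an assumption about the usual xgboost test target, not something stated in this diff):

    ./testxgboost --gtest_filter=GBTree.SelectTreeMethod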
@@ -145,25 +145,30 @@ TEST(gpu_predictor, MGPU_PicklingTest) {
  }

  // Load data matrix
  CheckCAPICall(XGDMatrixCreateFromFile(tmp_file.c_str(), 0, &dmat[0]));
  CheckCAPICall(XGDMatrixSetFloatInfo(dmat[0], "label", label.data(), 200));
  ASSERT_EQ(XGDMatrixCreateFromFile(
      tmp_file.c_str(), 0, &dmat[0]), 0) << XGBGetLastError();
  ASSERT_EQ(XGDMatrixSetFloatInfo(
      dmat[0], "label", label.data(), 200), 0) << XGBGetLastError();
  // Create booster
  CheckCAPICall(XGBoosterCreate(dmat, 1, &bst));
  ASSERT_EQ(XGBoosterCreate(dmat, 1, &bst), 0) << XGBGetLastError();
  // Set parameters
  CheckCAPICall(XGBoosterSetParam(bst, "seed", "0"));
  CheckCAPICall(XGBoosterSetParam(bst, "base_score", "0.5"));
  CheckCAPICall(XGBoosterSetParam(bst, "booster", "gbtree"));
  CheckCAPICall(XGBoosterSetParam(bst, "learning_rate", "0.01"));
  CheckCAPICall(XGBoosterSetParam(bst, "max_depth", "8"));
  CheckCAPICall(XGBoosterSetParam(bst, "objective", "binary:logistic"));
  CheckCAPICall(XGBoosterSetParam(bst, "seed", "123"));
  CheckCAPICall(XGBoosterSetParam(bst, "tree_method", "gpu_hist"));
  CheckCAPICall(XGBoosterSetParam(bst, "n_gpus", std::to_string(ngpu).c_str()));
  CheckCAPICall(XGBoosterSetParam(bst, "predictor", "gpu_predictor"));
  ASSERT_EQ(XGBoosterSetParam(bst, "seed", "0"), 0) << XGBGetLastError();
  ASSERT_EQ(XGBoosterSetParam(bst, "base_score", "0.5"), 0) << XGBGetLastError();
  ASSERT_EQ(XGBoosterSetParam(bst, "booster", "gbtree"), 0) << XGBGetLastError();
  ASSERT_EQ(XGBoosterSetParam(bst, "learning_rate", "0.01"), 0) << XGBGetLastError();
  ASSERT_EQ(XGBoosterSetParam(bst, "max_depth", "8"), 0) << XGBGetLastError();
  ASSERT_EQ(XGBoosterSetParam(
      bst, "objective", "binary:logistic"), 0) << XGBGetLastError();
  ASSERT_EQ(XGBoosterSetParam(bst, "seed", "123"), 0) << XGBGetLastError();
  ASSERT_EQ(XGBoosterSetParam(
      bst, "tree_method", "gpu_hist"), 0) << XGBGetLastError();
  ASSERT_EQ(XGBoosterSetParam(
      bst, "n_gpus", std::to_string(ngpu).c_str()), 0) << XGBGetLastError();
  ASSERT_EQ(XGBoosterSetParam(bst, "predictor", "gpu_predictor"), 0) << XGBGetLastError();

  // Run boosting iterations
  for (int i = 0; i < 10; ++i) {
    CheckCAPICall(XGBoosterUpdateOneIter(bst, i, dmat[0]));
    ASSERT_EQ(XGBoosterUpdateOneIter(bst, i, dmat[0]), 0) << XGBGetLastError();
  }

  // Delete matrix
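The new assertions repeat the same pattern: call the C API, expect a zero return, and append XGBGetLastError() on failure. If that pattern were ever factored out again, a small gtest helper macro along these lines would cover it (purely a sketch, not part of the diff):

    // Assumes <gtest/gtest.h> and <xgboost/c_api.h> are already included.
    #define EXPECT_CAPI_OK(call) \
      ASSERT_EQ((call), 0) << XGBGetLastError()

    // Usage: EXPECT_CAPI_OK(XGBoosterSetParam(bst, "max_depth", "8"));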
@@ -19,40 +19,6 @@ TEST(Learner, Basic) {
  delete mat_ptr;
}

TEST(Learner, SelectTreeMethod) {
  using Arg = std::pair<std::string, std::string>;
  auto mat_ptr = CreateDMatrix(10, 10, 0);
  std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {*mat_ptr};
  auto learner = std::unique_ptr<Learner>(Learner::Create(mat));

  // Test if `tree_method` can be set
  learner->Configure({Arg("tree_method", "approx")});
  ASSERT_EQ(learner->GetConfigurationArguments().at("updater"),
            "grow_histmaker,prune");
  learner->Configure({Arg("tree_method", "exact")});
  ASSERT_EQ(learner->GetConfigurationArguments().at("updater"),
            "grow_colmaker,prune");
  learner->Configure({Arg("tree_method", "hist")});
  ASSERT_EQ(learner->GetConfigurationArguments().at("updater"),
            "grow_quantile_histmaker");
  learner->Configure({Arg{"booster", "dart"}, Arg{"tree_method", "hist"}});
  ASSERT_EQ(learner->GetConfigurationArguments().at("updater"),
            "grow_quantile_histmaker");
#ifdef XGBOOST_USE_CUDA
  learner->Configure({Arg("tree_method", "gpu_exact")});
  ASSERT_EQ(learner->GetConfigurationArguments().at("updater"),
            "grow_gpu,prune");
  learner->Configure({Arg("tree_method", "gpu_hist")});
  ASSERT_EQ(learner->GetConfigurationArguments().at("updater"),
            "grow_gpu_hist");
  learner->Configure({Arg{"booster", "dart"}, Arg{"tree_method", "gpu_hist"}});
  ASSERT_EQ(learner->GetConfigurationArguments().at("updater"),
            "grow_gpu_hist");
#endif

  delete mat_ptr;
}

TEST(Learner, CheckGroup) {
  using Arg = std::pair<std::string, std::string>;
  size_t constexpr kNumGroups = 4;