Unify the hist tree method for different devices. (#9363)

Jiaming Yuan 2023-07-11 10:04:39 +08:00 committed by GitHub
parent 20c52f07d2
commit 97ed944209
8 changed files with 242 additions and 142 deletions

View File

@@ -82,6 +82,7 @@ OBJECTS= \
    $(PKGROOT)/src/common/charconv.o \
    $(PKGROOT)/src/common/column_matrix.o \
    $(PKGROOT)/src/common/common.o \
+   $(PKGROOT)/src/common/error_msg.o \
    $(PKGROOT)/src/common/hist_util.o \
    $(PKGROOT)/src/common/host_device_vector.o \
    $(PKGROOT)/src/common/io.o \

View File

@@ -82,6 +82,7 @@ OBJECTS= \
    $(PKGROOT)/src/common/charconv.o \
    $(PKGROOT)/src/common/column_matrix.o \
    $(PKGROOT)/src/common/common.o \
+   $(PKGROOT)/src/common/error_msg.o \
    $(PKGROOT)/src/common/hist_util.o \
    $(PKGROOT)/src/common/host_device_vector.o \
    $(PKGROOT)/src/common/io.o \

src/common/error_msg.cc (new file, +36 lines)
View File

@@ -0,0 +1,36 @@
/**
* Copyright 2023 by XGBoost contributors
*/
#include "error_msg.h"
#include "xgboost/logging.h"
namespace xgboost::error {
void WarnDeprecatedGPUHist() {
bool static thread_local logged{false};
if (logged) {
return;
}
auto msg =
"The tree method `gpu_hist` is deprecated since 2.0.0. To use GPU training, set the `device` "
R"(parameter to CUDA instead.
E.g. tree_method = "hist", device = "cuda"
)";
LOG(WARNING) << msg;
logged = true;
}
void WarnManualUpdater() {
bool static thread_local logged{false};
if (logged) {
return;
}
LOG(WARNING)
<< "You have manually specified the `updater` parameter. The `tree_method` parameter "
"will be ignored. Incorrect sequence of updaters will produce undefined "
"behavior. For common uses, we recommend using `tree_method` parameter instead.";
logged = true;
}
} // namespace xgboost::error
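For reference, the parameter style that WarnDeprecatedGPUHist() points users toward can be sketched from the Python side. This is a minimal illustration of the message above, assuming the `device` parameter that accompanies the 2.0 deprecation; X and y are placeholder training data:

import xgboost as xgb

# Deprecated spelling: {"tree_method": "gpu_hist"} now triggers the warning defined above.
# Recommended spelling: choose the algorithm and the device separately.
params = {"tree_method": "hist", "device": "cuda"}  # the unified hist method, run on the GPU
dtrain = xgb.DMatrix(X, label=y)                    # X, y are placeholders
booster = xgb.train(params, dtrain, num_boost_round=10)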

View File

@@ -75,9 +75,12 @@ inline void WarnOldSerialization() {
  if (logged) {
    return;
  }
  LOG(WARNING) << OldSerialization();
  logged = true;
}
+
+void WarnDeprecatedGPUHist();
+void WarnManualUpdater();
}  // namespace xgboost::error
#endif  // XGBOOST_COMMON_ERROR_MSG_H_

View File

@@ -9,7 +9,7 @@
#include <dmlc/omp.h>
#include <dmlc/parameter.h>

-#include <algorithm>
+#include <algorithm>  // for equal
#include <cinttypes>  // for uint32_t
#include <limits>
#include <memory>
@@ -40,8 +40,53 @@
namespace xgboost::gbm {
DMLC_REGISTRY_FILE_TAG(gbtree);
namespace {
/** @brief Map the `tree_method` parameter to the `updater` parameter. */
std::string MapTreeMethodToUpdaters(Context const* ctx_, TreeMethod tree_method) {
// Choose updaters according to tree_method parameters
switch (tree_method) {
case TreeMethod::kAuto: // Use hist as default in 2.0
case TreeMethod::kHist: {
return ctx_->DispatchDevice([] { return "grow_quantile_histmaker"; },
[] {
common::AssertGPUSupport();
return "grow_gpu_hist";
});
}
case TreeMethod::kApprox:
CHECK(ctx_->IsCPU()) << "The `approx` tree method is not supported on GPU.";
return "grow_histmaker";
case TreeMethod::kExact:
CHECK(ctx_->IsCPU()) << "The `exact` tree method is not supported on GPU.";
return "grow_colmaker,prune";
case TreeMethod::kGPUHist: {
common::AssertGPUSupport();
error::WarnDeprecatedGPUHist();
return "grow_gpu_hist";
}
default:
auto tm = static_cast<std::underlying_type_t<TreeMethod>>(tree_method);
LOG(FATAL) << "Unknown tree_method: `" << tm << "`.";
}
LOG(FATAL) << "unreachable";
return "";
}
bool UpdatersMatched(std::vector<std::string> updater_seq,
std::vector<std::unique_ptr<TreeUpdater>> const& updaters) {
if (updater_seq.size() != updaters.size()) {
return false;
}
return std::equal(updater_seq.cbegin(), updater_seq.cend(), updaters.cbegin(),
[](std::string const& name, std::unique_ptr<TreeUpdater> const& up) {
return name == up->Name();
});
}
} // namespace
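MapTreeMethodToUpdaters() above is the single place where `tree_method` plus the active device is turned into an updater sequence. A minimal sketch of how that choice can be inspected from Python, mirroring the config path exercised by the new C++ test later in this commit; X and y are placeholder data:

import json
import xgboost as xgb

dtrain = xgb.DMatrix(X, label=y)  # placeholder data
booster = xgb.train({"tree_method": "hist"}, dtrain, num_boost_round=1)

# The booster configuration records which updaters were selected.
config = json.loads(booster.save_config())
updaters = config["learner"]["gradient_booster"]["updater"]
print([u["name"] for u in updaters])  # expect ["grow_quantile_histmaker"] on a CPU context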
void GBTree::Configure(Args const& cfg) {
-  std::string updater_seq = tparam_.updater_seq;
  tparam_.UpdateAllowUnknown(cfg);
  tree_param_.UpdateAllowUnknown(cfg);
@@ -54,8 +99,7 @@ void GBTree::Configure(Args const& cfg) {
  // configure predictors
  if (!cpu_predictor_) {
-    cpu_predictor_ = std::unique_ptr<Predictor>(
-        Predictor::Create("cpu_predictor", this->ctx_));
+    cpu_predictor_ = std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", this->ctx_));
  }
  cpu_predictor_->Configure(cfg);
#if defined(XGBOOST_USE_CUDA)
@@ -70,26 +114,17 @@ void GBTree::Configure(Args const& cfg) {
#if defined(XGBOOST_USE_ONEAPI)
  if (!oneapi_predictor_) {
-    oneapi_predictor_ = std::unique_ptr<Predictor>(
-        Predictor::Create("oneapi_predictor", this->ctx_));
+    oneapi_predictor_ =
+        std::unique_ptr<Predictor>(Predictor::Create("oneapi_predictor", this->ctx_));
  }
  oneapi_predictor_->Configure(cfg);
#endif  // defined(XGBOOST_USE_ONEAPI)

-  monitor_.Init("GBTree");
-  specified_updater_ = std::any_of(
-      cfg.cbegin(), cfg.cend(),
-      [](std::pair<std::string, std::string> const& arg) { return arg.first == "updater"; });
-  if (specified_updater_ && !showed_updater_warning_) {
-    LOG(WARNING) << "DANGER AHEAD: You have manually specified `updater` "
-                    "parameter. The `tree_method` parameter will be ignored. "
-                    "Incorrect sequence of updaters will produce undefined "
-                    "behavior. For common uses, we recommend using "
-                    "`tree_method` parameter instead.";
-    // Don't drive users to silent XGBOost.
-    showed_updater_warning_ = true;
+  // `updater` parameter was manually specified
+  specified_updater_ =
+      std::any_of(cfg.cbegin(), cfg.cend(), [](auto const& arg) { return arg.first == "updater"; });
+  if (specified_updater_) {
+    error::WarnManualUpdater();
  }

  if (model_.learner_model_param->IsVectorLeaf()) {
@@ -97,50 +132,28 @@ void GBTree::Configure(Args const& cfg) {
        << "Only the hist tree method is supported for building multi-target trees with vector "
           "leaf.";
  }

  LOG(DEBUG) << "Using tree method: " << static_cast<int>(tparam_.tree_method);
-  this->ConfigureUpdaters();
-  if (updater_seq != tparam_.updater_seq) {
-    updaters_.clear();
-    this->InitUpdater(cfg);
-  } else {
-    for (auto& up : updaters_) {
-      up->Configure(cfg);
-    }
-  }
-  configured_ = true;
+
+  if (!specified_updater_) {
+    this->tparam_.updater_seq = MapTreeMethodToUpdaters(ctx_, tparam_.tree_method);
+  }
+
+  auto up_names = common::Split(tparam_.updater_seq, ',');
+  if (!UpdatersMatched(up_names, updaters_)) {
+    updaters_.clear();
+    for (auto const& name : up_names) {
+      std::unique_ptr<TreeUpdater> up(
+          TreeUpdater::Create(name.c_str(), ctx_, &model_.learner_model_param->task));
+      updaters_.push_back(std::move(up));
+    }
+  }
+  for (auto& up : updaters_) {
+    up->Configure(cfg);
+  }
}
void GBTree::ConfigureUpdaters() {
if (specified_updater_) {
return;
}
// `updater` parameter was manually specified
/* Choose updaters according to tree_method parameters */
switch (tparam_.tree_method) {
case TreeMethod::kAuto: // Use hist as default in 2.0
case TreeMethod::kHist: {
tparam_.updater_seq = "grow_quantile_histmaker";
break;
}
case TreeMethod::kApprox:
tparam_.updater_seq = "grow_histmaker";
break;
case TreeMethod::kExact:
tparam_.updater_seq = "grow_colmaker,prune";
break;
case TreeMethod::kGPUHist: {
common::AssertGPUSupport();
tparam_.updater_seq = "grow_gpu_hist";
break;
}
default:
LOG(FATAL) << "Unknown tree_method (" << static_cast<int>(tparam_.tree_method)
<< ") detected";
}
}
void GPUCopyGradient(HostDeviceVector<GradientPair> const*, bst_group_t, bst_group_t,
                     HostDeviceVector<GradientPair>*)
#if defined(XGBOOST_USE_CUDA)
@@ -195,14 +208,8 @@ void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
  bst_target_t const n_groups = model_.learner_model_param->OutputLength();
  monitor_.Start("BoostNewTrees");

-  // Weird case that tree method is cpu-based but gpu_id is set. Ideally we should let
-  // `gpu_id` be the single source of determining what algorithms to run, but that will
-  // break a lots of existing code.
-  auto device = tparam_.tree_method != TreeMethod::kGPUHist ? Context::kCpuId : ctx_->gpu_id;
-  auto out = linalg::MakeTensorView(
-      device,
-      device == Context::kCpuId ? predt->predictions.HostSpan() : predt->predictions.DeviceSpan(),
-      p_fmat->Info().num_row_, model_.learner_model_param->OutputLength());
+  auto out = linalg::MakeTensorView(ctx_, &predt->predictions, p_fmat->Info().num_row_,
+                                    model_.learner_model_param->OutputLength());

  CHECK_NE(n_groups, 0);
  if (!p_fmat->SingleColBlock() && obj->Task().UpdateTreeLeaf()) {
@@ -261,47 +268,6 @@ void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
  this->CommitModel(std::move(new_trees));
}
void GBTree::InitUpdater(Args const& cfg) {
std::string tval = tparam_.updater_seq;
std::vector<std::string> ups = common::Split(tval, ',');
if (updaters_.size() != 0) {
// Assert we have a valid set of updaters.
CHECK_EQ(ups.size(), updaters_.size());
for (auto const& up : updaters_) {
bool contains = std::any_of(ups.cbegin(), ups.cend(),
[&up](std::string const& name) {
return name == up->Name();
});
if (!contains) {
std::stringstream ss;
ss << "Internal Error: " << " mismatched updater sequence.\n";
ss << "Specified updaters: ";
std::for_each(ups.cbegin(), ups.cend(),
[&ss](std::string const& name){
ss << name << " ";
});
ss << "\n" << "Actual updaters: ";
std::for_each(updaters_.cbegin(), updaters_.cend(),
[&ss](std::unique_ptr<TreeUpdater> const& updater){
ss << updater->Name() << " ";
});
LOG(FATAL) << ss.str();
}
}
// Do not push new updater in.
return;
}
// create new updaters
for (const std::string& pstr : ups) {
std::unique_ptr<TreeUpdater> up(
TreeUpdater::Create(pstr.c_str(), ctx_, &model_.learner_model_param->task));
up->Configure(cfg);
updaters_.push_back(std::move(up));
}
}
void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat, int bst_group,
                           std::vector<HostDeviceVector<bst_node_t>>* out_position,
                           TreesOneGroup* ret) {
@@ -310,6 +276,7 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat,
  // create the trees
  for (int i = 0; i < model_.param.num_parallel_tree; ++i) {
    if (tparam_.process_type == TreeProcessType::kDefault) {
+     CHECK(!updaters_.empty());
      CHECK(!updaters_.front()->CanModifyTree())
          << "Updater: `" << updaters_.front()->Name() << "` "
          << "can not be used to create new trees. "
@@ -465,7 +432,6 @@ void GBTree::SaveModel(Json* p_out) const {
void GBTree::Slice(bst_layer_t begin, bst_layer_t end, bst_layer_t step, GradientBooster* out,
                   bool* out_of_bound) const {
-  CHECK(configured_);
  CHECK(out);
  auto p_gbtree = dynamic_cast<GBTree*>(out);
@@ -517,7 +483,6 @@ void GBTree::Slice(bst_layer_t begin, bst_layer_t end, bst_layer_t step, GradientBooster* out,
void GBTree::PredictBatchImpl(DMatrix* p_fmat, PredictionCacheEntry* out_preds, bool is_training,
                              bst_layer_t layer_begin, bst_layer_t layer_end) const {
-  CHECK(configured_);
  if (layer_end == 0) {
    layer_end = this->BoostedRounds();
  }
@@ -577,7 +542,6 @@ void GBTree::PredictBatch(DMatrix* p_fmat, PredictionCacheEntry* out_preds, bool is_training,
void GBTree::InplacePredict(std::shared_ptr<DMatrix> p_m, float missing,
                            PredictionCacheEntry* out_preds, bst_layer_t layer_begin,
                            bst_layer_t layer_end) const {
-  CHECK(configured_);
  auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);
  CHECK_LE(tree_end, model_.trees.size()) << "Invalid number of trees.";
  if (p_m->Ctx()->Device() != this->ctx_->Device()) {
@@ -606,8 +570,6 @@ void GBTree::InplacePredict(std::shared_ptr<DMatrix> p_m, float missing,
[[nodiscard]] std::unique_ptr<Predictor> const& GBTree::GetPredictor(
    bool is_training, HostDeviceVector<float> const* out_pred, DMatrix* f_dmat) const {
-  CHECK(configured_);
  // Data comes from SparsePageDMatrix. Since we are loading data in pages, no need to
  // prevent data copy.
  if (f_dmat && !f_dmat->SingleColBlock()) {
@@ -914,7 +876,6 @@ class Dart : public GBTree {
  void PredictContribution(DMatrix* p_fmat, HostDeviceVector<bst_float>* out_contribs,
                           bst_layer_t layer_begin, bst_layer_t layer_end,
                           bool approximate) override {
-    CHECK(configured_);
    auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);
    cpu_predictor_->PredictContribution(p_fmat, out_contribs, model_, tree_end, &weight_drop_,
                                        approximate);
@@ -923,7 +884,6 @@ class Dart : public GBTree {
  void PredictInteractionContributions(DMatrix* p_fmat, HostDeviceVector<float>* out_contribs,
                                       bst_layer_t layer_begin, bst_layer_t layer_end,
                                       bool approximate) override {
-    CHECK(configured_);
    auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);
    cpu_predictor_->PredictInteractionContributions(p_fmat, out_contribs, model_, tree_end,
                                                    &weight_drop_, approximate);

View File

@@ -59,9 +59,7 @@ struct GBTreeTrainParam : public XGBoostParameter<GBTreeTrainParam> {
  TreeMethod tree_method;
  // declare parameters
  DMLC_DECLARE_PARAMETER(GBTreeTrainParam) {
-    DMLC_DECLARE_FIELD(updater_seq)
-        .set_default("grow_colmaker,prune")
-        .describe("Tree updater sequence.");
+    DMLC_DECLARE_FIELD(updater_seq).describe("Tree updater sequence.").set_default("");
    DMLC_DECLARE_FIELD(process_type)
        .set_default(TreeProcessType::kDefault)
        .add_enum("default", TreeProcessType::kDefault)
@@ -170,22 +168,21 @@ bool SliceTrees(bst_layer_t begin, bst_layer_t end, bst_layer_t step, GBTreeModel
class GBTree : public GradientBooster {
 public:
  explicit GBTree(LearnerModelParam const* booster_config, Context const* ctx)
-      : GradientBooster{ctx}, model_(booster_config, ctx_) {}
-
-  void Configure(const Args& cfg) override;
-  /*! \brief Map `tree_method` parameter to `updater` parameter */
-  void ConfigureUpdaters();
+      : GradientBooster{ctx}, model_(booster_config, ctx_) {
+    monitor_.Init(__func__);
+  }
+
+  void Configure(Args const& cfg) override;
  /**
-   * \brief Optionally update the leaf value.
+   * @brief Optionally update the leaf value.
   */
  void UpdateTreeLeaf(DMatrix const* p_fmat, HostDeviceVector<float> const& predictions,
-                      ObjFunction const* obj,
-                      std::int32_t group_idx,
+                      ObjFunction const* obj, std::int32_t group_idx,
                      std::vector<HostDeviceVector<bst_node_t>> const& node_position,
                      std::vector<std::unique_ptr<RegTree>>* p_trees);
-  /*! \brief Carry out one iteration of boosting */
+  /**
+   * @brief Carry out one iteration of boosting.
+   */
  void DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
               PredictionCacheEntry* predt, ObjFunction const* obj) override;
@@ -289,7 +286,6 @@ class GBTree : public GradientBooster {
  void PredictInstance(const SparsePage::Inst& inst, std::vector<bst_float>* out_preds,
                       uint32_t layer_begin, uint32_t layer_end) override {
-    CHECK(configured_);
    std::uint32_t _, tree_end;
    std::tie(_, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end);
    cpu_predictor_->PredictInstance(inst, out_preds, model_, tree_end);
@@ -307,7 +303,6 @@ class GBTree : public GradientBooster {
  void PredictContribution(DMatrix* p_fmat, HostDeviceVector<float>* out_contribs,
                           bst_layer_t layer_begin, bst_layer_t layer_end,
                           bool approximate) override {
-    CHECK(configured_);
    auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);
    CHECK_EQ(tree_begin, 0) << "Predict contribution supports only iteration end: (0, "
                               "n_iteration), using model slicing instead.";
@@ -318,7 +313,6 @@ class GBTree : public GradientBooster {
  void PredictInteractionContributions(DMatrix* p_fmat, HostDeviceVector<float>* out_contribs,
                                       bst_layer_t layer_begin, bst_layer_t layer_end,
                                       bool approximate) override {
-    CHECK(configured_);
    auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);
    CHECK_EQ(tree_begin, 0) << "Predict interaction contribution supports only iteration end: (0, "
                               "n_iteration), using model slicing instead.";
@@ -332,9 +326,6 @@ class GBTree : public GradientBooster {
  }

 protected:
-  // initialize updater before using them
-  void InitUpdater(Args const& cfg);
-
  void BoostNewTrees(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat, int bst_group,
                     std::vector<HostDeviceVector<bst_node_t>>* out_position,
                     std::vector<std::unique_ptr<RegTree>>* ret);
@@ -352,10 +343,7 @@ class GBTree : public GradientBooster {
  GBTreeTrainParam tparam_;
  // Tree training parameter
  tree::TrainParam tree_param_;
-  // ----training fields----
-  bool showed_updater_warning_ {false};
  bool specified_updater_ {false};
-  bool configured_ {false};
  // the updaters that can be applied to each of tree
  std::vector<std::unique_ptr<TreeUpdater>> updaters_;
  // Predictors

View File

@@ -4,10 +4,12 @@
#include <gtest/gtest.h>
#include <xgboost/context.h>
#include <xgboost/host_device_vector.h>  // for HostDeviceVector
+#include <xgboost/json.h>                // for Json, Object
#include <xgboost/learner.h>             // for Learner

#include <limits>    // for numeric_limits
#include <memory>    // for shared_ptr
+#include <optional>  // for optional
#include <string>    // for string

#include "../../../src/data/proxy_dmatrix.h"  // for DMatrixProxy
@@ -165,6 +167,115 @@ TEST(GBTree, ChoosePredictor) {
  // data is not pulled back into host
  ASSERT_FALSE(data.HostCanWrite());
}
TEST(GBTree, ChooseTreeMethod) {
bst_row_t n_samples{128};
bst_feature_t n_features{64};
auto Xy = RandomDataGenerator{n_samples, n_features, 0.5f}.GenerateDMatrix(true);
auto with_update = [&](std::optional<std::string> device,
std::optional<std::string> tree_method) {
auto learner = std::unique_ptr<Learner>(Learner::Create({Xy}));
if (tree_method.has_value()) {
learner->SetParam("tree_method", tree_method.value());
}
if (device.has_value()) {
learner->SetParam("gpu_id", device.value());
}
learner->Configure();
for (std::int32_t i = 0; i < 3; ++i) {
learner->UpdateOneIter(0, Xy);
}
Json config{Object{}};
learner->SaveConfig(&config);
auto updater = config["learner"]["gradient_booster"]["updater"];
CHECK(!IsA<Null>(updater));
return updater;
};
auto with_boost = [&](std::optional<std::string> device, std::optional<std::string> tree_method) {
auto learner = std::unique_ptr<Learner>(Learner::Create({Xy}));
if (tree_method.has_value()) {
learner->SetParam("tree_method", tree_method.value());
}
if (device.has_value()) {
learner->SetParam("gpu_id", device.value());
}
learner->Configure();
for (std::int32_t i = 0; i < 3; ++i) {
HostDeviceVector<GradientPair> gpair{GenerateRandomGradients(Xy->Info().num_row_)};
learner->BoostOneIter(0, Xy, &gpair);
}
Json config{Object{}};
learner->SaveConfig(&config);
auto updater = config["learner"]["gradient_booster"]["updater"];
return updater;
};
// | | hist | gpu_hist | exact | NA |
// |--------+---------+----------+-------+-----|
// | CUDA:0 | GPU | GPU (w) | Err | GPU | # not yet tested
// | CPU | CPU | Err | CPU | CPU | # not yet tested
// |--------+---------+----------+-------+-----|
// | -1 | CPU | GPU (w) | CPU | CPU |
// | 0 | GPU | GPU (w) | Err | GPU |
// | NA | CPU | GPU (w) | CPU | CPU |
//
// - (w): warning
// - CPU: Run on CPU.
// - GPU: Run on CUDA.
// - Err: Not feasible.
// - NA: Parameter is not specified.
// When GPU hist is specified with a CPU context, we should emit an error. However, it's
// quite difficult to detect whether the CPU context is being used because it's the
// default or because it's specified by the user.
std::map<std::pair<std::optional<std::string>, std::optional<std::string>>, std::string>
expectation{
// hist
{{"hist", "-1"}, "grow_quantile_histmaker"},
{{"hist", "0"}, "grow_gpu_hist"},
{{"hist", std::nullopt}, "grow_quantile_histmaker"},
// gpu_hist
{{"gpu_hist", "-1"}, "grow_gpu_hist"},
{{"gpu_hist", "0"}, "grow_gpu_hist"},
{{"gpu_hist", std::nullopt}, "grow_gpu_hist"},
// exact
{{"exact", "-1"}, "grow_colmaker,prune"},
{{"exact", "0"}, "err"},
{{"exact", std::nullopt}, "grow_colmaker,prune"},
// NA
{{std::nullopt, "-1"}, "grow_quantile_histmaker"},
{{std::nullopt, "0"}, "grow_gpu_hist"}, // default to hist
{{std::nullopt, std::nullopt}, "grow_quantile_histmaker"},
};
auto run_test = [&](auto fn) {
for (auto const& kv : expectation) {
auto device = kv.first.second;
auto tm = kv.first.first;
if (kv.second == "err") {
ASSERT_THROW({ fn(device, tm); }, dmlc::Error)
<< " device:" << device.value_or("NA") << " tm:" << tm.value_or("NA");
continue;
}
auto up = fn(device, tm);
auto ups = get<Array const>(up);
auto exp_names = common::Split(kv.second, ',');
ASSERT_EQ(exp_names.size(), ups.size());
for (std::size_t i = 0; i < exp_names.size(); ++i) {
ASSERT_EQ(get<String const>(ups[i]["name"]), exp_names[i])
<< " device:" << device.value_or("NA") << " tm:" << tm.value_or("NA");
}
}
};
run_test(with_update);
run_test(with_boost);
}
#endif  // XGBOOST_USE_CUDA

// Some other parts of test are in `Tree.JsonIO'.

View File

@@ -57,12 +57,12 @@ class TestTrainingContinuation:
        gbdt_02 = xgb.train(xgb_params_01, dtrain_2class,
                            num_boost_round=0)
-       gbdt_02.save_model('xgb_tc.model')
+       gbdt_02.save_model('xgb_tc.json')

        gbdt_02a = xgb.train(xgb_params_01, dtrain_2class,
                             num_boost_round=10, xgb_model=gbdt_02)
        gbdt_02b = xgb.train(xgb_params_01, dtrain_2class,
-                            num_boost_round=10, xgb_model="xgb_tc.model")
+                            num_boost_round=10, xgb_model="xgb_tc.json")

        ntrees_02a = len(gbdt_02a.get_dump())
        ntrees_02b = len(gbdt_02b.get_dump())
        assert ntrees_02a == 10
@@ -78,18 +78,18 @@ class TestTrainingContinuation:
        gbdt_03 = xgb.train(xgb_params_01, dtrain_2class,
                            num_boost_round=3)
-       gbdt_03.save_model('xgb_tc.model')
+       gbdt_03.save_model('xgb_tc.json')

        gbdt_03a = xgb.train(xgb_params_01, dtrain_2class,
                             num_boost_round=7, xgb_model=gbdt_03)
        gbdt_03b = xgb.train(xgb_params_01, dtrain_2class,
-                            num_boost_round=7, xgb_model="xgb_tc.model")
+                            num_boost_round=7, xgb_model="xgb_tc.json")

        ntrees_03a = len(gbdt_03a.get_dump())
        ntrees_03b = len(gbdt_03b.get_dump())
        assert ntrees_03a == 10
        assert ntrees_03b == 10

-       os.remove('xgb_tc.model')
+       os.remove('xgb_tc.json')

        res1 = mean_squared_error(y_2class, gbdt_03a.predict(dtrain_2class))
        res2 = mean_squared_error(y_2class, gbdt_03b.predict(dtrain_2class))
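The continuation tests above now write their checkpoint with a .json suffix: save_model selects the serialization format from the file extension, and the old binary format is deprecated. A minimal standalone sketch of the same resume-from-file pattern; params, X and y are placeholders:

import xgboost as xgb

dtrain = xgb.DMatrix(X, label=y)       # placeholder data
booster = xgb.train(params, dtrain, num_boost_round=3)
booster.save_model("checkpoint.json")  # JSON format, chosen by the extension
# Resume boosting from the saved checkpoint file.
booster = xgb.train(params, dtrain, num_boost_round=7, xgb_model="checkpoint.json")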