diff --git a/include/xgboost/gbm.h b/include/xgboost/gbm.h
index 99ff7989c..3811d7283 100644
--- a/include/xgboost/gbm.h
+++ b/include/xgboost/gbm.h
@@ -154,6 +154,12 @@ class GradientBooster {
       GenericParameter const* gparam,
       const std::vector<std::shared_ptr<DMatrix> >& cache_mats,
       bst_float base_margin);
+
+  static void AssertGPUSupport() {
+#ifndef XGBOOST_USE_CUDA
+    LOG(FATAL) << "XGBoost version not compiled with GPU support.";
+#endif  // XGBOOST_USE_CUDA
+  }
 };
 
 /*!
diff --git a/src/gbm/gblinear.cc b/src/gbm/gblinear.cc
index ea371e5b1..c1bf9cc6b 100644
--- a/src/gbm/gblinear.cc
+++ b/src/gbm/gblinear.cc
@@ -65,7 +65,11 @@ class GBLinear : public GradientBooster {
     updater_.reset(LinearUpdater::Create(param_.updater, learner_param_));
     updater_->Configure(cfg);
     monitor_.Init("GBLinear");
+    if (param_.updater == "gpu_coord_descent") {
+      this->AssertGPUSupport();
+    }
   }
+
   void Load(dmlc::Stream* fi) override {
     model_.Load(fi);
   }
diff --git a/src/gbm/gbtree.cc b/src/gbm/gbtree.cc
index 8d790e9e4..7a399aed2 100644
--- a/src/gbm/gbtree.cc
+++ b/src/gbm/gbtree.cc
@@ -47,24 +47,41 @@ void GBTree::Configure(const Args& cfg) {
   if (!cpu_predictor_) {
     cpu_predictor_ = std::unique_ptr<Predictor>(
         Predictor::Create("cpu_predictor", this->learner_param_));
+    cpu_predictor_->Configure(cfg, cache_);
   }
 #if defined(XGBOOST_USE_CUDA)
   if (!gpu_predictor_) {
     gpu_predictor_ = std::unique_ptr<Predictor>(
         Predictor::Create("gpu_predictor", this->learner_param_));
+    gpu_predictor_->Configure(cfg, cache_);
   }
 #endif  // defined(XGBOOST_USE_CUDA)
 
   monitor_.Init("GBTree");
 
-  if (tparam_.tree_method == TreeMethod::kGPUHist &&
-      std::none_of(cfg.cbegin(), cfg.cend(),
+  specified_predictor_ = std::any_of(cfg.cbegin(), cfg.cend(),
                    [](std::pair<std::string, std::string> const& arg) {
                      return arg.first == "predictor";
-                   })) {
+                   });
+  if (!specified_predictor_ && tparam_.tree_method == TreeMethod::kGPUHist) {
     tparam_.predictor = "gpu_predictor";
   }
+
+  specified_updater_ = std::any_of(cfg.cbegin(), cfg.cend(),
+                   [](std::pair<std::string, std::string> const& arg) {
+                     return arg.first == "updater";
+                   });
+  if (specified_updater_) {
+    LOG(WARNING) << "DANGER AHEAD: You have manually specified `updater` "
+                    "parameter. The `tree_method` parameter will be ignored. "
+                    "Incorrect sequence of updaters will produce undefined "
+                    "behavior. For common uses, we recommend using "
+                    "`tree_method` parameter instead.";
+  } else {
+    this->ConfigureUpdaters();
+    LOG(DEBUG) << "Using updaters: " << tparam_.updater_seq;
+  }
+
   configured_ = true;
 }
 
@@ -72,26 +89,23 @@ void GBTree::Configure(const Args& cfg) {
 // depends on whether external memory is used and how large is dataset.  We can remove the
 // dependency on DMatrix once `hist` tree method can handle external memory so that we can
 // make it default.
-void GBTree::ConfigureWithKnownData(std::map<std::string, std::string> const& cfg, DMatrix* fmat) {
+void GBTree::ConfigureWithKnownData(Args const& cfg, DMatrix* fmat) {
   std::string updater_seq = tparam_.updater_seq;
-  tparam_.InitAllowUnknown(cfg);
-  this->PerformTreeMethodHeuristic({this->cfg_.begin(), this->cfg_.end()}, fmat);
-  this->ConfigureUpdaters({this->cfg_.begin(), this->cfg_.end()});
-  LOG(DEBUG) << "Using updaters: " << tparam_.updater_seq;
+
+  this->PerformTreeMethodHeuristic(fmat);
+  this->ConfigureUpdaters();
+
   // initialize the updaters only when needed.
   if (updater_seq != tparam_.updater_seq) {
+    LOG(DEBUG) << "Using updaters: " << tparam_.updater_seq;
     this->updaters_.clear();
   }
-  this->InitUpdater();
-  cpu_predictor_->Configure({cfg.cbegin(), cfg.cend()}, cache_);
-#if defined(XGBOOST_USE_CUDA)
-  gpu_predictor_->Configure({cfg.cbegin(), cfg.cend()}, cache_);
-#endif  // defined(XGBOOST_USE_CUDA)
+
+  this->InitUpdater(cfg);
 }
 
-void GBTree::PerformTreeMethodHeuristic(std::map<std::string, std::string> const& cfg,
-                                        DMatrix* fmat) {
-  if (cfg.find("updater") != cfg.cend()) {
+void GBTree::PerformTreeMethodHeuristic(DMatrix* fmat) {
+  if (specified_updater_) {
     // This method is disabled when `updater` parameter is explicitly
     // set, since only experts are expected to do so.
     return;
@@ -124,17 +138,8 @@ void GBTree::PerformTreeMethodHeuristic(std::map<std::string, std::string> const
   LOG(DEBUG) << "Using tree method: " << static_cast<int>(tparam_.tree_method);
 }
 
-void GBTree::ConfigureUpdaters(const std::map<std::string, std::string>& cfg) {
+void GBTree::ConfigureUpdaters() {
   // `updater` parameter was manually specified
-  if (cfg.find("updater") != cfg.cend()) {
-    LOG(WARNING) << "DANGER AHEAD: You have manually specified `updater` "
-                    "parameter. The `tree_method` parameter will be ignored. "
-                    "Incorrect sequence of updaters will produce undefined "
-                    "behavior. For common uses, we recommend using "
-                    "`tree_method` parameter instead.";
-    return;
-  }
-
   /* Choose updaters according to tree_method parameters */
   switch (tparam_.tree_method) {
     case TreeMethod::kAuto:
@@ -157,7 +162,7 @@ void GBTree::ConfigureUpdaters(const std::map<std::string, std::string>& cfg) {
     case TreeMethod::kGPUHist:
       this->AssertGPUSupport();
       tparam_.updater_seq = "grow_gpu_hist";
-      if (cfg.find("predictor") == cfg.cend()) {
+      if (!specified_predictor_) {
         tparam_.predictor = "gpu_predictor";
       }
       break;
@@ -172,7 +177,7 @@ void GBTree::DoBoost(DMatrix* p_fmat,
                      ObjFunction* obj) {
   std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees;
   const int ngroup = model_.param.num_output_group;
-  ConfigureWithKnownData({this->cfg_.cbegin(), this->cfg_.cend()}, p_fmat);
+  ConfigureWithKnownData(this->cfg_, p_fmat);
   monitor_.Start("BoostNewTrees");
   if (ngroup == 1) {
     std::vector<std::unique_ptr<RegTree> > ret;
@@ -199,18 +204,43 @@ void GBTree::DoBoost(DMatrix* p_fmat,
     }
   }
   monitor_.Stop("BoostNewTrees");
-  monitor_.Start("CommitModel");
   this->CommitModel(std::move(new_trees));
-  monitor_.Stop("CommitModel");
 }
 
-void GBTree::InitUpdater() {
-  if (updaters_.size() != 0) return;
+void GBTree::InitUpdater(Args const& cfg) {
   std::string tval = tparam_.updater_seq;
   std::vector<std::string> ups = common::Split(tval, ',');
+
+  if (updaters_.size() != 0) {
+    // Assert we have a valid set of updaters.
+    CHECK_EQ(ups.size(), updaters_.size());
+    for (auto const& up : updaters_) {
+      bool contains = std::any_of(ups.cbegin(), ups.cend(),
+                                  [&up](std::string const& name) {
+                                    return name == up->Name();
+                                  });
+      if (!contains) {
+        std::stringstream ss;
+        ss << "Internal Error: " << " mismatched updater sequence.\n";
+        ss << "Specified updaters: ";
+        std::for_each(ups.cbegin(), ups.cend(),
+                      [&ss](std::string const& name){
+                        ss << name << " ";
+                      });
+        ss << "\n" << "Actual updaters: ";
+        std::for_each(updaters_.cbegin(), updaters_.cend(),
+                      [&ss](std::unique_ptr<TreeUpdater> const& updater){
+                        ss << updater->Name() << " ";
+                      });
+        LOG(FATAL) << ss.str();
+      }
+    }
+    return;
+  }
+
   for (const std::string& pstr : ups) {
     std::unique_ptr<TreeUpdater> up(TreeUpdater::Create(pstr.c_str(), learner_param_));
-    up->Configure(this->cfg_);
+    up->Configure(cfg);
     updaters_.push_back(std::move(up));
   }
 }
@@ -245,6 +275,7 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
 }
 
 void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) {
+  monitor_.Start("CommitModel");
   int num_new_trees = 0;
   for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
     num_new_trees += new_trees[gid].size();
@@ -252,6 +283,7 @@ void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& ne
   }
   CHECK(configured_);
   GetPredictor()->UpdatePredictionCache(model_, &updaters_, num_new_trees);
+  monitor_.Stop("CommitModel");
 }
diff --git a/src/gbm/gbtree.h b/src/gbm/gbtree.h
index fa6ede83f..63c5263f7 100644
--- a/src/gbm/gbtree.h
+++ b/src/gbm/gbtree.h
@@ -147,20 +147,13 @@ class GBTree : public GradientBooster {
     cache_ = cache;
   }
 
-  static void AssertGPUSupport() {
-#ifndef XGBOOST_USE_CUDA
-    LOG(FATAL) << "XGBoost version not compiled with GPU support.";
-#endif  // XGBOOST_USE_CUDA
-  }
-
   void Configure(const Args& cfg) override;
   // Revise `tree_method` and `updater` parameters after seeing the training
-  // data matrix
-  void PerformTreeMethodHeuristic(std::map<std::string, std::string> const& cfg,
-                                  DMatrix* fmat);
+  // data matrix, only useful when tree_method is auto.
+  void PerformTreeMethodHeuristic(DMatrix* fmat);
   /*! \brief Map `tree_method` parameter to `updater` parameter */
-  void ConfigureUpdaters(const std::map<std::string, std::string>& cfg);
-  void ConfigureWithKnownData(std::map<std::string, std::string> const& cfg, DMatrix* fmat);
+  void ConfigureUpdaters();
+  void ConfigureWithKnownData(Args const& cfg, DMatrix* fmat);
 
   /*! \brief Carry out one iteration of boosting */
   void DoBoost(DMatrix* p_fmat,
@@ -241,7 +234,7 @@ class GBTree : public GradientBooster {
 
  protected:
   // initialize updater before using them
-  void InitUpdater();
+  void InitUpdater(Args const& cfg);
 
   // do group specific group
   void BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
@@ -277,6 +270,8 @@ class GBTree : public GradientBooster {
   // training parameter
   GBTreeTrainParam tparam_;
   // ----training fields----
+  bool specified_updater_ {false};
+  bool specified_predictor_ {false};
   bool configured_ {false};
   // configurations for tree
   Args cfg_;
diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu
index 83525d64b..94ad15e51 100644
--- a/src/tree/updater_gpu_hist.cu
+++ b/src/tree/updater_gpu_hist.cu
@@ -1460,7 +1460,7 @@ class GPUHistMaker : public TreeUpdater {
   }
 
   char const* Name() const override {
-    return "gpu_hist";
+    return "grow_gpu_hist";
   }
 
  private:
diff --git a/src/tree/updater_histmaker.cc b/src/tree/updater_histmaker.cc
index cce7bab11..4b6223454 100644
--- a/src/tree/updater_histmaker.cc
+++ b/src/tree/updater_histmaker.cc
@@ -635,7 +635,7 @@ class CQHistMaker: public HistMaker<TStats> {
 class GlobalProposalHistMaker: public CQHistMaker<TStats> {
  public:
   char const* Name() const override {
-    return "grow_global_histmaker";
+    return "grow_histmaker";
   }
 
  protected:
@@ -740,12 +740,6 @@ XGBOOST_REGISTER_TREE_UPDATER(LocalHistMaker, "grow_local_histmaker")
     return new CQHistMaker<GradStats>();
   });
 
-XGBOOST_REGISTER_TREE_UPDATER(GlobalHistMaker, "grow_global_histmaker")
-.describe("Tree constructor that uses approximate global proposal of histogram construction.")
-.set_body([]() {
-    return new GlobalProposalHistMaker<GradStats>();
-  });
-
 XGBOOST_REGISTER_TREE_UPDATER(HistMaker, "grow_histmaker")
 .describe("Tree constructor that uses approximate global of histogram construction.")
 .set_body([]() {
diff --git a/tests/cpp/gbm/test_gbtree.cc b/tests/cpp/gbm/test_gbtree.cc
index 0ae2ba856..fd48e9c77 100644
--- a/tests/cpp/gbm/test_gbtree.cc
+++ b/tests/cpp/gbm/test_gbtree.cc
@@ -5,46 +5,42 @@
 namespace xgboost {
 TEST(GBTree, SelectTreeMethod) {
-  using Arg = std::pair<std::string, std::string>;
-  size_t constexpr kRows = 10;
   size_t constexpr kCols = 10;
-  auto p_shared_ptr_dmat = CreateDMatrix(kRows, kCols, 0);
-  auto p_dmat {(*p_shared_ptr_dmat).get()};
 
   GenericParameter generic_param;
-  generic_param.InitAllowUnknown(std::vector<Arg>{});
+  generic_param.InitAllowUnknown(Args{});
   std::unique_ptr<GradientBooster> p_gbm{
     GradientBooster::Create("gbtree", &generic_param, {}, 0)};
   auto& gbtree = dynamic_cast<gbm::GBTree&> (*p_gbm);
 
   // Test if `tree_method` can be set
   std::string n_feat = std::to_string(kCols);
-  std::map<std::string, std::string> args {Arg{"tree_method", "approx"}, Arg{"num_feature", n_feat}};
+  Args args {{"tree_method", "approx"}, {"num_feature", n_feat}};
   gbtree.Configure({args.cbegin(), args.cend()});
-  gbtree.ConfigureWithKnownData(args, p_dmat);
+  gbtree.Configure(args);
   auto const& tparam = gbtree.GetTrainParam();
-  gbtree.ConfigureWithKnownData({Arg{"tree_method", "approx"}, Arg{"num_feature", n_feat}}, p_dmat);
+  gbtree.Configure({{"tree_method", "approx"}, {"num_feature", n_feat}});
   ASSERT_EQ(tparam.updater_seq, "grow_histmaker,prune");
-  gbtree.ConfigureWithKnownData({Arg("tree_method", "exact"), Arg("num_feature", n_feat)}, p_dmat);
+  gbtree.Configure({{"tree_method", "exact"}, {"num_feature", n_feat}});
   ASSERT_EQ(tparam.updater_seq, "grow_colmaker,prune");
-  gbtree.ConfigureWithKnownData({Arg("tree_method", "hist"), Arg("num_feature", n_feat)}, p_dmat);
+  gbtree.Configure({{"tree_method", "hist"}, {"num_feature", n_feat}});
   ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker");
   ASSERT_EQ(tparam.predictor, "cpu_predictor");
-  gbtree.ConfigureWithKnownData({Arg{"booster", "dart"}, Arg{"tree_method", "hist"},
-                                 Arg{"num_feature", n_feat}}, p_dmat);
+  gbtree.Configure({{"booster", "dart"}, {"tree_method", "hist"},
+                    {"num_feature", n_feat}});
   ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker");
+  ASSERT_EQ(tparam.predictor, "cpu_predictor");
+
 #ifdef XGBOOST_USE_CUDA
-  generic_param.InitAllowUnknown(std::vector<Arg>{Arg{"gpu_id", "0"}});
-  gbtree.ConfigureWithKnownData({Arg("tree_method", "gpu_hist"), Arg("num_feature", n_feat)},
-                                p_dmat);
+  generic_param.InitAllowUnknown(Args{{"gpu_id", "0"}});
+  gbtree.Configure({{"tree_method", "gpu_hist"}, {"num_feature", n_feat}});
   ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist");
   ASSERT_EQ(tparam.predictor, "gpu_predictor");
-  gbtree.ConfigureWithKnownData({Arg{"booster", "dart"}, Arg{"tree_method", "gpu_hist"},
-                                 Arg{"num_feature", n_feat}}, p_dmat);
+  gbtree.Configure({{"booster", "dart"}, {"tree_method", "gpu_hist"},
+                    {"num_feature", n_feat}});
   ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist");
+  ASSERT_EQ(tparam.predictor, "gpu_predictor");
 #endif
-
-  delete p_shared_ptr_dmat;
 }
 }  // namespace xgboost
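
Note (not part of the patch): the sketch below shows how the reworked configuration path can be exercised after this change, mirroring the updated tests/cpp/gbm/test_gbtree.cc. The mapping from `tree_method` to `updater_seq` now happens entirely inside GBTree::Configure(), so no DMatrix or ConfigureWithKnownData call is needed. The helper name `SelectedUpdaterSeq` and the include paths are illustrative assumptions, not part of the XGBoost API.

// Illustrative sketch only -- it assumes the declarations shown in the diff
// above (GradientBooster in include/xgboost/gbm.h, gbm::GBTree in
// src/gbm/gbtree.h) plus xgboost's GenericParameter and Args types; the
// include paths are guesses and may differ in the real source tree.
#include <memory>
#include <string>

#include "xgboost/gbm.h"           // GradientBooster
#include "../../src/gbm/gbtree.h"  // gbm::GBTree (internal header, as in the C++ tests)

namespace xgboost {

// Hypothetical helper: configure a fresh gbtree booster and report which
// updater sequence was selected for the given tree_method.
inline std::string SelectedUpdaterSeq(std::string const& tree_method) {
  GenericParameter generic_param;
  generic_param.InitAllowUnknown(Args{});

  std::unique_ptr<GradientBooster> p_gbm{
      GradientBooster::Create("gbtree", &generic_param, {}, 0)};
  auto& gbtree = dynamic_cast<gbm::GBTree&>(*p_gbm);

  // After this patch, Configure() alone performs the tree_method ->
  // updater_seq mapping; no DMatrix is required any more.
  gbtree.Configure({{"tree_method", tree_method}, {"num_feature", "10"}});
  return gbtree.GetTrainParam().updater_seq;
}

}  // namespace xgboost

Matching the assertions in the updated test, SelectedUpdaterSeq("hist") would be expected to return "grow_quantile_histmaker", and a CUDA-enabled build configured with "gpu_hist" would return "grow_gpu_hist".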