Restrict access to cfg_ in gbm. (#4801)

* Restrict access to `cfg_` in gbm.

* Verify having correct updaters.

* Remove `grow_global_histmaker`

This updater is identical to `grow_histmaker`.  The former is not mentioned in our
documentation, so we simply remove it.
This commit is contained in:
Jiaming Yuan 2019-09-02 00:43:19 -04:00 committed by GitHub
parent 2aed0ae230
commit c0fbeff0ab
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 99 additions and 72 deletions

View File

@ -154,6 +154,12 @@ class GradientBooster {
GenericParameter const* gparam, GenericParameter const* gparam,
const std::vector<std::shared_ptr<DMatrix> >& cache_mats, const std::vector<std::shared_ptr<DMatrix> >& cache_mats,
bst_float base_margin); bst_float base_margin);
static void AssertGPUSupport() {
#ifndef XGBOOST_USE_CUDA
LOG(FATAL) << "XGBoost version not compiled with GPU support.";
#endif // XGBOOST_USE_CUDA
}
}; };
/*! /*!

View File

@ -65,7 +65,11 @@ class GBLinear : public GradientBooster {
updater_.reset(LinearUpdater::Create(param_.updater, learner_param_)); updater_.reset(LinearUpdater::Create(param_.updater, learner_param_));
updater_->Configure(cfg); updater_->Configure(cfg);
monitor_.Init("GBLinear"); monitor_.Init("GBLinear");
if (param_.updater == "gpu_coord_descent") {
this->AssertGPUSupport();
} }
}
void Load(dmlc::Stream* fi) override { void Load(dmlc::Stream* fi) override {
model_.Load(fi); model_.Load(fi);
} }

View File

@ -47,24 +47,41 @@ void GBTree::Configure(const Args& cfg) {
if (!cpu_predictor_) { if (!cpu_predictor_) {
cpu_predictor_ = std::unique_ptr<Predictor>( cpu_predictor_ = std::unique_ptr<Predictor>(
Predictor::Create("cpu_predictor", this->learner_param_)); Predictor::Create("cpu_predictor", this->learner_param_));
cpu_predictor_->Configure(cfg, cache_);
} }
#if defined(XGBOOST_USE_CUDA) #if defined(XGBOOST_USE_CUDA)
if (!gpu_predictor_) { if (!gpu_predictor_) {
gpu_predictor_ = std::unique_ptr<Predictor>( gpu_predictor_ = std::unique_ptr<Predictor>(
Predictor::Create("gpu_predictor", this->learner_param_)); Predictor::Create("gpu_predictor", this->learner_param_));
gpu_predictor_->Configure(cfg, cache_);
} }
#endif // defined(XGBOOST_USE_CUDA) #endif // defined(XGBOOST_USE_CUDA)
monitor_.Init("GBTree"); monitor_.Init("GBTree");
if (tparam_.tree_method == TreeMethod::kGPUHist && specified_predictor_ = std::any_of(cfg.cbegin(), cfg.cend(),
std::none_of(cfg.cbegin(), cfg.cend(),
[](std::pair<std::string, std::string> const& arg) { [](std::pair<std::string, std::string> const& arg) {
return arg.first == "predictor"; return arg.first == "predictor";
})) { });
if (!specified_predictor_ && tparam_.tree_method == TreeMethod::kGPUHist) {
tparam_.predictor = "gpu_predictor"; tparam_.predictor = "gpu_predictor";
} }
specified_updater_ = std::any_of(cfg.cbegin(), cfg.cend(),
[](std::pair<std::string, std::string> const& arg) {
return arg.first == "updater";
});
if (specified_updater_) {
LOG(WARNING) << "DANGER AHEAD: You have manually specified `updater` "
"parameter. The `tree_method` parameter will be ignored. "
"Incorrect sequence of updaters will produce undefined "
"behavior. For common uses, we recommend using "
"`tree_method` parameter instead.";
} else {
this->ConfigureUpdaters();
LOG(DEBUG) << "Using updaters: " << tparam_.updater_seq;
}
configured_ = true; configured_ = true;
} }
@ -72,26 +89,23 @@ void GBTree::Configure(const Args& cfg) {
// depends on whether external memory is used and how large is dataset. We can remove the // depends on whether external memory is used and how large is dataset. We can remove the
// dependency on DMatrix once `hist` tree method can handle external memory so that we can // dependency on DMatrix once `hist` tree method can handle external memory so that we can
// make it default. // make it default.
void GBTree::ConfigureWithKnownData(std::map<std::string, std::string> const& cfg, DMatrix* fmat) { void GBTree::ConfigureWithKnownData(Args const& cfg, DMatrix* fmat) {
std::string updater_seq = tparam_.updater_seq; std::string updater_seq = tparam_.updater_seq;
tparam_.InitAllowUnknown(cfg);
this->PerformTreeMethodHeuristic({this->cfg_.begin(), this->cfg_.end()}, fmat); this->PerformTreeMethodHeuristic(fmat);
this->ConfigureUpdaters({this->cfg_.begin(), this->cfg_.end()}); this->ConfigureUpdaters();
LOG(DEBUG) << "Using updaters: " << tparam_.updater_seq;
// initialize the updaters only when needed. // initialize the updaters only when needed.
if (updater_seq != tparam_.updater_seq) { if (updater_seq != tparam_.updater_seq) {
LOG(DEBUG) << "Using updaters: " << tparam_.updater_seq;
this->updaters_.clear(); this->updaters_.clear();
} }
this->InitUpdater();
cpu_predictor_->Configure({cfg.cbegin(), cfg.cend()}, cache_); this->InitUpdater(cfg);
#if defined(XGBOOST_USE_CUDA)
gpu_predictor_->Configure({cfg.cbegin(), cfg.cend()}, cache_);
#endif // defined(XGBOOST_USE_CUDA)
} }
void GBTree::PerformTreeMethodHeuristic(std::map<std::string, std::string> const& cfg, void GBTree::PerformTreeMethodHeuristic(DMatrix* fmat) {
DMatrix* fmat) { if (specified_updater_) {
if (cfg.find("updater") != cfg.cend()) {
// This method is disabled when `updater` parameter is explicitly // This method is disabled when `updater` parameter is explicitly
// set, since only experts are expected to do so. // set, since only experts are expected to do so.
return; return;
@ -124,17 +138,8 @@ void GBTree::PerformTreeMethodHeuristic(std::map<std::string, std::string> const
LOG(DEBUG) << "Using tree method: " << static_cast<int>(tparam_.tree_method); LOG(DEBUG) << "Using tree method: " << static_cast<int>(tparam_.tree_method);
} }
void GBTree::ConfigureUpdaters(const std::map<std::string, std::string>& cfg) { void GBTree::ConfigureUpdaters() {
// `updater` parameter was manually specified // `updater` parameter was manually specified
if (cfg.find("updater") != cfg.cend()) {
LOG(WARNING) << "DANGER AHEAD: You have manually specified `updater` "
"parameter. The `tree_method` parameter will be ignored. "
"Incorrect sequence of updaters will produce undefined "
"behavior. For common uses, we recommend using "
"`tree_method` parameter instead.";
return;
}
/* Choose updaters according to tree_method parameters */ /* Choose updaters according to tree_method parameters */
switch (tparam_.tree_method) { switch (tparam_.tree_method) {
case TreeMethod::kAuto: case TreeMethod::kAuto:
@ -157,7 +162,7 @@ void GBTree::ConfigureUpdaters(const std::map<std::string, std::string>& cfg) {
case TreeMethod::kGPUHist: case TreeMethod::kGPUHist:
this->AssertGPUSupport(); this->AssertGPUSupport();
tparam_.updater_seq = "grow_gpu_hist"; tparam_.updater_seq = "grow_gpu_hist";
if (cfg.find("predictor") == cfg.cend()) { if (!specified_predictor_) {
tparam_.predictor = "gpu_predictor"; tparam_.predictor = "gpu_predictor";
} }
break; break;
@ -172,7 +177,7 @@ void GBTree::DoBoost(DMatrix* p_fmat,
ObjFunction* obj) { ObjFunction* obj) {
std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees; std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees;
const int ngroup = model_.param.num_output_group; const int ngroup = model_.param.num_output_group;
ConfigureWithKnownData({this->cfg_.cbegin(), this->cfg_.cend()}, p_fmat); ConfigureWithKnownData(this->cfg_, p_fmat);
monitor_.Start("BoostNewTrees"); monitor_.Start("BoostNewTrees");
if (ngroup == 1) { if (ngroup == 1) {
std::vector<std::unique_ptr<RegTree> > ret; std::vector<std::unique_ptr<RegTree> > ret;
@ -199,18 +204,43 @@ void GBTree::DoBoost(DMatrix* p_fmat,
} }
} }
monitor_.Stop("BoostNewTrees"); monitor_.Stop("BoostNewTrees");
monitor_.Start("CommitModel");
this->CommitModel(std::move(new_trees)); this->CommitModel(std::move(new_trees));
monitor_.Stop("CommitModel");
} }
void GBTree::InitUpdater() { void GBTree::InitUpdater(Args const& cfg) {
if (updaters_.size() != 0) return;
std::string tval = tparam_.updater_seq; std::string tval = tparam_.updater_seq;
std::vector<std::string> ups = common::Split(tval, ','); std::vector<std::string> ups = common::Split(tval, ',');
if (updaters_.size() != 0) {
// Assert we have a valid set of updaters.
CHECK_EQ(ups.size(), updaters_.size());
for (auto const& up : updaters_) {
bool contains = std::any_of(ups.cbegin(), ups.cend(),
[&up](std::string const& name) {
return name == up->Name();
});
if (!contains) {
std::stringstream ss;
ss << "Internal Error: " << " mismatched updater sequence.\n";
ss << "Specified updaters: ";
std::for_each(ups.cbegin(), ups.cend(),
[&ss](std::string const& name){
ss << name << " ";
});
ss << "\n" << "Actual updaters: ";
std::for_each(updaters_.cbegin(), updaters_.cend(),
[&ss](std::unique_ptr<TreeUpdater> const& updater){
ss << updater->Name() << " ";
});
LOG(FATAL) << ss.str();
}
}
return;
}
for (const std::string& pstr : ups) { for (const std::string& pstr : ups) {
std::unique_ptr<TreeUpdater> up(TreeUpdater::Create(pstr.c_str(), learner_param_)); std::unique_ptr<TreeUpdater> up(TreeUpdater::Create(pstr.c_str(), learner_param_));
up->Configure(this->cfg_); up->Configure(cfg);
updaters_.push_back(std::move(up)); updaters_.push_back(std::move(up));
} }
} }
@ -245,6 +275,7 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
} }
void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) { void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) {
monitor_.Start("CommitModel");
int num_new_trees = 0; int num_new_trees = 0;
for (int gid = 0; gid < model_.param.num_output_group; ++gid) { for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
num_new_trees += new_trees[gid].size(); num_new_trees += new_trees[gid].size();
@ -252,6 +283,7 @@ void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& ne
} }
CHECK(configured_); CHECK(configured_);
GetPredictor()->UpdatePredictionCache(model_, &updaters_, num_new_trees); GetPredictor()->UpdatePredictionCache(model_, &updaters_, num_new_trees);
monitor_.Stop("CommitModel");
} }

View File

@ -147,20 +147,13 @@ class GBTree : public GradientBooster {
cache_ = cache; cache_ = cache;
} }
static void AssertGPUSupport() {
#ifndef XGBOOST_USE_CUDA
LOG(FATAL) << "XGBoost version not compiled with GPU support.";
#endif // XGBOOST_USE_CUDA
}
void Configure(const Args& cfg) override; void Configure(const Args& cfg) override;
// Revise `tree_method` and `updater` parameters after seeing the training // Revise `tree_method` and `updater` parameters after seeing the training
// data matrix // data matrix, only useful when tree_method is auto.
void PerformTreeMethodHeuristic(std::map<std::string, std::string> const& cfg, void PerformTreeMethodHeuristic(DMatrix* fmat);
DMatrix* fmat);
/*! \brief Map `tree_method` parameter to `updater` parameter */ /*! \brief Map `tree_method` parameter to `updater` parameter */
void ConfigureUpdaters(const std::map<std::string, std::string>& cfg); void ConfigureUpdaters();
void ConfigureWithKnownData(std::map<std::string, std::string> const& cfg, DMatrix* fmat); void ConfigureWithKnownData(Args const& cfg, DMatrix* fmat);
/*! \brief Carry out one iteration of boosting */ /*! \brief Carry out one iteration of boosting */
void DoBoost(DMatrix* p_fmat, void DoBoost(DMatrix* p_fmat,
@ -241,7 +234,7 @@ class GBTree : public GradientBooster {
protected: protected:
// initialize updater before using them // initialize updater before using them
void InitUpdater(); void InitUpdater(Args const& cfg);
// do group specific group // do group specific group
void BoostNewTrees(HostDeviceVector<GradientPair>* gpair, void BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
@ -277,6 +270,8 @@ class GBTree : public GradientBooster {
// training parameter // training parameter
GBTreeTrainParam tparam_; GBTreeTrainParam tparam_;
// ----training fields---- // ----training fields----
bool specified_updater_ {false};
bool specified_predictor_ {false};
bool configured_ {false}; bool configured_ {false};
// configurations for tree // configurations for tree
Args cfg_; Args cfg_;

View File

@ -1460,7 +1460,7 @@ class GPUHistMaker : public TreeUpdater {
} }
char const* Name() const override { char const* Name() const override {
return "gpu_hist"; return "grow_gpu_hist";
} }
private: private:

View File

@ -635,7 +635,7 @@ class CQHistMaker: public HistMaker {
class GlobalProposalHistMaker: public CQHistMaker { class GlobalProposalHistMaker: public CQHistMaker {
public: public:
char const* Name() const override { char const* Name() const override {
return "grow_global_histmaker"; return "grow_histmaker";
} }
protected: protected:
@ -740,12 +740,6 @@ XGBOOST_REGISTER_TREE_UPDATER(LocalHistMaker, "grow_local_histmaker")
return new CQHistMaker(); return new CQHistMaker();
}); });
XGBOOST_REGISTER_TREE_UPDATER(GlobalHistMaker, "grow_global_histmaker")
.describe("Tree constructor that uses approximate global proposal of histogram construction.")
.set_body([]() {
return new GlobalProposalHistMaker();
});
XGBOOST_REGISTER_TREE_UPDATER(HistMaker, "grow_histmaker") XGBOOST_REGISTER_TREE_UPDATER(HistMaker, "grow_histmaker")
.describe("Tree constructor that uses approximate global of histogram construction.") .describe("Tree constructor that uses approximate global of histogram construction.")
.set_body([]() { .set_body([]() {

View File

@ -5,46 +5,42 @@
namespace xgboost { namespace xgboost {
TEST(GBTree, SelectTreeMethod) { TEST(GBTree, SelectTreeMethod) {
using Arg = std::pair<std::string, std::string>;
size_t constexpr kRows = 10;
size_t constexpr kCols = 10; size_t constexpr kCols = 10;
auto p_shared_ptr_dmat = CreateDMatrix(kRows, kCols, 0);
auto p_dmat {(*p_shared_ptr_dmat).get()};
GenericParameter generic_param; GenericParameter generic_param;
generic_param.InitAllowUnknown(std::vector<Arg>{}); generic_param.InitAllowUnknown(Args{});
std::unique_ptr<GradientBooster> p_gbm{ std::unique_ptr<GradientBooster> p_gbm{
GradientBooster::Create("gbtree", &generic_param, {}, 0)}; GradientBooster::Create("gbtree", &generic_param, {}, 0)};
auto& gbtree = dynamic_cast<gbm::GBTree&> (*p_gbm); auto& gbtree = dynamic_cast<gbm::GBTree&> (*p_gbm);
// Test if `tree_method` can be set // Test if `tree_method` can be set
std::string n_feat = std::to_string(kCols); std::string n_feat = std::to_string(kCols);
std::map<std::string, std::string> args {Arg{"tree_method", "approx"}, Arg{"num_feature", n_feat}}; Args args {{"tree_method", "approx"}, {"num_feature", n_feat}};
gbtree.Configure({args.cbegin(), args.cend()}); gbtree.Configure({args.cbegin(), args.cend()});
gbtree.ConfigureWithKnownData(args, p_dmat); gbtree.Configure(args);
auto const& tparam = gbtree.GetTrainParam(); auto const& tparam = gbtree.GetTrainParam();
gbtree.ConfigureWithKnownData({Arg{"tree_method", "approx"}, Arg{"num_feature", n_feat}}, p_dmat); gbtree.Configure({{"tree_method", "approx"}, {"num_feature", n_feat}});
ASSERT_EQ(tparam.updater_seq, "grow_histmaker,prune"); ASSERT_EQ(tparam.updater_seq, "grow_histmaker,prune");
gbtree.ConfigureWithKnownData({Arg("tree_method", "exact"), Arg("num_feature", n_feat)}, p_dmat); gbtree.Configure({{"tree_method", "exact"}, {"num_feature", n_feat}});
ASSERT_EQ(tparam.updater_seq, "grow_colmaker,prune"); ASSERT_EQ(tparam.updater_seq, "grow_colmaker,prune");
gbtree.ConfigureWithKnownData({Arg("tree_method", "hist"), Arg("num_feature", n_feat)}, p_dmat); gbtree.Configure({{"tree_method", "hist"}, {"num_feature", n_feat}});
ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker"); ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker");
ASSERT_EQ(tparam.predictor, "cpu_predictor"); ASSERT_EQ(tparam.predictor, "cpu_predictor");
gbtree.ConfigureWithKnownData({Arg{"booster", "dart"}, Arg{"tree_method", "hist"}, gbtree.Configure({{"booster", "dart"}, {"tree_method", "hist"},
Arg{"num_feature", n_feat}}, p_dmat); {"num_feature", n_feat}});
ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker"); ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker");
ASSERT_EQ(tparam.predictor, "cpu_predictor");
#ifdef XGBOOST_USE_CUDA #ifdef XGBOOST_USE_CUDA
generic_param.InitAllowUnknown(std::vector<Arg>{Arg{"gpu_id", "0"}}); generic_param.InitAllowUnknown(Args{{"gpu_id", "0"}});
gbtree.ConfigureWithKnownData({Arg("tree_method", "gpu_hist"), Arg("num_feature", n_feat)}, gbtree.Configure({{"tree_method", "gpu_hist"}, {"num_feature", n_feat}});
p_dmat);
ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist"); ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist");
ASSERT_EQ(tparam.predictor, "gpu_predictor"); ASSERT_EQ(tparam.predictor, "gpu_predictor");
gbtree.ConfigureWithKnownData({Arg{"booster", "dart"}, Arg{"tree_method", "gpu_hist"}, gbtree.Configure({{"booster", "dart"}, {"tree_method", "gpu_hist"},
Arg{"num_feature", n_feat}}, p_dmat); {"num_feature", n_feat}});
ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist"); ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist");
ASSERT_EQ(tparam.predictor, "gpu_predictor");
#endif #endif
delete p_shared_ptr_dmat;
} }
} // namespace xgboost } // namespace xgboost