Restrict access to cfg_ in gbm. (#4801)

* Restrict access to `cfg_` in gbm.

* Verify having correct updaters.

* Remove `grow_global_histmaker`

This updater is the same as `grow_histmaker`.  `grow_global_histmaker` is not
mentioned in our documentation, so we simply remove it.
This commit is contained in:
Jiaming Yuan
2019-09-02 00:43:19 -04:00
committed by GitHub
parent 2aed0ae230
commit c0fbeff0ab
7 changed files with 99 additions and 72 deletions

View File

@@ -65,7 +65,11 @@ class GBLinear : public GradientBooster {
updater_.reset(LinearUpdater::Create(param_.updater, learner_param_));
updater_->Configure(cfg);
monitor_.Init("GBLinear");
if (param_.updater == "gpu_coord_descent") {
this->AssertGPUSupport();
}
}
// Restore the linear model from a serialized stream.
// NOTE(review): only `model_` is deserialized here; the updater and other
// configuration are presumably re-established via Configure — confirm.
void Load(dmlc::Stream* fi) override {
model_.Load(fi);
}

View File

@@ -47,24 +47,41 @@ void GBTree::Configure(const Args& cfg) {
if (!cpu_predictor_) {
cpu_predictor_ = std::unique_ptr<Predictor>(
Predictor::Create("cpu_predictor", this->learner_param_));
cpu_predictor_->Configure(cfg, cache_);
}
#if defined(XGBOOST_USE_CUDA)
if (!gpu_predictor_) {
gpu_predictor_ = std::unique_ptr<Predictor>(
Predictor::Create("gpu_predictor", this->learner_param_));
gpu_predictor_->Configure(cfg, cache_);
}
#endif // defined(XGBOOST_USE_CUDA)
monitor_.Init("GBTree");
if (tparam_.tree_method == TreeMethod::kGPUHist &&
std::none_of(cfg.cbegin(), cfg.cend(),
specified_predictor_ = std::any_of(cfg.cbegin(), cfg.cend(),
[](std::pair<std::string, std::string> const& arg) {
return arg.first == "predictor";
})) {
});
if (!specified_predictor_ && tparam_.tree_method == TreeMethod::kGPUHist) {
tparam_.predictor = "gpu_predictor";
}
specified_updater_ = std::any_of(cfg.cbegin(), cfg.cend(),
[](std::pair<std::string, std::string> const& arg) {
return arg.first == "updater";
});
if (specified_updater_) {
LOG(WARNING) << "DANGER AHEAD: You have manually specified `updater` "
"parameter. The `tree_method` parameter will be ignored. "
"Incorrect sequence of updaters will produce undefined "
"behavior. For common uses, we recommend using "
"`tree_method` parameter instead.";
} else {
this->ConfigureUpdaters();
LOG(DEBUG) << "Using updaters: " << tparam_.updater_seq;
}
configured_ = true;
}
@@ -72,26 +89,23 @@ void GBTree::Configure(const Args& cfg) {
// depends on whether external memory is used and how large is dataset. We can remove the
// dependency on DMatrix once `hist` tree method can handle external memory so that we can
// make it default.
void GBTree::ConfigureWithKnownData(std::map<std::string, std::string> const& cfg, DMatrix* fmat) {
void GBTree::ConfigureWithKnownData(Args const& cfg, DMatrix* fmat) {
std::string updater_seq = tparam_.updater_seq;
tparam_.InitAllowUnknown(cfg);
this->PerformTreeMethodHeuristic({this->cfg_.begin(), this->cfg_.end()}, fmat);
this->ConfigureUpdaters({this->cfg_.begin(), this->cfg_.end()});
LOG(DEBUG) << "Using updaters: " << tparam_.updater_seq;
this->PerformTreeMethodHeuristic(fmat);
this->ConfigureUpdaters();
// initialize the updaters only when needed.
if (updater_seq != tparam_.updater_seq) {
LOG(DEBUG) << "Using updaters: " << tparam_.updater_seq;
this->updaters_.clear();
}
this->InitUpdater();
cpu_predictor_->Configure({cfg.cbegin(), cfg.cend()}, cache_);
#if defined(XGBOOST_USE_CUDA)
gpu_predictor_->Configure({cfg.cbegin(), cfg.cend()}, cache_);
#endif // defined(XGBOOST_USE_CUDA)
this->InitUpdater(cfg);
}
void GBTree::PerformTreeMethodHeuristic(std::map<std::string, std::string> const& cfg,
DMatrix* fmat) {
if (cfg.find("updater") != cfg.cend()) {
void GBTree::PerformTreeMethodHeuristic(DMatrix* fmat) {
if (specified_updater_) {
// This method is disabled when `updater` parameter is explicitly
// set, since only experts are expected to do so.
return;
@@ -124,17 +138,8 @@ void GBTree::PerformTreeMethodHeuristic(std::map<std::string, std::string> const
LOG(DEBUG) << "Using tree method: " << static_cast<int>(tparam_.tree_method);
}
void GBTree::ConfigureUpdaters(const std::map<std::string, std::string>& cfg) {
void GBTree::ConfigureUpdaters() {
// `updater` parameter was manually specified
if (cfg.find("updater") != cfg.cend()) {
LOG(WARNING) << "DANGER AHEAD: You have manually specified `updater` "
"parameter. The `tree_method` parameter will be ignored. "
"Incorrect sequence of updaters will produce undefined "
"behavior. For common uses, we recommend using "
"`tree_method` parameter instead.";
return;
}
/* Choose updaters according to tree_method parameters */
switch (tparam_.tree_method) {
case TreeMethod::kAuto:
@@ -157,7 +162,7 @@ void GBTree::ConfigureUpdaters(const std::map<std::string, std::string>& cfg) {
case TreeMethod::kGPUHist:
this->AssertGPUSupport();
tparam_.updater_seq = "grow_gpu_hist";
if (cfg.find("predictor") == cfg.cend()) {
if (!specified_predictor_) {
tparam_.predictor = "gpu_predictor";
}
break;
@@ -172,7 +177,7 @@ void GBTree::DoBoost(DMatrix* p_fmat,
ObjFunction* obj) {
std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees;
const int ngroup = model_.param.num_output_group;
ConfigureWithKnownData({this->cfg_.cbegin(), this->cfg_.cend()}, p_fmat);
ConfigureWithKnownData(this->cfg_, p_fmat);
monitor_.Start("BoostNewTrees");
if (ngroup == 1) {
std::vector<std::unique_ptr<RegTree> > ret;
@@ -199,18 +204,43 @@ void GBTree::DoBoost(DMatrix* p_fmat,
}
}
monitor_.Stop("BoostNewTrees");
monitor_.Start("CommitModel");
this->CommitModel(std::move(new_trees));
monitor_.Stop("CommitModel");
}
void GBTree::InitUpdater() {
if (updaters_.size() != 0) return;
void GBTree::InitUpdater(Args const& cfg) {
std::string tval = tparam_.updater_seq;
std::vector<std::string> ups = common::Split(tval, ',');
if (updaters_.size() != 0) {
// Assert we have a valid set of updaters.
CHECK_EQ(ups.size(), updaters_.size());
for (auto const& up : updaters_) {
bool contains = std::any_of(ups.cbegin(), ups.cend(),
[&up](std::string const& name) {
return name == up->Name();
});
if (!contains) {
std::stringstream ss;
ss << "Internal Error: " << " mismatched updater sequence.\n";
ss << "Specified updaters: ";
std::for_each(ups.cbegin(), ups.cend(),
[&ss](std::string const& name){
ss << name << " ";
});
ss << "\n" << "Actual updaters: ";
std::for_each(updaters_.cbegin(), updaters_.cend(),
[&ss](std::unique_ptr<TreeUpdater> const& updater){
ss << updater->Name() << " ";
});
LOG(FATAL) << ss.str();
}
}
return;
}
for (const std::string& pstr : ups) {
std::unique_ptr<TreeUpdater> up(TreeUpdater::Create(pstr.c_str(), learner_param_));
up->Configure(this->cfg_);
up->Configure(cfg);
updaters_.push_back(std::move(up));
}
}
@@ -245,6 +275,7 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
}
void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) {
monitor_.Start("CommitModel");
int num_new_trees = 0;
for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
num_new_trees += new_trees[gid].size();
@@ -252,6 +283,7 @@ void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& ne
}
CHECK(configured_);
GetPredictor()->UpdatePredictionCache(model_, &updaters_, num_new_trees);
monitor_.Stop("CommitModel");
}

View File

@@ -147,20 +147,13 @@ class GBTree : public GradientBooster {
cache_ = cache;
}
// Abort with a fatal error when this binary was built without CUDA support;
// a no-op in CUDA-enabled builds (guarded by XGBOOST_USE_CUDA).
static void AssertGPUSupport() {
#ifndef XGBOOST_USE_CUDA
LOG(FATAL) << "XGBoost version not compiled with GPU support.";
#endif // XGBOOST_USE_CUDA
}
void Configure(const Args& cfg) override;
// Revise `tree_method` and `updater` parameters after seeing the training
// data matrix
void PerformTreeMethodHeuristic(std::map<std::string, std::string> const& cfg,
DMatrix* fmat);
// data matrix, only useful when tree_method is auto.
void PerformTreeMethodHeuristic(DMatrix* fmat);
/*! \brief Map `tree_method` parameter to `updater` parameter */
void ConfigureUpdaters(const std::map<std::string, std::string>& cfg);
void ConfigureWithKnownData(std::map<std::string, std::string> const& cfg, DMatrix* fmat);
void ConfigureUpdaters();
void ConfigureWithKnownData(Args const& cfg, DMatrix* fmat);
/*! \brief Carry out one iteration of boosting */
void DoBoost(DMatrix* p_fmat,
@@ -241,7 +234,7 @@ class GBTree : public GradientBooster {
protected:
// initialize updater before using them
void InitUpdater();
void InitUpdater(Args const& cfg);
// do group specific group
void BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
@@ -277,6 +270,8 @@ class GBTree : public GradientBooster {
// training parameter
GBTreeTrainParam tparam_;
// ----training fields----
bool specified_updater_ {false};
bool specified_predictor_ {false};
bool configured_ {false};
// configurations for tree
Args cfg_;

View File

@@ -1460,7 +1460,7 @@ class GPUHistMaker : public TreeUpdater {
}
char const* Name() const override {
return "gpu_hist";
return "grow_gpu_hist";
}
private:

View File

@@ -635,7 +635,7 @@ class CQHistMaker: public HistMaker {
class GlobalProposalHistMaker: public CQHistMaker {
public:
char const* Name() const override {
return "grow_global_histmaker";
return "grow_histmaker";
}
protected:
@@ -740,12 +740,6 @@ XGBOOST_REGISTER_TREE_UPDATER(LocalHistMaker, "grow_local_histmaker")
return new CQHistMaker();
});
XGBOOST_REGISTER_TREE_UPDATER(GlobalHistMaker, "grow_global_histmaker")
.describe("Tree constructor that uses approximate global proposal of histogram construction.")
.set_body([]() {
return new GlobalProposalHistMaker();
});
XGBOOST_REGISTER_TREE_UPDATER(HistMaker, "grow_histmaker")
.describe("Tree constructor that uses approximate global of histogram construction.")
.set_body([]() {