Restrict access to cfg_ in gbm. (#4801)
* Restrict access to `cfg_` in gbm. * Verify having correct updaters. * Remove `grow_global_histmaker` This updater is the same as `grow_histmaker`. The former is not in our document so we just remove it.
This commit is contained in:
parent
2aed0ae230
commit
c0fbeff0ab
@ -154,6 +154,12 @@ class GradientBooster {
|
||||
GenericParameter const* gparam,
|
||||
const std::vector<std::shared_ptr<DMatrix> >& cache_mats,
|
||||
bst_float base_margin);
|
||||
|
||||
static void AssertGPUSupport() {
|
||||
#ifndef XGBOOST_USE_CUDA
|
||||
LOG(FATAL) << "XGBoost version not compiled with GPU support.";
|
||||
#endif // XGBOOST_USE_CUDA
|
||||
}
|
||||
};
|
||||
|
||||
/*!
|
||||
|
||||
@ -65,7 +65,11 @@ class GBLinear : public GradientBooster {
|
||||
updater_.reset(LinearUpdater::Create(param_.updater, learner_param_));
|
||||
updater_->Configure(cfg);
|
||||
monitor_.Init("GBLinear");
|
||||
if (param_.updater == "gpu_coord_descent") {
|
||||
this->AssertGPUSupport();
|
||||
}
|
||||
}
|
||||
|
||||
void Load(dmlc::Stream* fi) override {
|
||||
model_.Load(fi);
|
||||
}
|
||||
|
||||
@ -47,24 +47,41 @@ void GBTree::Configure(const Args& cfg) {
|
||||
if (!cpu_predictor_) {
|
||||
cpu_predictor_ = std::unique_ptr<Predictor>(
|
||||
Predictor::Create("cpu_predictor", this->learner_param_));
|
||||
cpu_predictor_->Configure(cfg, cache_);
|
||||
}
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
if (!gpu_predictor_) {
|
||||
gpu_predictor_ = std::unique_ptr<Predictor>(
|
||||
Predictor::Create("gpu_predictor", this->learner_param_));
|
||||
gpu_predictor_->Configure(cfg, cache_);
|
||||
}
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
|
||||
monitor_.Init("GBTree");
|
||||
|
||||
if (tparam_.tree_method == TreeMethod::kGPUHist &&
|
||||
std::none_of(cfg.cbegin(), cfg.cend(),
|
||||
specified_predictor_ = std::any_of(cfg.cbegin(), cfg.cend(),
|
||||
[](std::pair<std::string, std::string> const& arg) {
|
||||
return arg.first == "predictor";
|
||||
})) {
|
||||
});
|
||||
if (!specified_predictor_ && tparam_.tree_method == TreeMethod::kGPUHist) {
|
||||
tparam_.predictor = "gpu_predictor";
|
||||
}
|
||||
|
||||
specified_updater_ = std::any_of(cfg.cbegin(), cfg.cend(),
|
||||
[](std::pair<std::string, std::string> const& arg) {
|
||||
return arg.first == "updater";
|
||||
});
|
||||
if (specified_updater_) {
|
||||
LOG(WARNING) << "DANGER AHEAD: You have manually specified `updater` "
|
||||
"parameter. The `tree_method` parameter will be ignored. "
|
||||
"Incorrect sequence of updaters will produce undefined "
|
||||
"behavior. For common uses, we recommend using "
|
||||
"`tree_method` parameter instead.";
|
||||
} else {
|
||||
this->ConfigureUpdaters();
|
||||
LOG(DEBUG) << "Using updaters: " << tparam_.updater_seq;
|
||||
}
|
||||
|
||||
configured_ = true;
|
||||
}
|
||||
|
||||
@ -72,26 +89,23 @@ void GBTree::Configure(const Args& cfg) {
|
||||
// depends on whether external memory is used and how large is dataset. We can remove the
|
||||
// dependency on DMatrix once `hist` tree method can handle external memory so that we can
|
||||
// make it default.
|
||||
void GBTree::ConfigureWithKnownData(std::map<std::string, std::string> const& cfg, DMatrix* fmat) {
|
||||
void GBTree::ConfigureWithKnownData(Args const& cfg, DMatrix* fmat) {
|
||||
std::string updater_seq = tparam_.updater_seq;
|
||||
tparam_.InitAllowUnknown(cfg);
|
||||
this->PerformTreeMethodHeuristic({this->cfg_.begin(), this->cfg_.end()}, fmat);
|
||||
this->ConfigureUpdaters({this->cfg_.begin(), this->cfg_.end()});
|
||||
LOG(DEBUG) << "Using updaters: " << tparam_.updater_seq;
|
||||
|
||||
this->PerformTreeMethodHeuristic(fmat);
|
||||
this->ConfigureUpdaters();
|
||||
|
||||
// initialize the updaters only when needed.
|
||||
if (updater_seq != tparam_.updater_seq) {
|
||||
LOG(DEBUG) << "Using updaters: " << tparam_.updater_seq;
|
||||
this->updaters_.clear();
|
||||
}
|
||||
this->InitUpdater();
|
||||
cpu_predictor_->Configure({cfg.cbegin(), cfg.cend()}, cache_);
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
gpu_predictor_->Configure({cfg.cbegin(), cfg.cend()}, cache_);
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
|
||||
this->InitUpdater(cfg);
|
||||
}
|
||||
|
||||
void GBTree::PerformTreeMethodHeuristic(std::map<std::string, std::string> const& cfg,
|
||||
DMatrix* fmat) {
|
||||
if (cfg.find("updater") != cfg.cend()) {
|
||||
void GBTree::PerformTreeMethodHeuristic(DMatrix* fmat) {
|
||||
if (specified_updater_) {
|
||||
// This method is disabled when `updater` parameter is explicitly
|
||||
// set, since only experts are expected to do so.
|
||||
return;
|
||||
@ -124,17 +138,8 @@ void GBTree::PerformTreeMethodHeuristic(std::map<std::string, std::string> const
|
||||
LOG(DEBUG) << "Using tree method: " << static_cast<int>(tparam_.tree_method);
|
||||
}
|
||||
|
||||
void GBTree::ConfigureUpdaters(const std::map<std::string, std::string>& cfg) {
|
||||
void GBTree::ConfigureUpdaters() {
|
||||
// `updater` parameter was manually specified
|
||||
if (cfg.find("updater") != cfg.cend()) {
|
||||
LOG(WARNING) << "DANGER AHEAD: You have manually specified `updater` "
|
||||
"parameter. The `tree_method` parameter will be ignored. "
|
||||
"Incorrect sequence of updaters will produce undefined "
|
||||
"behavior. For common uses, we recommend using "
|
||||
"`tree_method` parameter instead.";
|
||||
return;
|
||||
}
|
||||
|
||||
/* Choose updaters according to tree_method parameters */
|
||||
switch (tparam_.tree_method) {
|
||||
case TreeMethod::kAuto:
|
||||
@ -157,7 +162,7 @@ void GBTree::ConfigureUpdaters(const std::map<std::string, std::string>& cfg) {
|
||||
case TreeMethod::kGPUHist:
|
||||
this->AssertGPUSupport();
|
||||
tparam_.updater_seq = "grow_gpu_hist";
|
||||
if (cfg.find("predictor") == cfg.cend()) {
|
||||
if (!specified_predictor_) {
|
||||
tparam_.predictor = "gpu_predictor";
|
||||
}
|
||||
break;
|
||||
@ -172,7 +177,7 @@ void GBTree::DoBoost(DMatrix* p_fmat,
|
||||
ObjFunction* obj) {
|
||||
std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees;
|
||||
const int ngroup = model_.param.num_output_group;
|
||||
ConfigureWithKnownData({this->cfg_.cbegin(), this->cfg_.cend()}, p_fmat);
|
||||
ConfigureWithKnownData(this->cfg_, p_fmat);
|
||||
monitor_.Start("BoostNewTrees");
|
||||
if (ngroup == 1) {
|
||||
std::vector<std::unique_ptr<RegTree> > ret;
|
||||
@ -199,18 +204,43 @@ void GBTree::DoBoost(DMatrix* p_fmat,
|
||||
}
|
||||
}
|
||||
monitor_.Stop("BoostNewTrees");
|
||||
monitor_.Start("CommitModel");
|
||||
this->CommitModel(std::move(new_trees));
|
||||
monitor_.Stop("CommitModel");
|
||||
}
|
||||
|
||||
void GBTree::InitUpdater() {
|
||||
if (updaters_.size() != 0) return;
|
||||
void GBTree::InitUpdater(Args const& cfg) {
|
||||
std::string tval = tparam_.updater_seq;
|
||||
std::vector<std::string> ups = common::Split(tval, ',');
|
||||
|
||||
if (updaters_.size() != 0) {
|
||||
// Assert we have a valid set of updaters.
|
||||
CHECK_EQ(ups.size(), updaters_.size());
|
||||
for (auto const& up : updaters_) {
|
||||
bool contains = std::any_of(ups.cbegin(), ups.cend(),
|
||||
[&up](std::string const& name) {
|
||||
return name == up->Name();
|
||||
});
|
||||
if (!contains) {
|
||||
std::stringstream ss;
|
||||
ss << "Internal Error: " << " mismatched updater sequence.\n";
|
||||
ss << "Specified updaters: ";
|
||||
std::for_each(ups.cbegin(), ups.cend(),
|
||||
[&ss](std::string const& name){
|
||||
ss << name << " ";
|
||||
});
|
||||
ss << "\n" << "Actual updaters: ";
|
||||
std::for_each(updaters_.cbegin(), updaters_.cend(),
|
||||
[&ss](std::unique_ptr<TreeUpdater> const& updater){
|
||||
ss << updater->Name() << " ";
|
||||
});
|
||||
LOG(FATAL) << ss.str();
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
for (const std::string& pstr : ups) {
|
||||
std::unique_ptr<TreeUpdater> up(TreeUpdater::Create(pstr.c_str(), learner_param_));
|
||||
up->Configure(this->cfg_);
|
||||
up->Configure(cfg);
|
||||
updaters_.push_back(std::move(up));
|
||||
}
|
||||
}
|
||||
@ -245,6 +275,7 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
|
||||
}
|
||||
|
||||
void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) {
|
||||
monitor_.Start("CommitModel");
|
||||
int num_new_trees = 0;
|
||||
for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
|
||||
num_new_trees += new_trees[gid].size();
|
||||
@ -252,6 +283,7 @@ void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& ne
|
||||
}
|
||||
CHECK(configured_);
|
||||
GetPredictor()->UpdatePredictionCache(model_, &updaters_, num_new_trees);
|
||||
monitor_.Stop("CommitModel");
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -147,20 +147,13 @@ class GBTree : public GradientBooster {
|
||||
cache_ = cache;
|
||||
}
|
||||
|
||||
static void AssertGPUSupport() {
|
||||
#ifndef XGBOOST_USE_CUDA
|
||||
LOG(FATAL) << "XGBoost version not compiled with GPU support.";
|
||||
#endif // XGBOOST_USE_CUDA
|
||||
}
|
||||
|
||||
void Configure(const Args& cfg) override;
|
||||
// Revise `tree_method` and `updater` parameters after seeing the training
|
||||
// data matrix
|
||||
void PerformTreeMethodHeuristic(std::map<std::string, std::string> const& cfg,
|
||||
DMatrix* fmat);
|
||||
// data matrix, only useful when tree_method is auto.
|
||||
void PerformTreeMethodHeuristic(DMatrix* fmat);
|
||||
/*! \brief Map `tree_method` parameter to `updater` parameter */
|
||||
void ConfigureUpdaters(const std::map<std::string, std::string>& cfg);
|
||||
void ConfigureWithKnownData(std::map<std::string, std::string> const& cfg, DMatrix* fmat);
|
||||
void ConfigureUpdaters();
|
||||
void ConfigureWithKnownData(Args const& cfg, DMatrix* fmat);
|
||||
|
||||
/*! \brief Carry out one iteration of boosting */
|
||||
void DoBoost(DMatrix* p_fmat,
|
||||
@ -241,7 +234,7 @@ class GBTree : public GradientBooster {
|
||||
|
||||
protected:
|
||||
// initialize updater before using them
|
||||
void InitUpdater();
|
||||
void InitUpdater(Args const& cfg);
|
||||
|
||||
// do group specific group
|
||||
void BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
|
||||
@ -277,6 +270,8 @@ class GBTree : public GradientBooster {
|
||||
// training parameter
|
||||
GBTreeTrainParam tparam_;
|
||||
// ----training fields----
|
||||
bool specified_updater_ {false};
|
||||
bool specified_predictor_ {false};
|
||||
bool configured_ {false};
|
||||
// configurations for tree
|
||||
Args cfg_;
|
||||
|
||||
@ -1460,7 +1460,7 @@ class GPUHistMaker : public TreeUpdater {
|
||||
}
|
||||
|
||||
char const* Name() const override {
|
||||
return "gpu_hist";
|
||||
return "grow_gpu_hist";
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
@ -635,7 +635,7 @@ class CQHistMaker: public HistMaker {
|
||||
class GlobalProposalHistMaker: public CQHistMaker {
|
||||
public:
|
||||
char const* Name() const override {
|
||||
return "grow_global_histmaker";
|
||||
return "grow_histmaker";
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -740,12 +740,6 @@ XGBOOST_REGISTER_TREE_UPDATER(LocalHistMaker, "grow_local_histmaker")
|
||||
return new CQHistMaker();
|
||||
});
|
||||
|
||||
XGBOOST_REGISTER_TREE_UPDATER(GlobalHistMaker, "grow_global_histmaker")
|
||||
.describe("Tree constructor that uses approximate global proposal of histogram construction.")
|
||||
.set_body([]() {
|
||||
return new GlobalProposalHistMaker();
|
||||
});
|
||||
|
||||
XGBOOST_REGISTER_TREE_UPDATER(HistMaker, "grow_histmaker")
|
||||
.describe("Tree constructor that uses approximate global of histogram construction.")
|
||||
.set_body([]() {
|
||||
|
||||
@ -5,46 +5,42 @@
|
||||
|
||||
namespace xgboost {
|
||||
TEST(GBTree, SelectTreeMethod) {
|
||||
using Arg = std::pair<std::string, std::string>;
|
||||
size_t constexpr kRows = 10;
|
||||
size_t constexpr kCols = 10;
|
||||
auto p_shared_ptr_dmat = CreateDMatrix(kRows, kCols, 0);
|
||||
auto p_dmat {(*p_shared_ptr_dmat).get()};
|
||||
|
||||
GenericParameter generic_param;
|
||||
generic_param.InitAllowUnknown(std::vector<Arg>{});
|
||||
generic_param.InitAllowUnknown(Args{});
|
||||
std::unique_ptr<GradientBooster> p_gbm{
|
||||
GradientBooster::Create("gbtree", &generic_param, {}, 0)};
|
||||
auto& gbtree = dynamic_cast<gbm::GBTree&> (*p_gbm);
|
||||
|
||||
// Test if `tree_method` can be set
|
||||
std::string n_feat = std::to_string(kCols);
|
||||
std::map<std::string, std::string> args {Arg{"tree_method", "approx"}, Arg{"num_feature", n_feat}};
|
||||
Args args {{"tree_method", "approx"}, {"num_feature", n_feat}};
|
||||
gbtree.Configure({args.cbegin(), args.cend()});
|
||||
|
||||
gbtree.ConfigureWithKnownData(args, p_dmat);
|
||||
gbtree.Configure(args);
|
||||
auto const& tparam = gbtree.GetTrainParam();
|
||||
gbtree.ConfigureWithKnownData({Arg{"tree_method", "approx"}, Arg{"num_feature", n_feat}}, p_dmat);
|
||||
gbtree.Configure({{"tree_method", "approx"}, {"num_feature", n_feat}});
|
||||
ASSERT_EQ(tparam.updater_seq, "grow_histmaker,prune");
|
||||
gbtree.ConfigureWithKnownData({Arg("tree_method", "exact"), Arg("num_feature", n_feat)}, p_dmat);
|
||||
gbtree.Configure({{"tree_method", "exact"}, {"num_feature", n_feat}});
|
||||
ASSERT_EQ(tparam.updater_seq, "grow_colmaker,prune");
|
||||
gbtree.ConfigureWithKnownData({Arg("tree_method", "hist"), Arg("num_feature", n_feat)}, p_dmat);
|
||||
gbtree.Configure({{"tree_method", "hist"}, {"num_feature", n_feat}});
|
||||
ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker");
|
||||
ASSERT_EQ(tparam.predictor, "cpu_predictor");
|
||||
gbtree.ConfigureWithKnownData({Arg{"booster", "dart"}, Arg{"tree_method", "hist"},
|
||||
Arg{"num_feature", n_feat}}, p_dmat);
|
||||
gbtree.Configure({{"booster", "dart"}, {"tree_method", "hist"},
|
||||
{"num_feature", n_feat}});
|
||||
ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker");
|
||||
ASSERT_EQ(tparam.predictor, "cpu_predictor");
|
||||
|
||||
#ifdef XGBOOST_USE_CUDA
|
||||
generic_param.InitAllowUnknown(std::vector<Arg>{Arg{"gpu_id", "0"}});
|
||||
gbtree.ConfigureWithKnownData({Arg("tree_method", "gpu_hist"), Arg("num_feature", n_feat)},
|
||||
p_dmat);
|
||||
generic_param.InitAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
gbtree.Configure({{"tree_method", "gpu_hist"}, {"num_feature", n_feat}});
|
||||
ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist");
|
||||
ASSERT_EQ(tparam.predictor, "gpu_predictor");
|
||||
gbtree.ConfigureWithKnownData({Arg{"booster", "dart"}, Arg{"tree_method", "gpu_hist"},
|
||||
Arg{"num_feature", n_feat}}, p_dmat);
|
||||
gbtree.Configure({{"booster", "dart"}, {"tree_method", "gpu_hist"},
|
||||
{"num_feature", n_feat}});
|
||||
ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist");
|
||||
ASSERT_EQ(tparam.predictor, "gpu_predictor");
|
||||
#endif
|
||||
|
||||
delete p_shared_ptr_dmat;
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user