Use hist as the default tree method. (#9320)
This commit is contained in:
@@ -39,7 +39,6 @@ namespace xgboost::gbm {
|
||||
DMLC_REGISTRY_FILE_TAG(gbtree);
|
||||
|
||||
void GBTree::Configure(Args const& cfg) {
|
||||
this->cfg_ = cfg;
|
||||
std::string updater_seq = tparam_.updater_seq;
|
||||
tparam_.UpdateAllowUnknown(cfg);
|
||||
tree_param_.UpdateAllowUnknown(cfg);
|
||||
@@ -78,10 +77,9 @@ void GBTree::Configure(Args const& cfg) {
|
||||
|
||||
monitor_.Init("GBTree");
|
||||
|
||||
specified_updater_ = std::any_of(cfg.cbegin(), cfg.cend(),
|
||||
[](std::pair<std::string, std::string> const& arg) {
|
||||
return arg.first == "updater";
|
||||
});
|
||||
specified_updater_ = std::any_of(
|
||||
cfg.cbegin(), cfg.cend(),
|
||||
[](std::pair<std::string, std::string> const& arg) { return arg.first == "updater"; });
|
||||
|
||||
if (specified_updater_ && !showed_updater_warning_) {
|
||||
LOG(WARNING) << "DANGER AHEAD: You have manually specified `updater` "
|
||||
@@ -93,12 +91,19 @@ void GBTree::Configure(Args const& cfg) {
|
||||
showed_updater_warning_ = true;
|
||||
}
|
||||
|
||||
if (model_.learner_model_param->IsVectorLeaf()) {
|
||||
CHECK(tparam_.tree_method == TreeMethod::kHist || tparam_.tree_method == TreeMethod::kAuto)
|
||||
<< "Only the hist tree method is supported for building multi-target trees with vector "
|
||||
"leaf.";
|
||||
}
|
||||
LOG(DEBUG) << "Using tree method: " << static_cast<int>(tparam_.tree_method);
|
||||
this->ConfigureUpdaters();
|
||||
|
||||
if (updater_seq != tparam_.updater_seq) {
|
||||
updaters_.clear();
|
||||
this->InitUpdater(cfg);
|
||||
} else {
|
||||
for (auto &up : updaters_) {
|
||||
for (auto& up : updaters_) {
|
||||
up->Configure(cfg);
|
||||
}
|
||||
}
|
||||
@@ -106,66 +111,6 @@ void GBTree::Configure(Args const& cfg) {
|
||||
configured_ = true;
|
||||
}
|
||||
|
||||
// FIXME(trivialfis): This handles updaters. Because the choice of updaters depends on
|
||||
// whether external memory is used and how large is dataset. We can remove the dependency
|
||||
// on DMatrix once `hist` tree method can handle external memory so that we can make it
|
||||
// default.
|
||||
void GBTree::ConfigureWithKnownData(Args const& cfg, DMatrix* fmat) {
|
||||
CHECK(this->configured_);
|
||||
std::string updater_seq = tparam_.updater_seq;
|
||||
CHECK(tparam_.GetInitialised());
|
||||
|
||||
tparam_.UpdateAllowUnknown(cfg);
|
||||
|
||||
this->PerformTreeMethodHeuristic(fmat);
|
||||
this->ConfigureUpdaters();
|
||||
|
||||
// initialize the updaters only when needed.
|
||||
if (updater_seq != tparam_.updater_seq) {
|
||||
LOG(DEBUG) << "Using updaters: " << tparam_.updater_seq;
|
||||
this->updaters_.clear();
|
||||
this->InitUpdater(cfg);
|
||||
}
|
||||
}
|
||||
|
||||
void GBTree::PerformTreeMethodHeuristic(DMatrix* fmat) {
|
||||
if (specified_updater_) {
|
||||
// This method is disabled when `updater` parameter is explicitly
|
||||
// set, since only experts are expected to do so.
|
||||
return;
|
||||
}
|
||||
if (model_.learner_model_param->IsVectorLeaf()) {
|
||||
CHECK(tparam_.tree_method == TreeMethod::kHist)
|
||||
<< "Only the hist tree method is supported for building multi-target trees with vector "
|
||||
"leaf.";
|
||||
}
|
||||
|
||||
// tparam_ is set before calling this function.
|
||||
if (tparam_.tree_method != TreeMethod::kAuto) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (collective::IsDistributed()) {
|
||||
LOG(INFO) << "Tree method is automatically selected to be 'approx' "
|
||||
"for distributed training.";
|
||||
tparam_.tree_method = TreeMethod::kApprox;
|
||||
} else if (!fmat->SingleColBlock()) {
|
||||
LOG(INFO) << "Tree method is automatically set to 'approx' "
|
||||
"since external-memory data matrix is used.";
|
||||
tparam_.tree_method = TreeMethod::kApprox;
|
||||
} else if (fmat->Info().num_row_ >= (4UL << 20UL)) {
|
||||
/* Choose tree_method='approx' automatically for large data matrix */
|
||||
LOG(INFO) << "Tree method is automatically selected to be "
|
||||
"'approx' for faster speed. To use old behavior "
|
||||
"(exact greedy algorithm on single machine), "
|
||||
"set tree_method to 'exact'.";
|
||||
tparam_.tree_method = TreeMethod::kApprox;
|
||||
} else {
|
||||
tparam_.tree_method = TreeMethod::kExact;
|
||||
}
|
||||
LOG(DEBUG) << "Using tree method: " << static_cast<int>(tparam_.tree_method);
|
||||
}
|
||||
|
||||
void GBTree::ConfigureUpdaters() {
|
||||
if (specified_updater_) {
|
||||
return;
|
||||
@@ -173,31 +118,25 @@ void GBTree::ConfigureUpdaters() {
|
||||
// `updater` parameter was manually specified
|
||||
/* Choose updaters according to tree_method parameters */
|
||||
switch (tparam_.tree_method) {
|
||||
case TreeMethod::kAuto:
|
||||
// Use heuristic to choose between 'exact' and 'approx' This
|
||||
// choice is carried out in PerformTreeMethodHeuristic() before
|
||||
// calling this function.
|
||||
case TreeMethod::kAuto: // Use hist as default in 2.0
|
||||
case TreeMethod::kHist: {
|
||||
tparam_.updater_seq = "grow_quantile_histmaker";
|
||||
break;
|
||||
}
|
||||
case TreeMethod::kApprox:
|
||||
tparam_.updater_seq = "grow_histmaker";
|
||||
break;
|
||||
case TreeMethod::kExact:
|
||||
tparam_.updater_seq = "grow_colmaker,prune";
|
||||
break;
|
||||
case TreeMethod::kHist: {
|
||||
LOG(INFO) << "Tree method is selected to be 'hist', which uses a single updater "
|
||||
"grow_quantile_histmaker.";
|
||||
tparam_.updater_seq = "grow_quantile_histmaker";
|
||||
break;
|
||||
}
|
||||
case TreeMethod::kGPUHist: {
|
||||
common::AssertGPUSupport();
|
||||
tparam_.updater_seq = "grow_gpu_hist";
|
||||
break;
|
||||
}
|
||||
default:
|
||||
LOG(FATAL) << "Unknown tree_method ("
|
||||
<< static_cast<int>(tparam_.tree_method) << ") detected";
|
||||
LOG(FATAL) << "Unknown tree_method (" << static_cast<int>(tparam_.tree_method)
|
||||
<< ") detected";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -253,7 +192,6 @@ void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
|
||||
PredictionCacheEntry* predt, ObjFunction const* obj) {
|
||||
TreesOneIter new_trees;
|
||||
bst_target_t const n_groups = model_.learner_model_param->OutputLength();
|
||||
ConfigureWithKnownData(this->cfg_, p_fmat);
|
||||
monitor_.Start("BoostNewTrees");
|
||||
|
||||
// Weird case that tree method is cpu-based but gpu_id is set. Ideally we should let
|
||||
|
||||
@@ -56,9 +56,7 @@ DECLARE_FIELD_ENUM_CLASS(xgboost::TreeMethod);
|
||||
DECLARE_FIELD_ENUM_CLASS(xgboost::TreeProcessType);
|
||||
DECLARE_FIELD_ENUM_CLASS(xgboost::PredictorType);
|
||||
|
||||
namespace xgboost {
|
||||
namespace gbm {
|
||||
|
||||
namespace xgboost::gbm {
|
||||
/*! \brief training parameters */
|
||||
struct GBTreeTrainParam : public XGBoostParameter<GBTreeTrainParam> {
|
||||
/*! \brief tree updater sequence */
|
||||
@@ -192,12 +190,8 @@ class GBTree : public GradientBooster {
|
||||
: GradientBooster{ctx}, model_(booster_config, ctx_) {}
|
||||
|
||||
void Configure(const Args& cfg) override;
|
||||
// Revise `tree_method` and `updater` parameters after seeing the training
|
||||
// data matrix, only useful when tree_method is auto.
|
||||
void PerformTreeMethodHeuristic(DMatrix* fmat);
|
||||
/*! \brief Map `tree_method` parameter to `updater` parameter */
|
||||
void ConfigureUpdaters();
|
||||
void ConfigureWithKnownData(Args const& cfg, DMatrix* fmat);
|
||||
|
||||
/**
|
||||
* \brief Optionally update the leaf value.
|
||||
@@ -222,11 +216,7 @@ class GBTree : public GradientBooster {
|
||||
return tparam_;
|
||||
}
|
||||
|
||||
void Load(dmlc::Stream* fi) override {
|
||||
model_.Load(fi);
|
||||
this->cfg_.clear();
|
||||
}
|
||||
|
||||
void Load(dmlc::Stream* fi) override { model_.Load(fi); }
|
||||
void Save(dmlc::Stream* fo) const override {
|
||||
model_.Save(fo);
|
||||
}
|
||||
@@ -416,8 +406,6 @@ class GBTree : public GradientBooster {
|
||||
bool showed_updater_warning_ {false};
|
||||
bool specified_updater_ {false};
|
||||
bool configured_ {false};
|
||||
// configurations for tree
|
||||
Args cfg_;
|
||||
// the updaters that can be applied to each of tree
|
||||
std::vector<std::unique_ptr<TreeUpdater>> updaters_;
|
||||
// Predictors
|
||||
@@ -431,7 +419,6 @@ class GBTree : public GradientBooster {
|
||||
common::Monitor monitor_;
|
||||
};
|
||||
|
||||
} // namespace gbm
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::gbm
|
||||
|
||||
#endif // XGBOOST_GBM_GBTREE_H_
|
||||
|
||||
Reference in New Issue
Block a user