Move num_parallel_tree to model parameter. (#7751)

The size of the forest should be a property of the model itself instead of a training
hyper-parameter.
This commit is contained in:
Jiaming Yuan
2022-03-29 02:32:42 +08:00
committed by GitHub
parent 8b3ecfca25
commit 3c9b04460a
11 changed files with 158 additions and 101 deletions

View File

@@ -323,7 +323,7 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
std::vector<RegTree*> new_trees;
ret->clear();
// create the trees
for (int i = 0; i < tparam_.num_parallel_tree; ++i) {
for (int i = 0; i < model_.param.num_parallel_tree; ++i) {
if (tparam_.process_type == TreeProcessType::kDefault) {
CHECK(!updaters_.front()->CanModifyTree())
<< "Updater: `" << updaters_.front()->Name() << "` "
@@ -347,7 +347,7 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
<< "boosting rounds can not exceed previous training rounds";
// move an existing tree from trees_to_update
auto t = std::move(model_.trees_to_update[model_.trees.size() +
bst_group * tparam_.num_parallel_tree + i]);
bst_group * model_.param.num_parallel_tree + i]);
new_trees.push_back(t.get());
ret->push_back(std::move(t));
}
@@ -414,6 +414,10 @@ void GBTree::SaveConfig(Json* p_out) const {
// e.g. updating a model, then saving and loading it would result in an empty
// model
out["gbtree_train_param"]["process_type"] = String("default");
// Duplicated from SaveModel so that user can get `num_parallel_tree` without parsing
// the model. We might remove this once we can deprecate `best_ntree_limit` so that the
// language binding doesn't need to know about the forest size.
out["gbtree_model_param"] = ToJson(model_.param);
out["updater"] = Object();
@@ -460,6 +464,7 @@ void GBTree::Slice(int32_t layer_begin, int32_t layer_end, int32_t step,
std::vector<int32_t> &out_trees_info = out_model.tree_info;
out_trees_info.resize(layer_trees * n_layers);
out_model.param.num_trees = out_model.trees.size();
out_model.param.num_parallel_tree = model_.param.num_parallel_tree;
if (!this->model_.trees_to_update.empty()) {
CHECK_EQ(this->model_.trees_to_update.size(), this->model_.trees.size())
<< "Not all trees are updated, "
@@ -512,8 +517,7 @@ void GBTree::PredictBatch(DMatrix* p_fmat,
}
uint32_t tree_begin, tree_end;
std::tie(tree_begin, tree_end) =
detail::LayerToTree(model_, tparam_, layer_begin, layer_end);
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end);
CHECK_LE(tree_end, model_.trees.size()) << "Invalid number of trees.";
if (tree_end > tree_begin) {
predictor->PredictBatch(p_fmat, out_preds, model_, tree_begin, tree_end);
@@ -723,8 +727,7 @@ class Dart : public GBTree {
model_);
p_out_preds->version = 0;
uint32_t tree_begin, tree_end;
std::tie(tree_begin, tree_end) =
detail::LayerToTree(model_, tparam_, layer_begin, layer_end);
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end);
auto n_groups = model_.learner_model_param->num_output_group;
PredictionCacheEntry predts; // temporary storage for prediction
@@ -779,7 +782,7 @@ class Dart : public GBTree {
float missing, PredictionCacheEntry *out_preds,
uint32_t layer_begin, unsigned layer_end) const override {
uint32_t tree_begin, tree_end;
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, tparam_, layer_begin, layer_end);
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end);
std::vector<Predictor const *> predictors{
cpu_predictor_.get(),
#if defined(XGBOOST_USE_CUDA)
@@ -867,7 +870,7 @@ class Dart : public GBTree {
DropTrees(false);
auto &predictor = this->GetPredictor();
uint32_t _, tree_end;
std::tie(_, tree_end) = detail::LayerToTree(model_, tparam_, layer_begin, layer_end);
std::tie(_, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end);
predictor->PredictInstance(inst, out_preds, model_, tree_end);
}
@@ -877,7 +880,7 @@ class Dart : public GBTree {
unsigned) override {
CHECK(configured_);
uint32_t tree_begin, tree_end;
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, tparam_, layer_begin, layer_end);
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end);
cpu_predictor_->PredictContribution(p_fmat, out_contribs, model_,
tree_end, &weight_drop_, approximate);
}
@@ -887,9 +890,9 @@ class Dart : public GBTree {
unsigned layer_begin, unsigned layer_end, bool approximate) override {
CHECK(configured_);
uint32_t tree_begin, tree_end;
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, tparam_, layer_begin, layer_end);
cpu_predictor_->PredictInteractionContributions(p_fmat, out_contribs, model_,
tree_end, &weight_drop_, approximate);
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end);
cpu_predictor_->PredictInteractionContributions(p_fmat, out_contribs, model_, tree_end,
&weight_drop_, approximate);
}
protected:

View File

@@ -60,11 +60,6 @@ namespace gbm {
/*! \brief training parameters */
struct GBTreeTrainParam : public XGBoostParameter<GBTreeTrainParam> {
/*!
* \brief number of parallel trees constructed each iteration
* use this option to support boosted random forest
*/
int num_parallel_tree;
/*! \brief tree updater sequence */
std::string updater_seq;
/*! \brief type of boosting process to run */
@@ -75,11 +70,6 @@ struct GBTreeTrainParam : public XGBoostParameter<GBTreeTrainParam> {
TreeMethod tree_method;
// declare parameters
DMLC_DECLARE_PARAMETER(GBTreeTrainParam) {
DMLC_DECLARE_FIELD(num_parallel_tree)
.set_default(1)
.set_lower_bound(1)
.describe("Number of parallel trees constructed during each iteration."\
" This option is used to support boosted random forest.");
DMLC_DECLARE_FIELD(updater_seq)
.set_default("grow_colmaker,prune")
.describe("Tree updater sequence.");
@@ -156,12 +146,11 @@ struct DartTrainParam : public XGBoostParameter<DartTrainParam> {
namespace detail {
// From here on, layer becomes concrete trees.
inline std::pair<uint32_t, uint32_t> LayerToTree(gbm::GBTreeModel const &model,
GBTreeTrainParam const &tparam,
size_t layer_begin,
size_t layer_end) {
bst_group_t groups = model.learner_model_param->num_output_group;
uint32_t tree_begin = layer_begin * groups * tparam.num_parallel_tree;
uint32_t tree_end = layer_end * groups * tparam.num_parallel_tree;
uint32_t tree_begin = layer_begin * groups * model.param.num_parallel_tree;
uint32_t tree_end = layer_end * groups * model.param.num_parallel_tree;
if (tree_end == 0) {
tree_end = static_cast<uint32_t>(model.trees.size());
}
@@ -177,7 +166,7 @@ inline bool SliceTrees(int32_t layer_begin, int32_t layer_end, int32_t step,
GBTreeModel const &model, GBTreeTrainParam const &tparam,
uint32_t layer_trees, Func fn) {
uint32_t tree_begin, tree_end;
std::tie(tree_begin, tree_end) = detail::LayerToTree(model, tparam, layer_begin, layer_end);
std::tie(tree_begin, tree_end) = detail::LayerToTree(model, layer_begin, layer_end);
if (tree_end > model.trees.size()) {
return true;
}
@@ -249,7 +238,7 @@ class GBTree : public GradientBooster {
// Number of trees per layer.
auto LayerTrees() const {
auto n_trees = model_.learner_model_param->num_output_group * tparam_.num_parallel_tree;
auto n_trees = model_.learner_model_param->num_output_group * model_.param.num_parallel_tree;
return n_trees;
}
@@ -258,7 +247,7 @@ class GBTree : public GradientBooster {
GradientBooster *out, bool* out_of_bound) const override;
int32_t BoostedRounds() const override {
CHECK_NE(tparam_.num_parallel_tree, 0);
CHECK_NE(model_.param.num_parallel_tree, 0);
CHECK_NE(model_.learner_model_param->num_output_group, 0);
return model_.trees.size() / this->LayerTrees();
}
@@ -271,8 +260,7 @@ class GBTree : public GradientBooster {
uint32_t layer_begin, unsigned layer_end) const override {
CHECK(configured_);
uint32_t tree_begin, tree_end;
std::tie(tree_begin, tree_end) =
detail::LayerToTree(model_, tparam_, layer_begin, layer_end);
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end);
CHECK_LE(tree_end, model_.trees.size()) << "Invalid number of trees.";
std::vector<Predictor const *> predictors{
cpu_predictor_.get(),
@@ -371,16 +359,15 @@ class GBTree : public GradientBooster {
uint32_t layer_begin, uint32_t layer_end) override {
CHECK(configured_);
uint32_t tree_begin, tree_end;
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, tparam_, layer_begin, layer_end);
cpu_predictor_->PredictInstance(inst, out_preds, model_,
tree_end);
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end);
cpu_predictor_->PredictInstance(inst, out_preds, model_, tree_end);
}
void PredictLeaf(DMatrix* p_fmat,
HostDeviceVector<bst_float>* out_preds,
uint32_t layer_begin, uint32_t layer_end) override {
uint32_t tree_begin, tree_end;
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, tparam_, layer_begin, layer_end);
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end);
CHECK_EQ(tree_begin, 0) << "Predict leaf supports only iteration end: (0, "
"n_iteration), use model slicing instead.";
this->GetPredictor()->PredictLeaf(p_fmat, out_preds, model_, tree_end);
@@ -392,7 +379,7 @@ class GBTree : public GradientBooster {
int, unsigned) override {
CHECK(configured_);
uint32_t tree_begin, tree_end;
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, tparam_, layer_begin, layer_end);
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end);
CHECK_EQ(tree_begin, 0)
<< "Predict contribution supports only iteration end: (0, "
"n_iteration), using model slicing instead.";
@@ -405,7 +392,7 @@ class GBTree : public GradientBooster {
uint32_t layer_begin, uint32_t layer_end, bool approximate) override {
CHECK(configured_);
uint32_t tree_begin, tree_end;
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, tparam_, layer_begin, layer_end);
std::tie(tree_begin, tree_end) = detail::LayerToTree(model_, layer_begin, layer_end);
CHECK_EQ(tree_begin, 0)
<< "Predict interaction contribution supports only iteration end: (0, "
"n_iteration), using model slicing instead.";

View File

@@ -31,7 +31,7 @@ struct GBTreeModelParam : public dmlc::Parameter<GBTreeModelParam> {
/*! \brief number of trees */
int32_t num_trees;
/*! \brief (Deprecated) number of roots */
int32_t deprecated_num_roots;
int32_t num_parallel_tree;
/*! \brief number of features to be used by trees */
int32_t deprecated_num_feature;
/*! \brief pad this space, for backward compatibility reason.*/
@@ -50,7 +50,7 @@ struct GBTreeModelParam : public dmlc::Parameter<GBTreeModelParam> {
std::memset(this, 0, sizeof(GBTreeModelParam)); // FIXME(trivialfis): Why?
static_assert(sizeof(GBTreeModelParam) == (4 + 2 + 2 + 32) * sizeof(int32_t),
"64/32 bit compatibility issue");
deprecated_num_roots = 1;
num_parallel_tree = 1;
}
// declare parameters, only declare those that need to be set.
@@ -59,6 +59,12 @@ struct GBTreeModelParam : public dmlc::Parameter<GBTreeModelParam> {
.set_lower_bound(0)
.set_default(0)
.describe("Number of features used for training and prediction.");
DMLC_DECLARE_FIELD(num_parallel_tree)
.set_default(1)
.set_lower_bound(1)
.describe(
"Number of parallel trees constructed during each iteration."
" This option is used to support boosted random forest.");
DMLC_DECLARE_FIELD(size_leaf_vector)
.set_lower_bound(0)
.set_default(0)
@@ -70,7 +76,7 @@ struct GBTreeModelParam : public dmlc::Parameter<GBTreeModelParam> {
inline GBTreeModelParam ByteSwap() const {
GBTreeModelParam x = *this;
dmlc::ByteSwap(&x.num_trees, sizeof(x.num_trees), 1);
dmlc::ByteSwap(&x.deprecated_num_roots, sizeof(x.deprecated_num_roots), 1);
dmlc::ByteSwap(&x.num_parallel_tree, sizeof(x.num_parallel_tree), 1);
dmlc::ByteSwap(&x.deprecated_num_feature, sizeof(x.deprecated_num_feature), 1);
dmlc::ByteSwap(&x.pad_32bit, sizeof(x.pad_32bit), 1);
dmlc::ByteSwap(&x.deprecated_num_pbuffer, sizeof(x.deprecated_num_pbuffer), 1);