Support learning rate for zero-hessian objectives. (#8866)
This commit is contained in:
@@ -32,15 +32,14 @@
|
||||
#include "xgboost/string_view.h"
|
||||
#include "xgboost/tree_updater.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace gbm {
|
||||
|
||||
namespace xgboost::gbm {
|
||||
DMLC_REGISTRY_FILE_TAG(gbtree);
|
||||
|
||||
void GBTree::Configure(const Args& cfg) {
|
||||
void GBTree::Configure(Args const& cfg) {
|
||||
this->cfg_ = cfg;
|
||||
std::string updater_seq = tparam_.updater_seq;
|
||||
tparam_.UpdateAllowUnknown(cfg);
|
||||
tree_param_.UpdateAllowUnknown(cfg);
|
||||
|
||||
model_.Configure(cfg);
|
||||
|
||||
@@ -235,9 +234,11 @@ void GBTree::UpdateTreeLeaf(DMatrix const* p_fmat, HostDeviceVector<float> const
|
||||
CHECK_EQ(model_.param.num_parallel_tree, trees.size());
|
||||
CHECK_EQ(model_.param.num_parallel_tree, 1)
|
||||
<< "Boosting random forest is not supported for current objective.";
|
||||
CHECK_EQ(trees.size(), model_.param.num_parallel_tree);
|
||||
for (std::size_t tree_idx = 0; tree_idx < trees.size(); ++tree_idx) {
|
||||
auto const& position = node_position.at(tree_idx);
|
||||
obj->UpdateTreeLeaf(position, p_fmat->Info(), predictions, group_idx, trees[tree_idx].get());
|
||||
obj->UpdateTreeLeaf(position, p_fmat->Info(), tree_param_.learning_rate / trees.size(),
|
||||
predictions, group_idx, trees[tree_idx].get());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -388,9 +389,15 @@ void GBTree::BoostNewTrees(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fma
|
||||
|
||||
CHECK(out_position);
|
||||
out_position->resize(new_trees.size());
|
||||
|
||||
// Rescale learning rate according to the size of trees
|
||||
auto lr = tree_param_.learning_rate;
|
||||
tree_param_.learning_rate /= static_cast<float>(new_trees.size());
|
||||
for (auto& up : updaters_) {
|
||||
up->Update(gpair, p_fmat, common::Span<HostDeviceVector<bst_node_t>>{*out_position}, new_trees);
|
||||
up->Update(&tree_param_, gpair, p_fmat,
|
||||
common::Span<HostDeviceVector<bst_node_t>>{*out_position}, new_trees);
|
||||
}
|
||||
tree_param_.learning_rate = lr;
|
||||
}
|
||||
|
||||
void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) {
|
||||
@@ -404,6 +411,8 @@ void GBTree::CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& ne
|
||||
void GBTree::LoadConfig(Json const& in) {
|
||||
CHECK_EQ(get<String>(in["name"]), "gbtree");
|
||||
FromJson(in["gbtree_train_param"], &tparam_);
|
||||
FromJson(in["tree_train_param"], &tree_param_);
|
||||
|
||||
// Process type cannot be kUpdate from loaded model
|
||||
// This would cause all trees to be pushed to trees_to_update
|
||||
// e.g. updating a model, then saving and loading it would result in an empty model
|
||||
@@ -451,6 +460,7 @@ void GBTree::SaveConfig(Json* p_out) const {
|
||||
auto& out = *p_out;
|
||||
out["name"] = String("gbtree");
|
||||
out["gbtree_train_param"] = ToJson(tparam_);
|
||||
out["tree_train_param"] = ToJson(tree_param_);
|
||||
|
||||
// Process type cannot be kUpdate from loaded model
|
||||
// This would cause all trees to be pushed to trees_to_update
|
||||
@@ -1058,5 +1068,4 @@ XGBOOST_REGISTER_GBM(Dart, "dart")
|
||||
GBTree* p = new Dart(booster_config, ctx);
|
||||
return p;
|
||||
});
|
||||
} // namespace gbm
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::gbm
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
|
||||
#include "../common/common.h"
|
||||
#include "../common/timer.h"
|
||||
#include "../tree/param.h" // TrainParam
|
||||
#include "gbtree_model.h"
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/data.h"
|
||||
@@ -405,8 +406,8 @@ class GBTree : public GradientBooster {
|
||||
p_fmat, out_contribs, model_, tree_end, nullptr, approximate);
|
||||
}
|
||||
|
||||
std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,
|
||||
std::string format) const override {
|
||||
[[nodiscard]] std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,
|
||||
std::string format) const override {
|
||||
return model_.DumpModel(fmap, with_stats, this->ctx_->Threads(), format);
|
||||
}
|
||||
|
||||
@@ -428,6 +429,8 @@ class GBTree : public GradientBooster {
|
||||
GBTreeModel model_;
|
||||
// training parameter
|
||||
GBTreeTrainParam tparam_;
|
||||
// Tree training parameter
|
||||
tree::TrainParam tree_param_;
|
||||
// ----training fields----
|
||||
bool showed_updater_warning_ {false};
|
||||
bool specified_updater_ {false};
|
||||
|
||||
@@ -76,7 +76,7 @@ void EncodeTreeLeafHost(Context const* ctx, RegTree const& tree,
|
||||
}
|
||||
|
||||
void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& position,
|
||||
std::int32_t group_idx, MetaInfo const& info,
|
||||
std::int32_t group_idx, MetaInfo const& info, float learning_rate,
|
||||
HostDeviceVector<float> const& predt, float alpha, RegTree* p_tree) {
|
||||
auto& tree = *p_tree;
|
||||
|
||||
@@ -87,7 +87,7 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
|
||||
size_t n_leaf = nidx.size();
|
||||
if (nptr.empty()) {
|
||||
std::vector<float> quantiles;
|
||||
UpdateLeafValues(&quantiles, nidx, p_tree);
|
||||
UpdateLeafValues(&quantiles, nidx, learning_rate, p_tree);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -133,12 +133,13 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
|
||||
quantiles.at(k) = q;
|
||||
});
|
||||
|
||||
UpdateLeafValues(&quantiles, nidx, p_tree);
|
||||
UpdateLeafValues(&quantiles, nidx, learning_rate, p_tree);
|
||||
}
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
void UpdateTreeLeafDevice(Context const*, common::Span<bst_node_t const>, std::int32_t,
|
||||
MetaInfo const&, HostDeviceVector<float> const&, float, RegTree*) {
|
||||
MetaInfo const&, float learning_rate, HostDeviceVector<float> const&,
|
||||
float, RegTree*) {
|
||||
common::AssertGPUSupport();
|
||||
}
|
||||
#endif // !defined(XGBOOST_USE_CUDA)
|
||||
|
||||
@@ -140,7 +140,7 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
|
||||
}
|
||||
|
||||
void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> position,
|
||||
std::int32_t group_idx, MetaInfo const& info,
|
||||
std::int32_t group_idx, MetaInfo const& info, float learning_rate,
|
||||
HostDeviceVector<float> const& predt, float alpha, RegTree* p_tree) {
|
||||
dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
|
||||
dh::device_vector<size_t> ridx;
|
||||
@@ -151,7 +151,7 @@ void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
|
||||
|
||||
if (nptr.Empty()) {
|
||||
std::vector<float> quantiles;
|
||||
UpdateLeafValues(&quantiles, nidx.ConstHostVector(), p_tree);
|
||||
UpdateLeafValues(&quantiles, nidx.ConstHostVector(), learning_rate, p_tree);
|
||||
}
|
||||
|
||||
HostDeviceVector<float> quantiles;
|
||||
@@ -186,7 +186,7 @@ void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
|
||||
w_it + d_weights.size(), &quantiles);
|
||||
}
|
||||
|
||||
UpdateLeafValues(&quantiles.HostVector(), nidx.ConstHostVector(), p_tree);
|
||||
UpdateLeafValues(&quantiles.HostVector(), nidx.ConstHostVector(), learning_rate, p_tree);
|
||||
}
|
||||
} // namespace detail
|
||||
} // namespace obj
|
||||
|
||||
@@ -36,7 +36,7 @@ inline void FillMissingLeaf(std::vector<bst_node_t> const& maybe_missing,
|
||||
}
|
||||
|
||||
inline void UpdateLeafValues(std::vector<float>* p_quantiles, std::vector<bst_node_t> const& nidx,
|
||||
RegTree* p_tree) {
|
||||
float learning_rate, RegTree* p_tree) {
|
||||
auto& tree = *p_tree;
|
||||
auto& quantiles = *p_quantiles;
|
||||
auto const& h_node_idx = nidx;
|
||||
@@ -71,7 +71,7 @@ inline void UpdateLeafValues(std::vector<float>* p_quantiles, std::vector<bst_no
|
||||
auto nidx = h_node_idx[i];
|
||||
auto q = quantiles[i];
|
||||
CHECK(tree[nidx].IsLeaf());
|
||||
tree[nidx].SetLeaf(q);
|
||||
tree[nidx].SetLeaf(q * learning_rate);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -85,24 +85,24 @@ inline std::size_t IdxY(MetaInfo const& info, bst_group_t group_idx) {
|
||||
}
|
||||
|
||||
void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> position,
|
||||
std::int32_t group_idx, MetaInfo const& info,
|
||||
std::int32_t group_idx, MetaInfo const& info, float learning_rate,
|
||||
HostDeviceVector<float> const& predt, float alpha, RegTree* p_tree);
|
||||
|
||||
void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& position,
|
||||
std::int32_t group_idx, MetaInfo const& info,
|
||||
std::int32_t group_idx, MetaInfo const& info, float learning_rate,
|
||||
HostDeviceVector<float> const& predt, float alpha, RegTree* p_tree);
|
||||
} // namespace detail
|
||||
|
||||
inline void UpdateTreeLeaf(Context const* ctx, HostDeviceVector<bst_node_t> const& position,
|
||||
std::int32_t group_idx, MetaInfo const& info,
|
||||
std::int32_t group_idx, MetaInfo const& info, float learning_rate,
|
||||
HostDeviceVector<float> const& predt, float alpha, RegTree* p_tree) {
|
||||
if (ctx->IsCPU()) {
|
||||
detail::UpdateTreeLeafHost(ctx, position.ConstHostVector(), group_idx, info, predt, alpha,
|
||||
p_tree);
|
||||
detail::UpdateTreeLeafHost(ctx, position.ConstHostVector(), group_idx, info, learning_rate,
|
||||
predt, alpha, p_tree);
|
||||
} else {
|
||||
position.SetDevice(ctx->gpu_id);
|
||||
detail::UpdateTreeLeafDevice(ctx, position.ConstDeviceSpan(), group_idx, info, predt, alpha,
|
||||
p_tree);
|
||||
detail::UpdateTreeLeafDevice(ctx, position.ConstDeviceSpan(), group_idx, info, learning_rate,
|
||||
predt, alpha, p_tree);
|
||||
}
|
||||
}
|
||||
} // namespace obj
|
||||
|
||||
@@ -183,10 +183,11 @@ class QuantileRegression : public ObjFunction {
|
||||
}
|
||||
|
||||
void UpdateTreeLeaf(HostDeviceVector<bst_node_t> const& position, MetaInfo const& info,
|
||||
HostDeviceVector<float> const& prediction, std::int32_t group_idx,
|
||||
RegTree* p_tree) const override {
|
||||
float learning_rate, HostDeviceVector<float> const& prediction,
|
||||
std::int32_t group_idx, RegTree* p_tree) const override {
|
||||
auto alpha = param_.quantile_alpha[group_idx];
|
||||
::xgboost::obj::UpdateTreeLeaf(ctx_, position, group_idx, info, prediction, alpha, p_tree);
|
||||
::xgboost::obj::UpdateTreeLeaf(ctx_, position, group_idx, info, learning_rate, prediction,
|
||||
alpha, p_tree);
|
||||
}
|
||||
|
||||
void Configure(Args const& args) override {
|
||||
|
||||
@@ -742,9 +742,10 @@ class MeanAbsoluteError : public ObjFunction {
|
||||
}
|
||||
|
||||
void UpdateTreeLeaf(HostDeviceVector<bst_node_t> const& position, MetaInfo const& info,
|
||||
HostDeviceVector<float> const& prediction, std::int32_t group_idx,
|
||||
RegTree* p_tree) const override {
|
||||
::xgboost::obj::UpdateTreeLeaf(ctx_, position, group_idx, info, prediction, 0.5, p_tree);
|
||||
float learning_rate, HostDeviceVector<float> const& prediction,
|
||||
std::int32_t group_idx, RegTree* p_tree) const override {
|
||||
::xgboost::obj::UpdateTreeLeaf(ctx_, position, group_idx, info, learning_rate, prediction, 0.5,
|
||||
p_tree);
|
||||
}
|
||||
|
||||
const char* DefaultEvalMetric() const override { return "mae"; }
|
||||
|
||||
@@ -17,13 +17,11 @@
|
||||
#include "../../common/random.h"
|
||||
#include "../../data/gradient_index.h"
|
||||
#include "../constraints.h"
|
||||
#include "../param.h"
|
||||
#include "../param.h" // for TrainParam
|
||||
#include "../split_evaluator.h"
|
||||
#include "xgboost/context.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
namespace xgboost::tree {
|
||||
template <typename ExpandEntry>
|
||||
class HistEvaluator {
|
||||
private:
|
||||
@@ -36,7 +34,7 @@ class HistEvaluator {
|
||||
|
||||
private:
|
||||
Context const* ctx_;
|
||||
TrainParam param_;
|
||||
TrainParam const* param_;
|
||||
std::shared_ptr<common::ColumnSampler> column_sampler_;
|
||||
TreeEvaluator tree_evaluator_;
|
||||
bool is_col_split_{false};
|
||||
@@ -55,8 +53,9 @@ class HistEvaluator {
|
||||
}
|
||||
}
|
||||
|
||||
bool IsValid(GradStats const &left, GradStats const &right) const {
|
||||
return left.GetHess() >= param_.min_child_weight && right.GetHess() >= param_.min_child_weight;
|
||||
[[nodiscard]] bool IsValid(GradStats const &left, GradStats const &right) const {
|
||||
return left.GetHess() >= param_->min_child_weight &&
|
||||
right.GetHess() >= param_->min_child_weight;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -95,9 +94,10 @@ class HistEvaluator {
|
||||
right_sum = GradStats{hist[i]};
|
||||
left_sum.SetSubstract(parent.stats, right_sum);
|
||||
if (IsValid(left_sum, right_sum)) {
|
||||
auto missing_left_chg = static_cast<float>(
|
||||
evaluator.CalcSplitGain(param_, nidx, fidx, GradStats{left_sum}, GradStats{right_sum}) -
|
||||
parent.root_gain);
|
||||
auto missing_left_chg =
|
||||
static_cast<float>(evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{left_sum},
|
||||
GradStats{right_sum}) -
|
||||
parent.root_gain);
|
||||
best.Update(missing_left_chg, fidx, split_pt, true, true, left_sum, right_sum);
|
||||
}
|
||||
|
||||
@@ -105,9 +105,10 @@ class HistEvaluator {
|
||||
right_sum.Add(missing);
|
||||
left_sum.SetSubstract(parent.stats, right_sum);
|
||||
if (IsValid(left_sum, right_sum)) {
|
||||
auto missing_right_chg = static_cast<float>(
|
||||
evaluator.CalcSplitGain(param_, nidx, fidx, GradStats{left_sum}, GradStats{right_sum}) -
|
||||
parent.root_gain);
|
||||
auto missing_right_chg =
|
||||
static_cast<float>(evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{left_sum},
|
||||
GradStats{right_sum}) -
|
||||
parent.root_gain);
|
||||
best.Update(missing_right_chg, fidx, split_pt, false, true, left_sum, right_sum);
|
||||
}
|
||||
}
|
||||
@@ -152,7 +153,7 @@ class HistEvaluator {
|
||||
bst_bin_t f_begin = cut_ptr[fidx];
|
||||
bst_bin_t f_end = cut_ptr[fidx + 1];
|
||||
bst_bin_t n_bins_feature{f_end - f_begin};
|
||||
auto n_bins = std::min(param_.max_cat_threshold, n_bins_feature);
|
||||
auto n_bins = std::min(param_->max_cat_threshold, n_bins_feature);
|
||||
|
||||
// statistics on both sides of split
|
||||
GradStats left_sum;
|
||||
@@ -181,9 +182,9 @@ class HistEvaluator {
|
||||
right_sum.SetSubstract(parent.stats, left_sum); // missing on right
|
||||
}
|
||||
if (IsValid(left_sum, right_sum)) {
|
||||
auto loss_chg =
|
||||
evaluator.CalcSplitGain(param_, nidx, fidx, GradStats{left_sum}, GradStats{right_sum}) -
|
||||
parent.root_gain;
|
||||
auto loss_chg = evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{left_sum},
|
||||
GradStats{right_sum}) -
|
||||
parent.root_gain;
|
||||
// We don't have a numeric split point, nan here is a dummy split.
|
||||
if (best.Update(loss_chg, fidx, std::numeric_limits<float>::quiet_NaN(), d_step == 1, true,
|
||||
left_sum, right_sum)) {
|
||||
@@ -256,7 +257,7 @@ class HistEvaluator {
|
||||
if (d_step > 0) {
|
||||
// forward enumeration: split at right bound of each bin
|
||||
loss_chg =
|
||||
static_cast<float>(evaluator.CalcSplitGain(param_, nidx, fidx, GradStats{left_sum},
|
||||
static_cast<float>(evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{left_sum},
|
||||
GradStats{right_sum}) -
|
||||
parent.root_gain);
|
||||
split_pt = cut_val[i]; // not used for partition based
|
||||
@@ -264,7 +265,7 @@ class HistEvaluator {
|
||||
} else {
|
||||
// backward enumeration: split at left bound of each bin
|
||||
loss_chg =
|
||||
static_cast<float>(evaluator.CalcSplitGain(param_, nidx, fidx, GradStats{right_sum},
|
||||
static_cast<float>(evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{right_sum},
|
||||
GradStats{left_sum}) -
|
||||
parent.root_gain);
|
||||
if (i == imin) {
|
||||
@@ -326,7 +327,7 @@ class HistEvaluator {
|
||||
}
|
||||
if (is_cat) {
|
||||
auto n_bins = cut_ptrs.at(fidx + 1) - cut_ptrs[fidx];
|
||||
if (common::UseOneHot(n_bins, param_.max_cat_to_onehot)) {
|
||||
if (common::UseOneHot(n_bins, param_->max_cat_to_onehot)) {
|
||||
EnumerateOneHot(cut, histogram, fidx, nidx, evaluator, best);
|
||||
} else {
|
||||
std::vector<size_t> sorted_idx(n_bins);
|
||||
@@ -334,8 +335,8 @@ class HistEvaluator {
|
||||
auto feat_hist = histogram.subspan(cut_ptrs[fidx], n_bins);
|
||||
// Sort the histogram to get contiguous partitions.
|
||||
std::stable_sort(sorted_idx.begin(), sorted_idx.end(), [&](size_t l, size_t r) {
|
||||
auto ret = evaluator.CalcWeightCat(param_, feat_hist[l]) <
|
||||
evaluator.CalcWeightCat(param_, feat_hist[r]);
|
||||
auto ret = evaluator.CalcWeightCat(*param_, feat_hist[l]) <
|
||||
evaluator.CalcWeightCat(*param_, feat_hist[r]);
|
||||
return ret;
|
||||
});
|
||||
EnumeratePart<+1>(cut, sorted_idx, histogram, fidx, nidx, evaluator, best);
|
||||
@@ -382,24 +383,22 @@ class HistEvaluator {
|
||||
|
||||
GradStats parent_sum = candidate.split.left_sum;
|
||||
parent_sum.Add(candidate.split.right_sum);
|
||||
auto base_weight =
|
||||
evaluator.CalcWeight(candidate.nid, param_, GradStats{parent_sum});
|
||||
|
||||
auto base_weight = evaluator.CalcWeight(candidate.nid, *param_, GradStats{parent_sum});
|
||||
auto left_weight =
|
||||
evaluator.CalcWeight(candidate.nid, param_, GradStats{candidate.split.left_sum});
|
||||
evaluator.CalcWeight(candidate.nid, *param_, GradStats{candidate.split.left_sum});
|
||||
auto right_weight =
|
||||
evaluator.CalcWeight(candidate.nid, param_, GradStats{candidate.split.right_sum});
|
||||
evaluator.CalcWeight(candidate.nid, *param_, GradStats{candidate.split.right_sum});
|
||||
|
||||
if (candidate.split.is_cat) {
|
||||
tree.ExpandCategorical(
|
||||
candidate.nid, candidate.split.SplitIndex(), candidate.split.cat_bits,
|
||||
candidate.split.DefaultLeft(), base_weight, left_weight * param_.learning_rate,
|
||||
right_weight * param_.learning_rate, candidate.split.loss_chg, parent_sum.GetHess(),
|
||||
candidate.split.DefaultLeft(), base_weight, left_weight * param_->learning_rate,
|
||||
right_weight * param_->learning_rate, candidate.split.loss_chg, parent_sum.GetHess(),
|
||||
candidate.split.left_sum.GetHess(), candidate.split.right_sum.GetHess());
|
||||
} else {
|
||||
tree.ExpandNode(candidate.nid, candidate.split.SplitIndex(), candidate.split.split_value,
|
||||
candidate.split.DefaultLeft(), base_weight,
|
||||
left_weight * param_.learning_rate, right_weight * param_.learning_rate,
|
||||
left_weight * param_->learning_rate, right_weight * param_->learning_rate,
|
||||
candidate.split.loss_chg, parent_sum.GetHess(),
|
||||
candidate.split.left_sum.GetHess(), candidate.split.right_sum.GetHess());
|
||||
}
|
||||
@@ -415,11 +414,11 @@ class HistEvaluator {
|
||||
max_node = std::max(candidate.nid, max_node);
|
||||
snode_.resize(tree.GetNodes().size());
|
||||
snode_.at(left_child).stats = candidate.split.left_sum;
|
||||
snode_.at(left_child).root_gain = evaluator.CalcGain(
|
||||
candidate.nid, param_, GradStats{candidate.split.left_sum});
|
||||
snode_.at(left_child).root_gain =
|
||||
evaluator.CalcGain(candidate.nid, *param_, GradStats{candidate.split.left_sum});
|
||||
snode_.at(right_child).stats = candidate.split.right_sum;
|
||||
snode_.at(right_child).root_gain = evaluator.CalcGain(
|
||||
candidate.nid, param_, GradStats{candidate.split.right_sum});
|
||||
snode_.at(right_child).root_gain =
|
||||
evaluator.CalcGain(candidate.nid, *param_, GradStats{candidate.split.right_sum});
|
||||
|
||||
interaction_constraints_.Split(candidate.nid,
|
||||
tree[candidate.nid].SplitIndex(), left_child,
|
||||
@@ -429,31 +428,31 @@ class HistEvaluator {
|
||||
auto Evaluator() const { return tree_evaluator_.GetEvaluator(); }
|
||||
auto const& Stats() const { return snode_; }
|
||||
|
||||
float InitRoot(GradStats const& root_sum) {
|
||||
float InitRoot(GradStats const &root_sum) {
|
||||
snode_.resize(1);
|
||||
auto root_evaluator = tree_evaluator_.GetEvaluator();
|
||||
|
||||
snode_[0].stats = GradStats{root_sum.GetGrad(), root_sum.GetHess()};
|
||||
snode_[0].root_gain = root_evaluator.CalcGain(RegTree::kRoot, param_,
|
||||
GradStats{snode_[0].stats});
|
||||
auto weight = root_evaluator.CalcWeight(RegTree::kRoot, param_,
|
||||
GradStats{snode_[0].stats});
|
||||
snode_[0].root_gain =
|
||||
root_evaluator.CalcGain(RegTree::kRoot, *param_, GradStats{snode_[0].stats});
|
||||
auto weight = root_evaluator.CalcWeight(RegTree::kRoot, *param_, GradStats{snode_[0].stats});
|
||||
return weight;
|
||||
}
|
||||
|
||||
public:
|
||||
// The column sampler must be constructed by caller since we need to preserve the rng
|
||||
// for the entire training session.
|
||||
explicit HistEvaluator(Context const* ctx, TrainParam const &param, MetaInfo const &info,
|
||||
explicit HistEvaluator(Context const *ctx, TrainParam const *param, MetaInfo const &info,
|
||||
std::shared_ptr<common::ColumnSampler> sampler)
|
||||
: ctx_{ctx}, param_{param},
|
||||
: ctx_{ctx},
|
||||
param_{param},
|
||||
column_sampler_{std::move(sampler)},
|
||||
tree_evaluator_{param, static_cast<bst_feature_t>(info.num_col_), Context::kCpuId},
|
||||
tree_evaluator_{*param, static_cast<bst_feature_t>(info.num_col_), Context::kCpuId},
|
||||
is_col_split_{info.data_split_mode == DataSplitMode::kCol} {
|
||||
interaction_constraints_.Configure(param, info.num_col_);
|
||||
interaction_constraints_.Configure(*param, info.num_col_);
|
||||
column_sampler_->Init(ctx, info.num_col_, info.feature_weights.HostVector(),
|
||||
param_.colsample_bynode, param_.colsample_bylevel,
|
||||
param_.colsample_bytree);
|
||||
param_->colsample_bynode, param_->colsample_bylevel,
|
||||
param_->colsample_bytree);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -488,6 +487,5 @@ void UpdatePredictionCacheImpl(Context const *ctx, RegTree const *p_last_tree,
|
||||
});
|
||||
}
|
||||
}
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
#endif // XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_
|
||||
|
||||
@@ -23,8 +23,7 @@
|
||||
#include "xgboost/tree_model.h"
|
||||
#include "xgboost/tree_updater.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
namespace xgboost::tree {
|
||||
|
||||
DMLC_REGISTRY_FILE_TAG(updater_approx);
|
||||
|
||||
@@ -41,7 +40,7 @@ auto BatchSpec(TrainParam const &p, common::Span<float> hess) {
|
||||
|
||||
class GloablApproxBuilder {
|
||||
protected:
|
||||
TrainParam param_;
|
||||
TrainParam const* param_;
|
||||
std::shared_ptr<common::ColumnSampler> col_sampler_;
|
||||
HistEvaluator<CPUExpandEntry> evaluator_;
|
||||
HistogramBuilder<CPUExpandEntry> histogram_builder_;
|
||||
@@ -64,7 +63,7 @@ class GloablApproxBuilder {
|
||||
bst_bin_t n_total_bins = 0;
|
||||
partitioner_.clear();
|
||||
// Generating the GHistIndexMatrix is quite slow, is there a way to speed it up?
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(param_, hess, task_))) {
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(*param_, hess, task_))) {
|
||||
if (n_total_bins == 0) {
|
||||
n_total_bins = page.cut.TotalBins();
|
||||
feature_values_ = page.cut;
|
||||
@@ -75,7 +74,7 @@ class GloablApproxBuilder {
|
||||
n_batches_++;
|
||||
}
|
||||
|
||||
histogram_builder_.Reset(n_total_bins, BatchSpec(param_, hess), ctx_->Threads(), n_batches_,
|
||||
histogram_builder_.Reset(n_total_bins, BatchSpec(*param_, hess), ctx_->Threads(), n_batches_,
|
||||
collective::IsDistributed(), p_fmat->IsColumnSplit());
|
||||
monitor_->Stop(__func__);
|
||||
}
|
||||
@@ -96,7 +95,7 @@ class GloablApproxBuilder {
|
||||
std::vector<CPUExpandEntry> nodes{best};
|
||||
size_t i = 0;
|
||||
auto space = ConstructHistSpace(partitioner_, nodes);
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(param_, hess))) {
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(*param_, hess))) {
|
||||
histogram_builder_.BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(), nodes,
|
||||
{}, gpair);
|
||||
i++;
|
||||
@@ -105,7 +104,7 @@ class GloablApproxBuilder {
|
||||
auto weight = evaluator_.InitRoot(root_sum);
|
||||
p_tree->Stat(RegTree::kRoot).sum_hess = root_sum.GetHess();
|
||||
p_tree->Stat(RegTree::kRoot).base_weight = weight;
|
||||
(*p_tree)[RegTree::kRoot].SetLeaf(param_.learning_rate * weight);
|
||||
(*p_tree)[RegTree::kRoot].SetLeaf(param_->learning_rate * weight);
|
||||
|
||||
auto const &histograms = histogram_builder_.Histogram();
|
||||
auto ft = p_fmat->Info().feature_types.ConstHostSpan();
|
||||
@@ -147,7 +146,7 @@ class GloablApproxBuilder {
|
||||
|
||||
size_t i = 0;
|
||||
auto space = ConstructHistSpace(partitioner_, nodes_to_build);
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(param_, hess))) {
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(*param_, hess))) {
|
||||
histogram_builder_.BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(),
|
||||
nodes_to_build, nodes_to_sub, gpair);
|
||||
i++;
|
||||
@@ -168,10 +167,10 @@ class GloablApproxBuilder {
|
||||
}
|
||||
|
||||
public:
|
||||
explicit GloablApproxBuilder(TrainParam param, MetaInfo const &info, Context const *ctx,
|
||||
explicit GloablApproxBuilder(TrainParam const *param, MetaInfo const &info, Context const *ctx,
|
||||
std::shared_ptr<common::ColumnSampler> column_sampler, ObjInfo task,
|
||||
common::Monitor *monitor)
|
||||
: param_{std::move(param)},
|
||||
: param_{param},
|
||||
col_sampler_{std::move(column_sampler)},
|
||||
evaluator_{ctx, param_, info, col_sampler_},
|
||||
ctx_{ctx},
|
||||
@@ -183,7 +182,7 @@ class GloablApproxBuilder {
|
||||
p_last_tree_ = p_tree;
|
||||
this->InitData(p_fmat, hess);
|
||||
|
||||
Driver<CPUExpandEntry> driver(param_);
|
||||
Driver<CPUExpandEntry> driver(*param_);
|
||||
auto &tree = *p_tree;
|
||||
driver.Push({this->InitRoot(p_fmat, gpair, hess, p_tree)});
|
||||
auto expand_set = driver.Pop();
|
||||
@@ -213,7 +212,7 @@ class GloablApproxBuilder {
|
||||
|
||||
monitor_->Start("UpdatePosition");
|
||||
size_t page_id = 0;
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(param_, hess))) {
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(*param_, hess))) {
|
||||
partitioner_.at(page_id).UpdatePosition(ctx_, page, applied, p_tree);
|
||||
page_id++;
|
||||
}
|
||||
@@ -250,7 +249,6 @@ class GloablApproxBuilder {
|
||||
* iteration.
|
||||
*/
|
||||
class GlobalApproxUpdater : public TreeUpdater {
|
||||
TrainParam param_;
|
||||
common::Monitor monitor_;
|
||||
// specializations for different histogram precision.
|
||||
std::unique_ptr<GloablApproxBuilder> pimpl_;
|
||||
@@ -265,15 +263,9 @@ class GlobalApproxUpdater : public TreeUpdater {
|
||||
monitor_.Init(__func__);
|
||||
}
|
||||
|
||||
void Configure(const Args &args) override { param_.UpdateAllowUnknown(args); }
|
||||
void LoadConfig(Json const &in) override {
|
||||
auto const &config = get<Object const>(in);
|
||||
FromJson(config.at("train_param"), &this->param_);
|
||||
}
|
||||
void SaveConfig(Json *p_out) const override {
|
||||
auto &out = *p_out;
|
||||
out["train_param"] = ToJson(param_);
|
||||
}
|
||||
void Configure(Args const &) override {}
|
||||
void LoadConfig(Json const &) override {}
|
||||
void SaveConfig(Json *) const override {}
|
||||
|
||||
void InitData(TrainParam const &param, HostDeviceVector<GradientPair> const *gpair,
|
||||
linalg::Matrix<GradientPair> *sampled) {
|
||||
@@ -283,20 +275,17 @@ class GlobalApproxUpdater : public TreeUpdater {
|
||||
SampleGradient(ctx_, param, sampled->HostView());
|
||||
}
|
||||
|
||||
char const *Name() const override { return "grow_histmaker"; }
|
||||
[[nodiscard]] char const *Name() const override { return "grow_histmaker"; }
|
||||
|
||||
void Update(HostDeviceVector<GradientPair> *gpair, DMatrix *m,
|
||||
void Update(TrainParam const *param, HostDeviceVector<GradientPair> *gpair, DMatrix *m,
|
||||
common::Span<HostDeviceVector<bst_node_t>> out_position,
|
||||
const std::vector<RegTree *> &trees) override {
|
||||
float lr = param_.learning_rate;
|
||||
param_.learning_rate = lr / trees.size();
|
||||
|
||||
pimpl_ = std::make_unique<GloablApproxBuilder>(param_, m->Info(), ctx_, column_sampler_, task_,
|
||||
pimpl_ = std::make_unique<GloablApproxBuilder>(param, m->Info(), ctx_, column_sampler_, task_,
|
||||
&monitor_);
|
||||
|
||||
linalg::Matrix<GradientPair> h_gpair;
|
||||
// Obtain the hessian values for weighted sketching
|
||||
InitData(param_, gpair, &h_gpair);
|
||||
InitData(*param, gpair, &h_gpair);
|
||||
std::vector<float> hess(h_gpair.Size());
|
||||
auto const &s_gpair = h_gpair.Data()->ConstHostVector();
|
||||
std::transform(s_gpair.begin(), s_gpair.end(), hess.begin(),
|
||||
@@ -304,12 +293,11 @@ class GlobalApproxUpdater : public TreeUpdater {
|
||||
|
||||
cached_ = m;
|
||||
|
||||
size_t t_idx = 0;
|
||||
std::size_t t_idx = 0;
|
||||
for (auto p_tree : trees) {
|
||||
this->pimpl_->UpdateTree(m, s_gpair, hess, p_tree, &out_position[t_idx]);
|
||||
++t_idx;
|
||||
}
|
||||
param_.learning_rate = lr;
|
||||
}
|
||||
|
||||
bool UpdatePredictionCache(const DMatrix *data, linalg::VectorView<float> out_preds) override {
|
||||
@@ -320,7 +308,7 @@ class GlobalApproxUpdater : public TreeUpdater {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool HasNodePosition() const override { return true; }
|
||||
[[nodiscard]] bool HasNodePosition() const override { return true; }
|
||||
};
|
||||
|
||||
DMLC_REGISTRY_FILE_TAG(grow_histmaker);
|
||||
@@ -330,5 +318,4 @@ XGBOOST_REGISTER_TREE_UPDATER(GlobalHistMaker, "grow_histmaker")
|
||||
"Tree constructor that uses approximate histogram construction "
|
||||
"for each node.")
|
||||
.set_body([](Context const *ctx, ObjInfo task) { return new GlobalApproxUpdater(ctx, task); });
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2014-2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2014-2023 by XGBoost Contributors
|
||||
* \file updater_colmaker.cc
|
||||
* \brief use columnwise update to construct a tree
|
||||
* \author Tianqi Chen
|
||||
@@ -17,8 +17,7 @@
|
||||
#include "../common/random.h"
|
||||
#include "split_evaluator.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
namespace xgboost::tree {
|
||||
|
||||
DMLC_REGISTRY_FILE_TAG(updater_colmaker);
|
||||
|
||||
@@ -57,18 +56,15 @@ class ColMaker: public TreeUpdater {
|
||||
public:
|
||||
explicit ColMaker(Context const *ctx) : TreeUpdater(ctx) {}
|
||||
void Configure(const Args &args) override {
|
||||
param_.UpdateAllowUnknown(args);
|
||||
colmaker_param_.UpdateAllowUnknown(args);
|
||||
}
|
||||
|
||||
void LoadConfig(Json const& in) override {
|
||||
auto const& config = get<Object const>(in);
|
||||
FromJson(config.at("train_param"), &this->param_);
|
||||
FromJson(config.at("colmaker_train_param"), &this->colmaker_param_);
|
||||
}
|
||||
void SaveConfig(Json* p_out) const override {
|
||||
auto& out = *p_out;
|
||||
out["train_param"] = ToJson(param_);
|
||||
void SaveConfig(Json *p_out) const override {
|
||||
auto &out = *p_out;
|
||||
out["colmaker_train_param"] = ToJson(colmaker_param_);
|
||||
}
|
||||
|
||||
@@ -95,7 +91,7 @@ class ColMaker: public TreeUpdater {
|
||||
}
|
||||
}
|
||||
|
||||
void Update(HostDeviceVector<GradientPair> *gpair, DMatrix *dmat,
|
||||
void Update(TrainParam const *param, HostDeviceVector<GradientPair> *gpair, DMatrix *dmat,
|
||||
common::Span<HostDeviceVector<bst_node_t>> /*out_position*/,
|
||||
const std::vector<RegTree *> &trees) override {
|
||||
if (collective::IsDistributed()) {
|
||||
@@ -108,22 +104,16 @@ class ColMaker: public TreeUpdater {
|
||||
}
|
||||
this->LazyGetColumnDensity(dmat);
|
||||
// rescale learning rate according to size of trees
|
||||
float lr = param_.learning_rate;
|
||||
param_.learning_rate = lr / trees.size();
|
||||
interaction_constraints_.Configure(param_, dmat->Info().num_row_);
|
||||
interaction_constraints_.Configure(*param, dmat->Info().num_row_);
|
||||
// build tree
|
||||
for (auto tree : trees) {
|
||||
CHECK(ctx_);
|
||||
Builder builder(param_, colmaker_param_, interaction_constraints_, ctx_,
|
||||
column_densities_);
|
||||
Builder builder(*param, colmaker_param_, interaction_constraints_, ctx_, column_densities_);
|
||||
builder.Update(gpair->ConstHostVector(), dmat, tree);
|
||||
}
|
||||
param_.learning_rate = lr;
|
||||
}
|
||||
|
||||
protected:
|
||||
// training parameter
|
||||
TrainParam param_;
|
||||
ColMakerTrainParam colmaker_param_;
|
||||
// SplitEvaluator that will be cloned for each Builder
|
||||
std::vector<float> column_densities_;
|
||||
@@ -614,5 +604,4 @@ class ColMaker: public TreeUpdater {
|
||||
XGBOOST_REGISTER_TREE_UPDATER(ColMaker, "grow_colmaker")
|
||||
.describe("Grow tree with parallelization over columns.")
|
||||
.set_body([](Context const *ctx, ObjInfo) { return new ColMaker(ctx); });
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2017-2022 XGBoost contributors
|
||||
/**
|
||||
* Copyright 2017-2023 by XGBoost contributors
|
||||
*/
|
||||
#include <thrust/copy.h>
|
||||
#include <thrust/reduce.h>
|
||||
@@ -756,7 +756,6 @@ class GPUHistMaker : public TreeUpdater {
|
||||
void Configure(const Args& args) override {
|
||||
// Used in test to count how many configurations are performed
|
||||
LOG(DEBUG) << "[GPU Hist]: Configure";
|
||||
param_.UpdateAllowUnknown(args);
|
||||
hist_maker_param_.UpdateAllowUnknown(args);
|
||||
dh::CheckComputeCapability();
|
||||
initialised_ = false;
|
||||
@@ -768,32 +767,26 @@ class GPUHistMaker : public TreeUpdater {
|
||||
auto const& config = get<Object const>(in);
|
||||
FromJson(config.at("gpu_hist_train_param"), &this->hist_maker_param_);
|
||||
initialised_ = false;
|
||||
FromJson(config.at("train_param"), &param_);
|
||||
}
|
||||
void SaveConfig(Json* p_out) const override {
|
||||
auto& out = *p_out;
|
||||
out["gpu_hist_train_param"] = ToJson(hist_maker_param_);
|
||||
out["train_param"] = ToJson(param_);
|
||||
}
|
||||
|
||||
~GPUHistMaker() { // NOLINT
|
||||
dh::GlobalMemoryLogger().Log();
|
||||
}
|
||||
|
||||
void Update(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
|
||||
void Update(TrainParam const* param, HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
|
||||
common::Span<HostDeviceVector<bst_node_t>> out_position,
|
||||
const std::vector<RegTree*>& trees) override {
|
||||
monitor_.Start("Update");
|
||||
|
||||
// rescale learning rate according to size of trees
|
||||
float lr = param_.learning_rate;
|
||||
param_.learning_rate = lr / trees.size();
|
||||
|
||||
// build tree
|
||||
try {
|
||||
size_t t_idx{0};
|
||||
for (xgboost::RegTree* tree : trees) {
|
||||
this->UpdateTree(gpair, dmat, tree, &out_position[t_idx]);
|
||||
this->UpdateTree(param, gpair, dmat, tree, &out_position[t_idx]);
|
||||
|
||||
if (hist_maker_param_.debug_synchronize) {
|
||||
this->CheckTreesSynchronized(tree);
|
||||
@@ -804,12 +797,10 @@ class GPUHistMaker : public TreeUpdater {
|
||||
} catch (const std::exception& e) {
|
||||
LOG(FATAL) << "Exception in gpu_hist: " << e.what() << std::endl;
|
||||
}
|
||||
|
||||
param_.learning_rate = lr;
|
||||
monitor_.Stop("Update");
|
||||
}
|
||||
|
||||
void InitDataOnce(DMatrix* dmat) {
|
||||
void InitDataOnce(TrainParam const* param, DMatrix* dmat) {
|
||||
CHECK_GE(ctx_->gpu_id, 0) << "Must have at least one device";
|
||||
info_ = &dmat->Info();
|
||||
|
||||
@@ -818,24 +809,24 @@ class GPUHistMaker : public TreeUpdater {
|
||||
collective::Broadcast(&column_sampling_seed, sizeof(column_sampling_seed), 0);
|
||||
|
||||
BatchParam batch_param{
|
||||
ctx_->gpu_id,
|
||||
param_.max_bin,
|
||||
ctx_->gpu_id,
|
||||
param->max_bin,
|
||||
};
|
||||
auto page = (*dmat->GetBatches<EllpackPage>(batch_param).begin()).Impl();
|
||||
dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
|
||||
info_->feature_types.SetDevice(ctx_->gpu_id);
|
||||
maker.reset(new GPUHistMakerDevice<GradientSumT>(
|
||||
ctx_, page, info_->feature_types.ConstDeviceSpan(), info_->num_row_, param_,
|
||||
ctx_, page, info_->feature_types.ConstDeviceSpan(), info_->num_row_, *param,
|
||||
column_sampling_seed, info_->num_col_, batch_param));
|
||||
|
||||
p_last_fmat_ = dmat;
|
||||
initialised_ = true;
|
||||
}
|
||||
|
||||
void InitData(DMatrix* dmat, RegTree const* p_tree) {
|
||||
void InitData(TrainParam const* param, DMatrix* dmat, RegTree const* p_tree) {
|
||||
if (!initialised_) {
|
||||
monitor_.Start("InitDataOnce");
|
||||
this->InitDataOnce(dmat);
|
||||
this->InitDataOnce(param, dmat);
|
||||
monitor_.Stop("InitDataOnce");
|
||||
}
|
||||
p_last_tree_ = p_tree;
|
||||
@@ -856,10 +847,10 @@ class GPUHistMaker : public TreeUpdater {
|
||||
CHECK(*local_tree == reference_tree);
|
||||
}
|
||||
|
||||
void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat, RegTree* p_tree,
|
||||
HostDeviceVector<bst_node_t>* p_out_position) {
|
||||
void UpdateTree(TrainParam const* param, HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat,
|
||||
RegTree* p_tree, HostDeviceVector<bst_node_t>* p_out_position) {
|
||||
monitor_.Start("InitData");
|
||||
this->InitData(p_fmat, p_tree);
|
||||
this->InitData(param, p_fmat, p_tree);
|
||||
monitor_.Stop("InitData");
|
||||
|
||||
gpair->SetDevice(ctx_->gpu_id);
|
||||
@@ -878,7 +869,6 @@ class GPUHistMaker : public TreeUpdater {
|
||||
return result;
|
||||
}
|
||||
|
||||
TrainParam param_; // NOLINT
|
||||
MetaInfo* info_{}; // NOLINT
|
||||
|
||||
std::unique_ptr<GPUHistMakerDevice<GradientSumT>> maker; // NOLINT
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2014-2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2014-2023 by XGBoost Contributors
|
||||
* \file updater_prune.cc
|
||||
* \brief prune a tree given the statistics
|
||||
* \author Tianqi Chen
|
||||
@@ -8,13 +8,11 @@
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "../common/timer.h"
|
||||
#include "./param.h"
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/json.h"
|
||||
#include "./param.h"
|
||||
#include "../common/timer.h"
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
|
||||
namespace xgboost::tree {
|
||||
DMLC_REGISTRY_FILE_TAG(updater_prune);
|
||||
|
||||
/*! \brief pruner that prunes a tree after growing finishes */
|
||||
@@ -24,47 +22,31 @@ class TreePruner : public TreeUpdater {
|
||||
syncher_.reset(TreeUpdater::Create("sync", ctx_, task));
|
||||
pruner_monitor_.Init("TreePruner");
|
||||
}
|
||||
char const* Name() const override {
|
||||
return "prune";
|
||||
}
|
||||
|
||||
[[nodiscard]] char const* Name() const override { return "prune"; }
|
||||
// set training parameter
|
||||
void Configure(const Args& args) override {
|
||||
param_.UpdateAllowUnknown(args);
|
||||
syncher_->Configure(args);
|
||||
}
|
||||
void Configure(const Args& args) override { syncher_->Configure(args); }
|
||||
|
||||
void LoadConfig(Json const& in) override {
|
||||
auto const& config = get<Object const>(in);
|
||||
FromJson(config.at("train_param"), &this->param_);
|
||||
}
|
||||
void SaveConfig(Json* p_out) const override {
|
||||
auto& out = *p_out;
|
||||
out["train_param"] = ToJson(param_);
|
||||
}
|
||||
bool CanModifyTree() const override {
|
||||
return true;
|
||||
}
|
||||
void LoadConfig(Json const&) override {}
|
||||
void SaveConfig(Json*) const override {}
|
||||
[[nodiscard]] bool CanModifyTree() const override { return true; }
|
||||
|
||||
// update the tree, do pruning
|
||||
void Update(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat,
|
||||
void Update(TrainParam const* param, HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat,
|
||||
common::Span<HostDeviceVector<bst_node_t>> out_position,
|
||||
const std::vector<RegTree*>& trees) override {
|
||||
pruner_monitor_.Start("PrunerUpdate");
|
||||
// rescale learning rate according to size of trees
|
||||
float lr = param_.learning_rate;
|
||||
param_.learning_rate = lr / trees.size();
|
||||
for (auto tree : trees) {
|
||||
this->DoPrune(tree);
|
||||
this->DoPrune(param, tree);
|
||||
}
|
||||
param_.learning_rate = lr;
|
||||
syncher_->Update(gpair, p_fmat, out_position, trees);
|
||||
syncher_->Update(param, gpair, p_fmat, out_position, trees);
|
||||
pruner_monitor_.Stop("PrunerUpdate");
|
||||
}
|
||||
|
||||
private:
|
||||
// try to prune off current leaf
|
||||
bst_node_t TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) { // NOLINT(*)
|
||||
bst_node_t TryPruneLeaf(TrainParam const* param, RegTree* p_tree, int nid, int depth,
|
||||
int npruned) {
|
||||
auto& tree = *p_tree;
|
||||
CHECK(tree[nid].IsLeaf());
|
||||
if (tree[nid].IsRoot()) {
|
||||
return npruned;
|
||||
@@ -77,22 +59,22 @@ class TreePruner : public TreeUpdater {
|
||||
auto right = tree[pid].RightChild();
|
||||
bool balanced = tree[left].IsLeaf() &&
|
||||
right != RegTree::kInvalidNodeId && tree[right].IsLeaf();
|
||||
if (balanced && param_.NeedPrune(s.loss_chg, depth)) {
|
||||
if (balanced && param->NeedPrune(s.loss_chg, depth)) {
|
||||
// need to be pruned
|
||||
tree.ChangeToLeaf(pid, param_.learning_rate * s.base_weight);
|
||||
tree.ChangeToLeaf(pid, param->learning_rate * s.base_weight);
|
||||
// tail recursion
|
||||
return this->TryPruneLeaf(tree, pid, depth - 1, npruned + 2);
|
||||
return this->TryPruneLeaf(param, p_tree, pid, depth - 1, npruned + 2);
|
||||
} else {
|
||||
return npruned;
|
||||
}
|
||||
}
|
||||
/*! \brief do pruning of a tree */
|
||||
void DoPrune(RegTree* p_tree) {
|
||||
void DoPrune(TrainParam const* param, RegTree* p_tree) {
|
||||
auto& tree = *p_tree;
|
||||
bst_node_t npruned = 0;
|
||||
for (int nid = 0; nid < tree.param.num_nodes; ++nid) {
|
||||
if (tree[nid].IsLeaf() && !tree[nid].IsDeleted()) {
|
||||
npruned = this->TryPruneLeaf(tree, nid, tree.GetDepth(nid), npruned);
|
||||
npruned = this->TryPruneLeaf(param, p_tree, nid, tree.GetDepth(nid), npruned);
|
||||
}
|
||||
}
|
||||
LOG(INFO) << "tree pruning end, "
|
||||
@@ -103,13 +85,10 @@ class TreePruner : public TreeUpdater {
|
||||
private:
|
||||
// synchronizer
|
||||
std::unique_ptr<TreeUpdater> syncher_;
|
||||
// training parameter
|
||||
TrainParam param_;
|
||||
common::Monitor pruner_monitor_;
|
||||
};
|
||||
|
||||
XGBOOST_REGISTER_TREE_UPDATER(TreePruner, "prune")
|
||||
.describe("Pruner that prune the tree according to statistics.")
|
||||
.set_body([](Context const* ctx, ObjInfo task) { return new TreePruner(ctx, task); });
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -28,21 +28,14 @@ namespace tree {
|
||||
|
||||
DMLC_REGISTRY_FILE_TAG(updater_quantile_hist);
|
||||
|
||||
void QuantileHistMaker::Configure(const Args &args) {
|
||||
param_.UpdateAllowUnknown(args);
|
||||
}
|
||||
|
||||
void QuantileHistMaker::Update(HostDeviceVector<GradientPair> *gpair, DMatrix *dmat,
|
||||
void QuantileHistMaker::Update(TrainParam const *param, HostDeviceVector<GradientPair> *gpair,
|
||||
DMatrix *dmat,
|
||||
common::Span<HostDeviceVector<bst_node_t>> out_position,
|
||||
const std::vector<RegTree *> &trees) {
|
||||
// rescale learning rate according to size of trees
|
||||
float lr = param_.learning_rate;
|
||||
param_.learning_rate = lr / trees.size();
|
||||
|
||||
// build tree
|
||||
const size_t n_trees = trees.size();
|
||||
if (!pimpl_) {
|
||||
pimpl_.reset(new Builder(n_trees, param_, dmat, task_, ctx_));
|
||||
pimpl_.reset(new Builder(n_trees, param, dmat, task_, ctx_));
|
||||
}
|
||||
|
||||
size_t t_idx{0};
|
||||
@@ -51,8 +44,6 @@ void QuantileHistMaker::Update(HostDeviceVector<GradientPair> *gpair, DMatrix *d
|
||||
this->pimpl_->UpdateTree(gpair, dmat, p_tree, &t_row_position);
|
||||
++t_idx;
|
||||
}
|
||||
|
||||
param_.learning_rate = lr;
|
||||
}
|
||||
|
||||
bool QuantileHistMaker::UpdatePredictionCache(const DMatrix *data,
|
||||
@@ -107,7 +98,7 @@ CPUExpandEntry QuantileHistMaker::Builder::InitRoot(
|
||||
auto weight = evaluator_->InitRoot(GradStats{grad_stat});
|
||||
p_tree->Stat(RegTree::kRoot).sum_hess = grad_stat.GetHess();
|
||||
p_tree->Stat(RegTree::kRoot).base_weight = weight;
|
||||
(*p_tree)[RegTree::kRoot].SetLeaf(param_.learning_rate * weight);
|
||||
(*p_tree)[RegTree::kRoot].SetLeaf(param_->learning_rate * weight);
|
||||
|
||||
std::vector<CPUExpandEntry> entries{node};
|
||||
monitor_->Start("EvaluateSplits");
|
||||
@@ -173,7 +164,7 @@ void QuantileHistMaker::Builder::ExpandTree(DMatrix *p_fmat, RegTree *p_tree,
|
||||
HostDeviceVector<bst_node_t> *p_out_position) {
|
||||
monitor_->Start(__func__);
|
||||
|
||||
Driver<CPUExpandEntry> driver(param_);
|
||||
Driver<CPUExpandEntry> driver(*param_);
|
||||
driver.Push(this->InitRoot(p_fmat, p_tree, gpair_h));
|
||||
auto const &tree = *p_tree;
|
||||
auto expand_set = driver.Pop();
|
||||
@@ -285,7 +276,7 @@ void QuantileHistMaker::Builder::InitData(DMatrix *fmat, const RegTree &tree,
|
||||
|
||||
auto m_gpair =
|
||||
linalg::MakeTensorView(*gpair, {gpair->size(), static_cast<std::size_t>(1)}, ctx_->gpu_id);
|
||||
SampleGradient(ctx_, param_, m_gpair);
|
||||
SampleGradient(ctx_, *param_, m_gpair);
|
||||
}
|
||||
|
||||
// store a pointer to the tree
|
||||
|
||||
@@ -35,49 +35,36 @@
|
||||
#include "../common/partition_builder.h"
|
||||
#include "../common/column_matrix.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
inline BatchParam HistBatch(TrainParam const& param) {
|
||||
return {param.max_bin, param.sparse_threshold};
|
||||
namespace xgboost::tree {
|
||||
inline BatchParam HistBatch(TrainParam const* param) {
|
||||
return {param->max_bin, param->sparse_threshold};
|
||||
}
|
||||
|
||||
/*! \brief construct a tree using quantized feature values */
|
||||
class QuantileHistMaker: public TreeUpdater {
|
||||
public:
|
||||
explicit QuantileHistMaker(Context const* ctx, ObjInfo task) : TreeUpdater(ctx), task_{task} {}
|
||||
void Configure(const Args& args) override;
|
||||
void Configure(const Args&) override {}
|
||||
|
||||
void Update(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
|
||||
void Update(TrainParam const* param, HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
|
||||
common::Span<HostDeviceVector<bst_node_t>> out_position,
|
||||
const std::vector<RegTree*>& trees) override;
|
||||
|
||||
bool UpdatePredictionCache(const DMatrix *data,
|
||||
linalg::VectorView<float> out_preds) override;
|
||||
|
||||
void LoadConfig(Json const& in) override {
|
||||
auto const& config = get<Object const>(in);
|
||||
FromJson(config.at("train_param"), &this->param_);
|
||||
}
|
||||
void SaveConfig(Json* p_out) const override {
|
||||
auto& out = *p_out;
|
||||
out["train_param"] = ToJson(param_);
|
||||
}
|
||||
void LoadConfig(Json const&) override {}
|
||||
void SaveConfig(Json*) const override {}
|
||||
|
||||
char const* Name() const override {
|
||||
return "grow_quantile_histmaker";
|
||||
}
|
||||
|
||||
bool HasNodePosition() const override { return true; }
|
||||
[[nodiscard]] char const* Name() const override { return "grow_quantile_histmaker"; }
|
||||
[[nodiscard]] bool HasNodePosition() const override { return true; }
|
||||
|
||||
protected:
|
||||
// training parameter
|
||||
TrainParam param_;
|
||||
|
||||
// actual builder that runs the algorithm
|
||||
struct Builder {
|
||||
public:
|
||||
// constructor
|
||||
explicit Builder(const size_t n_trees, const TrainParam& param, DMatrix const* fmat,
|
||||
explicit Builder(const size_t n_trees, TrainParam const* param, DMatrix const* fmat,
|
||||
ObjInfo task, Context const* ctx)
|
||||
: n_trees_(n_trees),
|
||||
param_(param),
|
||||
@@ -115,7 +102,7 @@ class QuantileHistMaker: public TreeUpdater {
|
||||
|
||||
private:
|
||||
const size_t n_trees_;
|
||||
const TrainParam& param_;
|
||||
TrainParam const* param_;
|
||||
std::shared_ptr<common::ColumnSampler> column_sampler_{
|
||||
std::make_shared<common::ColumnSampler>()};
|
||||
|
||||
@@ -140,7 +127,6 @@ class QuantileHistMaker: public TreeUpdater {
|
||||
std::unique_ptr<Builder> pimpl_;
|
||||
ObjInfo task_;
|
||||
};
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
#endif // XGBOOST_TREE_UPDATER_QUANTILE_HIST_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2014-2022 by XGBoost Contributors
|
||||
/**
|
||||
* Copyright 2014-2023 by XGBoost Contributors
|
||||
* \file updater_refresh.cc
|
||||
* \brief refresh the statistics and leaf value on the tree on the dataset
|
||||
* \author Tianqi Chen
|
||||
@@ -16,8 +16,7 @@
|
||||
#include "./param.h"
|
||||
#include "xgboost/json.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
namespace xgboost::tree {
|
||||
|
||||
DMLC_REGISTRY_FILE_TAG(updater_refresh);
|
||||
|
||||
@@ -25,23 +24,14 @@ DMLC_REGISTRY_FILE_TAG(updater_refresh);
|
||||
class TreeRefresher : public TreeUpdater {
|
||||
public:
|
||||
explicit TreeRefresher(Context const *ctx) : TreeUpdater(ctx) {}
|
||||
void Configure(const Args &args) override { param_.UpdateAllowUnknown(args); }
|
||||
void LoadConfig(Json const& in) override {
|
||||
auto const& config = get<Object const>(in);
|
||||
FromJson(config.at("train_param"), &this->param_);
|
||||
}
|
||||
void SaveConfig(Json* p_out) const override {
|
||||
auto& out = *p_out;
|
||||
out["train_param"] = ToJson(param_);
|
||||
}
|
||||
char const* Name() const override {
|
||||
return "refresh";
|
||||
}
|
||||
bool CanModifyTree() const override {
|
||||
return true;
|
||||
}
|
||||
void Configure(const Args &) override {}
|
||||
void LoadConfig(Json const &) override {}
|
||||
void SaveConfig(Json *) const override {}
|
||||
|
||||
[[nodiscard]] char const *Name() const override { return "refresh"; }
|
||||
[[nodiscard]] bool CanModifyTree() const override { return true; }
|
||||
// update the tree, refreshing node statistics and leaf values
|
||||
void Update(HostDeviceVector<GradientPair> *gpair, DMatrix *p_fmat,
|
||||
void Update(TrainParam const *param, HostDeviceVector<GradientPair> *gpair, DMatrix *p_fmat,
|
||||
common::Span<HostDeviceVector<bst_node_t>> /*out_position*/,
|
||||
const std::vector<RegTree *> &trees) override {
|
||||
if (trees.size() == 0) return;
|
||||
@@ -103,16 +93,11 @@ class TreeRefresher : public TreeUpdater {
|
||||
lazy_get_stats();
|
||||
collective::Allreduce<collective::Operation::kSum>(&dmlc::BeginPtr(stemp[0])->sum_grad,
|
||||
stemp[0].size() * 2);
|
||||
// rescale learning rate according to size of trees
|
||||
float lr = param_.learning_rate;
|
||||
param_.learning_rate = lr / trees.size();
|
||||
int offset = 0;
|
||||
for (auto tree : trees) {
|
||||
this->Refresh(dmlc::BeginPtr(stemp[0]) + offset, 0, tree);
|
||||
this->Refresh(param, dmlc::BeginPtr(stemp[0]) + offset, 0, tree);
|
||||
offset += tree->param.num_nodes;
|
||||
}
|
||||
// set learning rate back
|
||||
param_.learning_rate = lr;
|
||||
}
|
||||
|
||||
private:
|
||||
@@ -135,31 +120,27 @@ class TreeRefresher : public TreeUpdater {
|
||||
gstats[pid].Add(gpair[ridx]);
|
||||
}
|
||||
}
|
||||
inline void Refresh(const GradStats *gstats,
|
||||
int nid, RegTree *p_tree) {
|
||||
inline void Refresh(TrainParam const *param, const GradStats *gstats, int nid, RegTree *p_tree) {
|
||||
RegTree &tree = *p_tree;
|
||||
tree.Stat(nid).base_weight =
|
||||
static_cast<bst_float>(CalcWeight(param_, gstats[nid]));
|
||||
static_cast<bst_float>(CalcWeight(*param, gstats[nid]));
|
||||
tree.Stat(nid).sum_hess = static_cast<bst_float>(gstats[nid].sum_hess);
|
||||
if (tree[nid].IsLeaf()) {
|
||||
if (param_.refresh_leaf) {
|
||||
tree[nid].SetLeaf(tree.Stat(nid).base_weight * param_.learning_rate);
|
||||
if (param->refresh_leaf) {
|
||||
tree[nid].SetLeaf(tree.Stat(nid).base_weight * param->learning_rate);
|
||||
}
|
||||
} else {
|
||||
tree.Stat(nid).loss_chg = static_cast<bst_float>(
|
||||
xgboost::tree::CalcGain(param_, gstats[tree[nid].LeftChild()]) +
|
||||
xgboost::tree::CalcGain(param_, gstats[tree[nid].RightChild()]) -
|
||||
xgboost::tree::CalcGain(param_, gstats[nid]));
|
||||
this->Refresh(gstats, tree[nid].LeftChild(), p_tree);
|
||||
this->Refresh(gstats, tree[nid].RightChild(), p_tree);
|
||||
tree.Stat(nid).loss_chg =
|
||||
static_cast<bst_float>(xgboost::tree::CalcGain(*param, gstats[tree[nid].LeftChild()]) +
|
||||
xgboost::tree::CalcGain(*param, gstats[tree[nid].RightChild()]) -
|
||||
xgboost::tree::CalcGain(*param, gstats[nid]));
|
||||
this->Refresh(param, gstats, tree[nid].LeftChild(), p_tree);
|
||||
this->Refresh(param, gstats, tree[nid].RightChild(), p_tree);
|
||||
}
|
||||
}
|
||||
// training parameter
|
||||
TrainParam param_;
|
||||
};
|
||||
|
||||
XGBOOST_REGISTER_TREE_UPDATER(TreeRefresher, "refresh")
|
||||
.describe("Refresher that refreshes the weight and statistics according to data.")
|
||||
.set_body([](Context const *ctx, ObjInfo) { return new TreeRefresher(ctx); });
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2014-2019 by Contributors
|
||||
/**
|
||||
* Copyright 2014-2023 by XGBoost Contributors
|
||||
* \file updater_sync.cc
|
||||
* \brief synchronize the tree in all distributed nodes
|
||||
*/
|
||||
@@ -13,8 +13,7 @@
|
||||
#include "../common/io.h"
|
||||
#include "xgboost/json.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
namespace xgboost::tree {
|
||||
|
||||
DMLC_REGISTRY_FILE_TAG(updater_sync);
|
||||
|
||||
@@ -30,11 +29,9 @@ class TreeSyncher : public TreeUpdater {
|
||||
void LoadConfig(Json const&) override {}
|
||||
void SaveConfig(Json*) const override {}
|
||||
|
||||
char const* Name() const override {
|
||||
return "prune";
|
||||
}
|
||||
[[nodiscard]] char const* Name() const override { return "prune"; }
|
||||
|
||||
void Update(HostDeviceVector<GradientPair>*, DMatrix*,
|
||||
void Update(TrainParam const*, HostDeviceVector<GradientPair>*, DMatrix*,
|
||||
common::Span<HostDeviceVector<bst_node_t>> /*out_position*/,
|
||||
const std::vector<RegTree*>& trees) override {
|
||||
if (collective::GetWorldSize() == 1) return;
|
||||
@@ -57,5 +54,4 @@ class TreeSyncher : public TreeUpdater {
|
||||
XGBOOST_REGISTER_TREE_UPDATER(TreeSyncher, "sync")
|
||||
.describe("Syncher that synchronize the tree in all distributed nodes.")
|
||||
.set_body([](Context const* ctx, ObjInfo) { return new TreeSyncher(ctx); });
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
} // namespace xgboost::tree
|
||||
|
||||
Reference in New Issue
Block a user