Merge branch 'master' into dev-hui

This commit is contained in:
amdsc21
2023-03-08 00:39:33 +01:00
221 changed files with 3122 additions and 1486 deletions

View File

@@ -9,6 +9,7 @@
#include <limits> // std::numeric_limits
#include <vector>
#include "../collective/communicator-inl.h"
#include "../common/numeric.h" // Iota
#include "../common/partition_builder.h"
#include "hist/expand_entry.h" // CPUExpandEntry
@@ -16,17 +17,73 @@
namespace xgboost {
namespace tree {
class CommonRowPartitioner {
static constexpr size_t kPartitionBlockSize = 2048;
common::PartitionBuilder<kPartitionBlockSize> partition_builder_;
common::RowSetCollection row_set_collection_;
static constexpr size_t kPartitionBlockSize = 2048;
class ColumnSplitHelper {
public:
ColumnSplitHelper() = default;
ColumnSplitHelper(bst_row_t num_row,
common::PartitionBuilder<kPartitionBlockSize>* partition_builder,
common::RowSetCollection* row_set_collection)
: partition_builder_{partition_builder}, row_set_collection_{row_set_collection} {
decision_storage_.resize(num_row);
decision_bits_ = BitVector(common::Span<BitVector::value_type>(decision_storage_));
missing_storage_.resize(num_row);
missing_bits_ = BitVector(common::Span<BitVector::value_type>(missing_storage_));
}
void Partition(common::BlockedSpace2d const& space, std::int32_t n_threads,
GHistIndexMatrix const& gmat, common::ColumnMatrix const& column_matrix,
std::vector<CPUExpandEntry> const& nodes, RegTree const* p_tree) {
// When data is split by column, we don't have all the feature values in the local worker, so
// we first collect all the decisions and whether the feature is missing into bit vectors.
std::fill(decision_storage_.begin(), decision_storage_.end(), 0);
std::fill(missing_storage_.begin(), missing_storage_.end(), 0);
common::ParallelFor2d(space, n_threads, [&](size_t node_in_set, common::Range1d r) {
const int32_t nid = nodes[node_in_set].nid;
partition_builder_->MaskRows(node_in_set, nodes, r, gmat, column_matrix, *p_tree,
(*row_set_collection_)[nid].begin, &decision_bits_,
&missing_bits_);
});
// Then aggregate the bit vectors across all the workers.
collective::Allreduce<collective::Operation::kBitwiseOR>(decision_storage_.data(),
decision_storage_.size());
collective::Allreduce<collective::Operation::kBitwiseAND>(missing_storage_.data(),
missing_storage_.size());
// Finally use the bit vectors to partition the rows.
common::ParallelFor2d(space, n_threads, [&](size_t node_in_set, common::Range1d r) {
size_t begin = r.begin();
const int32_t nid = nodes[node_in_set].nid;
const size_t task_id = partition_builder_->GetTaskIdx(node_in_set, begin);
partition_builder_->AllocateForTask(task_id);
partition_builder_->PartitionByMask(node_in_set, nodes, r, gmat, column_matrix, *p_tree,
(*row_set_collection_)[nid].begin, decision_bits_,
missing_bits_);
});
}
private:
using BitVector = RBitField8;
std::vector<BitVector::value_type> decision_storage_{};
BitVector decision_bits_{};
std::vector<BitVector::value_type> missing_storage_{};
BitVector missing_bits_{};
common::PartitionBuilder<kPartitionBlockSize>* partition_builder_;
common::RowSetCollection* row_set_collection_;
};
class CommonRowPartitioner {
public:
bst_row_t base_rowid = 0;
CommonRowPartitioner() = default;
CommonRowPartitioner(Context const* ctx, bst_row_t num_row, bst_row_t _base_rowid)
: base_rowid{_base_rowid} {
CommonRowPartitioner(Context const* ctx, bst_row_t num_row, bst_row_t _base_rowid,
bool is_col_split)
: base_rowid{_base_rowid}, is_col_split_{is_col_split} {
row_set_collection_.Clear();
std::vector<size_t>& row_indices = *row_set_collection_.Data();
row_indices.resize(num_row);
@@ -34,6 +91,10 @@ class CommonRowPartitioner {
std::size_t* p_row_indices = row_indices.data();
common::Iota(ctx, p_row_indices, p_row_indices + row_indices.size(), base_rowid);
row_set_collection_.Init();
if (is_col_split_) {
column_split_helper_ = ColumnSplitHelper{num_row, &partition_builder_, &row_set_collection_};
}
}
void FindSplitConditions(const std::vector<CPUExpandEntry>& nodes, const RegTree& tree,
@@ -156,16 +217,20 @@ class CommonRowPartitioner {
// 2.3 Split elements of row_set_collection_ to left and right child-nodes for each node
// Store results in intermediate buffers from partition_builder_
common::ParallelFor2d(space, ctx->Threads(), [&](size_t node_in_set, common::Range1d r) {
size_t begin = r.begin();
const int32_t nid = nodes[node_in_set].nid;
const size_t task_id = partition_builder_.GetTaskIdx(node_in_set, begin);
partition_builder_.AllocateForTask(task_id);
bst_bin_t split_cond = column_matrix.IsInitialized() ? split_conditions[node_in_set] : 0;
partition_builder_.template Partition<BinIdxType, any_missing, any_cat>(
node_in_set, nodes, r, split_cond, gmat, column_matrix, *p_tree,
row_set_collection_[nid].begin);
});
if (is_col_split_) {
column_split_helper_.Partition(space, ctx->Threads(), gmat, column_matrix, nodes, p_tree);
} else {
common::ParallelFor2d(space, ctx->Threads(), [&](size_t node_in_set, common::Range1d r) {
size_t begin = r.begin();
const int32_t nid = nodes[node_in_set].nid;
const size_t task_id = partition_builder_.GetTaskIdx(node_in_set, begin);
partition_builder_.AllocateForTask(task_id);
bst_bin_t split_cond = column_matrix.IsInitialized() ? split_conditions[node_in_set] : 0;
partition_builder_.template Partition<BinIdxType, any_missing, any_cat>(
node_in_set, nodes, r, split_cond, gmat, column_matrix, *p_tree,
row_set_collection_[nid].begin);
});
}
// 3. Compute offsets to copy blocks of row-indexes
// from partition_builder_ to row_set_collection_
@@ -205,6 +270,12 @@ class CommonRowPartitioner {
ctx, tree, this->Partitions(), p_out_position,
[&](size_t idx) -> bool { return gpair[idx].GetHess() - .0f == .0f; });
}
private:
common::PartitionBuilder<kPartitionBlockSize> partition_builder_;
common::RowSetCollection row_set_collection_;
bool is_col_split_;
ColumnSplitHelper column_split_helper_;
};
} // namespace tree

View File

@@ -97,7 +97,7 @@ class EvaluateSplitAgent {
idx += kBlockSize) {
local_sum += LoadGpair(node_histogram + idx);
}
local_sum = SumReduceT(temp_storage->sum_reduce).Sum(local_sum);
local_sum = SumReduceT(temp_storage->sum_reduce).Sum(local_sum); // NOLINT
// Broadcast result from thread 0
return {__shfl_sync(0xffffffff, local_sum.GetQuantisedGrad(), 0),
__shfl_sync(0xffffffff, local_sum.GetQuantisedHess(), 0)};
@@ -359,8 +359,8 @@ void GPUHistEvaluator::LaunchEvaluateSplits(
// One block for each feature
uint32_t constexpr kBlockThreads = 32;
dh::LaunchKernel{static_cast<uint32_t>(combined_num_features), kBlockThreads,
0}(
dh::LaunchKernel {static_cast<uint32_t>(combined_num_features), kBlockThreads,
0}(
EvaluateSplitsKernel<kBlockThreads>, max_active_features, d_inputs,
shared_inputs,
this->SortedIdx(d_inputs.size(), shared_inputs.feature_values.size()),

View File

@@ -1,15 +1,15 @@
/*!
* Copyright 2020-2021 by XGBoost Contributors
/**
* Copyright 2020-2023 by XGBoost Contributors
*/
#include <thrust/iterator/transform_iterator.h>
#include <thrust/reduce.h>
#include <algorithm>
#include <ctgmath>
#include <cstdint> // uint32_t
#include <limits>
#include "../../common/device_helpers.cuh"
#include "../../common/deterministic.cuh"
#include "../../common/device_helpers.cuh"
#include "../../data/ellpack_page.cuh"
#include "histogram.cuh"
#include "row_partitioner.cuh"
@@ -83,7 +83,8 @@ GradientQuantiser::GradientQuantiser(common::Span<GradientPair const> gpair) {
*/
to_floating_point_ =
histogram_rounding /
T(IntT(1) << (sizeof(typename GradientSumT::ValueT) * 8 - 2)); // keep 1 for sign bit
static_cast<T>(static_cast<IntT>(1)
<< (sizeof(typename GradientSumT::ValueT) * 8 - 2)); // keep 1 for sign bit
/**
* Factor for converting gradients from floating-point to fixed-point. For
* f64:
@@ -93,8 +94,8 @@ GradientQuantiser::GradientQuantiser(common::Span<GradientPair const> gpair) {
* rounding is calcuated as exp(m), see the rounding factor calcuation for
* details.
*/
to_fixed_point_ =
GradientSumT(T(1) / to_floating_point_.GetGrad(), T(1) / to_floating_point_.GetHess());
to_fixed_point_ = GradientSumT(static_cast<T>(1) / to_floating_point_.GetGrad(),
static_cast<T>(1) / to_floating_point_.GetHess());
}
@@ -153,7 +154,8 @@ class HistogramAgent {
d_gpair_(d_gpair) {}
__device__ void ProcessPartialTileShared(std::size_t offset) {
for (std::size_t idx = offset + threadIdx.x;
idx < min(offset + kBlockThreads * kItemsPerTile, n_elements_); idx += kBlockThreads) {
idx < std::min(offset + kBlockThreads * kItemsPerTile, n_elements_);
idx += kBlockThreads) {
int ridx = d_ridx_[idx / feature_stride_];
int gidx =
matrix_
@@ -295,11 +297,10 @@ void BuildGradientHistogram(CUDAContext const* ctx, EllpackDeviceAccessor const&
// Allocate number of blocks such that each block has about kMinItemsPerBlock work
// Up to a maximum where the device is saturated
grid_size =
min(grid_size,
unsigned(common::DivRoundUp(items_per_group, kMinItemsPerBlock)));
grid_size = std::min(grid_size, static_cast<std::uint32_t>(
common::DivRoundUp(items_per_group, kMinItemsPerBlock)));
dh::LaunchKernel{dim3(grid_size, num_groups), static_cast<uint32_t>(kBlockThreads), smem_size,
dh::LaunchKernel {dim3(grid_size, num_groups), static_cast<uint32_t>(kBlockThreads), smem_size,
ctx->Stream()} (kernel, matrix, feature_groups, d_ridx, histogram.data(),
gpair.data(), rounding);
};

View File

@@ -130,7 +130,7 @@ void SortPositionBatch(common::Span<const PerNodeData<OpDataT>> d_batch_info,
std::size_t item_idx;
AssignBatch(batch_info_itr, idx, &batch_idx, &item_idx);
auto op_res = op(ridx[item_idx], batch_info_itr[batch_idx].data);
return IndexFlagTuple{bst_uint(item_idx), op_res, batch_idx, op_res};
return IndexFlagTuple{static_cast<bst_uint>(item_idx), op_res, batch_idx, op_res};
});
size_t temp_bytes = 0;
if (tmp->empty()) {

View File

@@ -1,10 +1,11 @@
/*!
* Copyright 2021-2022 by XGBoost Contributors
/**
* Copyright 2021-2023 by XGBoost Contributors
*/
#ifndef XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_
#define XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_
#include <algorithm>
#include <cstddef> // for size_t
#include <limits>
#include <memory>
#include <numeric>
@@ -16,13 +17,11 @@
#include "../../common/random.h"
#include "../../data/gradient_index.h"
#include "../constraints.h"
#include "../param.h"
#include "../param.h" // for TrainParam
#include "../split_evaluator.h"
#include "xgboost/context.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
template <typename ExpandEntry>
class HistEvaluator {
private:
@@ -34,10 +33,11 @@ class HistEvaluator {
};
private:
TrainParam param_;
Context const* ctx_;
TrainParam const* param_;
std::shared_ptr<common::ColumnSampler> column_sampler_;
TreeEvaluator tree_evaluator_;
int32_t n_threads_ {0};
bool is_col_split_{false};
FeatureInteractionConstraintHost interaction_constraints_;
std::vector<NodeEntry> snode_;
@@ -53,8 +53,9 @@ class HistEvaluator {
}
}
bool IsValid(GradStats const &left, GradStats const &right) const {
return left.GetHess() >= param_.min_child_weight && right.GetHess() >= param_.min_child_weight;
[[nodiscard]] bool IsValid(GradStats const &left, GradStats const &right) const {
return left.GetHess() >= param_->min_child_weight &&
right.GetHess() >= param_->min_child_weight;
}
/**
@@ -93,9 +94,10 @@ class HistEvaluator {
right_sum = GradStats{hist[i]};
left_sum.SetSubstract(parent.stats, right_sum);
if (IsValid(left_sum, right_sum)) {
auto missing_left_chg = static_cast<float>(
evaluator.CalcSplitGain(param_, nidx, fidx, GradStats{left_sum}, GradStats{right_sum}) -
parent.root_gain);
auto missing_left_chg =
static_cast<float>(evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{left_sum},
GradStats{right_sum}) -
parent.root_gain);
best.Update(missing_left_chg, fidx, split_pt, true, true, left_sum, right_sum);
}
@@ -103,9 +105,10 @@ class HistEvaluator {
right_sum.Add(missing);
left_sum.SetSubstract(parent.stats, right_sum);
if (IsValid(left_sum, right_sum)) {
auto missing_right_chg = static_cast<float>(
evaluator.CalcSplitGain(param_, nidx, fidx, GradStats{left_sum}, GradStats{right_sum}) -
parent.root_gain);
auto missing_right_chg =
static_cast<float>(evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{left_sum},
GradStats{right_sum}) -
parent.root_gain);
best.Update(missing_right_chg, fidx, split_pt, false, true, left_sum, right_sum);
}
}
@@ -150,7 +153,7 @@ class HistEvaluator {
bst_bin_t f_begin = cut_ptr[fidx];
bst_bin_t f_end = cut_ptr[fidx + 1];
bst_bin_t n_bins_feature{f_end - f_begin};
auto n_bins = std::min(param_.max_cat_threshold, n_bins_feature);
auto n_bins = std::min(param_->max_cat_threshold, n_bins_feature);
// statistics on both sides of split
GradStats left_sum;
@@ -179,9 +182,9 @@ class HistEvaluator {
right_sum.SetSubstract(parent.stats, left_sum); // missing on right
}
if (IsValid(left_sum, right_sum)) {
auto loss_chg =
evaluator.CalcSplitGain(param_, nidx, fidx, GradStats{left_sum}, GradStats{right_sum}) -
parent.root_gain;
auto loss_chg = evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{left_sum},
GradStats{right_sum}) -
parent.root_gain;
// We don't have a numeric split point, nan here is a dummy split.
if (best.Update(loss_chg, fidx, std::numeric_limits<float>::quiet_NaN(), d_step == 1, true,
left_sum, right_sum)) {
@@ -254,7 +257,7 @@ class HistEvaluator {
if (d_step > 0) {
// forward enumeration: split at right bound of each bin
loss_chg =
static_cast<float>(evaluator.CalcSplitGain(param_, nidx, fidx, GradStats{left_sum},
static_cast<float>(evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{left_sum},
GradStats{right_sum}) -
parent.root_gain);
split_pt = cut_val[i]; // not used for partition based
@@ -262,7 +265,7 @@ class HistEvaluator {
} else {
// backward enumeration: split at left bound of each bin
loss_chg =
static_cast<float>(evaluator.CalcSplitGain(param_, nidx, fidx, GradStats{right_sum},
static_cast<float>(evaluator.CalcSplitGain(*param_, nidx, fidx, GradStats{right_sum},
GradStats{left_sum}) -
parent.root_gain);
if (i == imin) {
@@ -283,6 +286,7 @@ class HistEvaluator {
void EvaluateSplits(const common::HistCollection &hist, common::HistogramCuts const &cut,
common::Span<FeatureType const> feature_types, const RegTree &tree,
std::vector<ExpandEntry> *p_entries) {
auto n_threads = ctx_->Threads();
auto& entries = *p_entries;
// All nodes are on the same level, so we can store the shared ptr.
std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> features(
@@ -294,23 +298,23 @@ class HistEvaluator {
}
CHECK(!features.empty());
const size_t grain_size =
std::max<size_t>(1, features.front()->Size() / n_threads_);
std::max<size_t>(1, features.front()->Size() / n_threads);
common::BlockedSpace2d space(entries.size(), [&](size_t nidx_in_set) {
return features[nidx_in_set]->Size();
}, grain_size);
std::vector<ExpandEntry> tloc_candidates(n_threads_ * entries.size());
std::vector<ExpandEntry> tloc_candidates(n_threads * entries.size());
for (size_t i = 0; i < entries.size(); ++i) {
for (decltype(n_threads_) j = 0; j < n_threads_; ++j) {
tloc_candidates[i * n_threads_ + j] = entries[i];
for (decltype(n_threads) j = 0; j < n_threads; ++j) {
tloc_candidates[i * n_threads + j] = entries[i];
}
}
auto evaluator = tree_evaluator_.GetEvaluator();
auto const& cut_ptrs = cut.Ptrs();
common::ParallelFor2d(space, n_threads_, [&](size_t nidx_in_set, common::Range1d r) {
common::ParallelFor2d(space, n_threads, [&](size_t nidx_in_set, common::Range1d r) {
auto tidx = omp_get_thread_num();
auto entry = &tloc_candidates[n_threads_ * nidx_in_set + tidx];
auto entry = &tloc_candidates[n_threads * nidx_in_set + tidx];
auto best = &entry->split;
auto nidx = entry->nid;
auto histogram = hist[nidx];
@@ -323,7 +327,7 @@ class HistEvaluator {
}
if (is_cat) {
auto n_bins = cut_ptrs.at(fidx + 1) - cut_ptrs[fidx];
if (common::UseOneHot(n_bins, param_.max_cat_to_onehot)) {
if (common::UseOneHot(n_bins, param_->max_cat_to_onehot)) {
EnumerateOneHot(cut, histogram, fidx, nidx, evaluator, best);
} else {
std::vector<size_t> sorted_idx(n_bins);
@@ -331,8 +335,8 @@ class HistEvaluator {
auto feat_hist = histogram.subspan(cut_ptrs[fidx], n_bins);
// Sort the histogram to get contiguous partitions.
std::stable_sort(sorted_idx.begin(), sorted_idx.end(), [&](size_t l, size_t r) {
auto ret = evaluator.CalcWeightCat(param_, feat_hist[l]) <
evaluator.CalcWeightCat(param_, feat_hist[r]);
auto ret = evaluator.CalcWeightCat(*param_, feat_hist[l]) <
evaluator.CalcWeightCat(*param_, feat_hist[r]);
return ret;
});
EnumeratePart<+1>(cut, sorted_idx, histogram, fidx, nidx, evaluator, best);
@@ -349,12 +353,29 @@ class HistEvaluator {
for (unsigned nidx_in_set = 0; nidx_in_set < entries.size();
++nidx_in_set) {
for (auto tidx = 0; tidx < n_threads_; ++tidx) {
for (auto tidx = 0; tidx < n_threads; ++tidx) {
entries[nidx_in_set].split.Update(
tloc_candidates[n_threads_ * nidx_in_set + tidx].split);
tloc_candidates[n_threads * nidx_in_set + tidx].split);
}
}
if (is_col_split_) {
// With column-wise data split, we gather the best splits from all the workers and update the
// expand entries accordingly.
auto const world = collective::GetWorldSize();
auto const rank = collective::GetRank();
auto const num_entries = entries.size();
std::vector<ExpandEntry> buffer{num_entries * world};
std::copy_n(entries.cbegin(), num_entries, buffer.begin() + num_entries * rank);
collective::Allgather(buffer.data(), buffer.size() * sizeof(ExpandEntry));
for (auto worker = 0; worker < world; ++worker) {
for (std::size_t nidx_in_set = 0; nidx_in_set < entries.size(); ++nidx_in_set) {
entries[nidx_in_set].split.Update(buffer[worker * num_entries + nidx_in_set].split);
}
}
}
}
// Add splits to tree, handles all statistic
void ApplyTreeSplit(ExpandEntry const& candidate, RegTree *p_tree) {
auto evaluator = tree_evaluator_.GetEvaluator();
@@ -362,24 +383,22 @@ class HistEvaluator {
GradStats parent_sum = candidate.split.left_sum;
parent_sum.Add(candidate.split.right_sum);
auto base_weight =
evaluator.CalcWeight(candidate.nid, param_, GradStats{parent_sum});
auto base_weight = evaluator.CalcWeight(candidate.nid, *param_, GradStats{parent_sum});
auto left_weight =
evaluator.CalcWeight(candidate.nid, param_, GradStats{candidate.split.left_sum});
evaluator.CalcWeight(candidate.nid, *param_, GradStats{candidate.split.left_sum});
auto right_weight =
evaluator.CalcWeight(candidate.nid, param_, GradStats{candidate.split.right_sum});
evaluator.CalcWeight(candidate.nid, *param_, GradStats{candidate.split.right_sum});
if (candidate.split.is_cat) {
tree.ExpandCategorical(
candidate.nid, candidate.split.SplitIndex(), candidate.split.cat_bits,
candidate.split.DefaultLeft(), base_weight, left_weight * param_.learning_rate,
right_weight * param_.learning_rate, candidate.split.loss_chg, parent_sum.GetHess(),
candidate.split.DefaultLeft(), base_weight, left_weight * param_->learning_rate,
right_weight * param_->learning_rate, candidate.split.loss_chg, parent_sum.GetHess(),
candidate.split.left_sum.GetHess(), candidate.split.right_sum.GetHess());
} else {
tree.ExpandNode(candidate.nid, candidate.split.SplitIndex(), candidate.split.split_value,
candidate.split.DefaultLeft(), base_weight,
left_weight * param_.learning_rate, right_weight * param_.learning_rate,
left_weight * param_->learning_rate, right_weight * param_->learning_rate,
candidate.split.loss_chg, parent_sum.GetHess(),
candidate.split.left_sum.GetHess(), candidate.split.right_sum.GetHess());
}
@@ -395,11 +414,11 @@ class HistEvaluator {
max_node = std::max(candidate.nid, max_node);
snode_.resize(tree.GetNodes().size());
snode_.at(left_child).stats = candidate.split.left_sum;
snode_.at(left_child).root_gain = evaluator.CalcGain(
candidate.nid, param_, GradStats{candidate.split.left_sum});
snode_.at(left_child).root_gain =
evaluator.CalcGain(candidate.nid, *param_, GradStats{candidate.split.left_sum});
snode_.at(right_child).stats = candidate.split.right_sum;
snode_.at(right_child).root_gain = evaluator.CalcGain(
candidate.nid, param_, GradStats{candidate.split.right_sum});
snode_.at(right_child).root_gain =
evaluator.CalcGain(candidate.nid, *param_, GradStats{candidate.split.right_sum});
interaction_constraints_.Split(candidate.nid,
tree[candidate.nid].SplitIndex(), left_child,
@@ -409,30 +428,31 @@ class HistEvaluator {
auto Evaluator() const { return tree_evaluator_.GetEvaluator(); }
auto const& Stats() const { return snode_; }
float InitRoot(GradStats const& root_sum) {
float InitRoot(GradStats const &root_sum) {
snode_.resize(1);
auto root_evaluator = tree_evaluator_.GetEvaluator();
snode_[0].stats = GradStats{root_sum.GetGrad(), root_sum.GetHess()};
snode_[0].root_gain = root_evaluator.CalcGain(RegTree::kRoot, param_,
GradStats{snode_[0].stats});
auto weight = root_evaluator.CalcWeight(RegTree::kRoot, param_,
GradStats{snode_[0].stats});
snode_[0].root_gain =
root_evaluator.CalcGain(RegTree::kRoot, *param_, GradStats{snode_[0].stats});
auto weight = root_evaluator.CalcWeight(RegTree::kRoot, *param_, GradStats{snode_[0].stats});
return weight;
}
public:
// The column sampler must be constructed by caller since we need to preserve the rng
// for the entire training session.
explicit HistEvaluator(TrainParam const &param, MetaInfo const &info, int32_t n_threads,
explicit HistEvaluator(Context const *ctx, TrainParam const *param, MetaInfo const &info,
std::shared_ptr<common::ColumnSampler> sampler)
: param_{param},
: ctx_{ctx},
param_{param},
column_sampler_{std::move(sampler)},
tree_evaluator_{param, static_cast<bst_feature_t>(info.num_col_), Context::kCpuId},
n_threads_{n_threads} {
interaction_constraints_.Configure(param, info.num_col_);
column_sampler_->Init(info.num_col_, info.feature_weights.HostVector(), param_.colsample_bynode,
param_.colsample_bylevel, param_.colsample_bytree);
tree_evaluator_{*param, static_cast<bst_feature_t>(info.num_col_), Context::kCpuId},
is_col_split_{info.data_split_mode == DataSplitMode::kCol} {
interaction_constraints_.Configure(*param, info.num_col_);
column_sampler_->Init(ctx, info.num_col_, info.feature_weights.HostVector(),
param_->colsample_bynode, param_->colsample_bylevel,
param_->colsample_bytree);
}
};
@@ -467,6 +487,5 @@ void UpdatePredictionCacheImpl(Context const *ctx, RegTree const *p_last_tree,
});
}
}
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree
#endif // XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_

View File

@@ -29,6 +29,7 @@ class HistogramBuilder {
size_t n_batches_{0};
// Whether XGBoost is running in distributed environment.
bool is_distributed_{false};
bool is_col_split_{false};
public:
/**
@@ -40,7 +41,7 @@ class HistogramBuilder {
* of using global rabit variable.
*/
void Reset(uint32_t total_bins, BatchParam p, int32_t n_threads, size_t n_batches,
bool is_distributed) {
bool is_distributed, bool is_col_split) {
CHECK_GE(n_threads, 1);
n_threads_ = n_threads;
n_batches_ = n_batches;
@@ -50,6 +51,7 @@ class HistogramBuilder {
buffer_.Init(total_bins);
builder_ = common::GHistBuilder(total_bins);
is_distributed_ = is_distributed;
is_col_split_ = is_col_split;
// Workaround s390x gcc 7.5.0
auto DMLC_ATTRIBUTE_UNUSED __force_instantiation = &GradientPairPrecise::Reduce;
}
@@ -96,7 +98,7 @@ class HistogramBuilder {
std::vector<ExpandEntry> const &nodes_for_explicit_hist_build,
std::vector<ExpandEntry> const &nodes_for_subtraction_trick,
RegTree const *p_tree) {
if (is_distributed_) {
if (is_distributed_ && !is_col_split_) {
this->AddHistRowsDistributed(starting_index, sync_count, nodes_for_explicit_hist_build,
nodes_for_subtraction_trick, p_tree);
} else {
@@ -130,7 +132,7 @@ class HistogramBuilder {
return;
}
if (is_distributed_) {
if (is_distributed_ && !is_col_split_) {
this->SyncHistogramDistributed(p_tree, nodes_for_explicit_hist_build,
nodes_for_subtraction_trick,
starting_index, sync_count);

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2014-2021 by Contributors
/**
* Copyright 2014-2023 by XGBoost Contributors
* \file param.h
* \brief training parameters, statistics used to support tree construction.
* \author Tianqi Chen
@@ -238,9 +238,8 @@ XGBOOST_DEVICE inline static T1 ThresholdL1(T1 w, T2 alpha) {
// calculate the cost of loss function
template <typename TrainingParams, typename T>
XGBOOST_DEVICE inline T CalcGainGivenWeight(const TrainingParams &p,
T sum_grad, T sum_hess, T w) {
return -(T(2.0) * sum_grad * w + (sum_hess + p.reg_lambda) * common::Sqr(w));
XGBOOST_DEVICE inline T CalcGainGivenWeight(const TrainingParams &p, T sum_grad, T sum_hess, T w) {
return -(static_cast<T>(2.0) * sum_grad * w + (sum_hess + p.reg_lambda) * common::Sqr(w));
}
// calculate weight given the statistics
@@ -261,7 +260,7 @@ XGBOOST_DEVICE inline T CalcWeight(const TrainingParams &p, T sum_grad,
template <typename TrainingParams, typename T>
XGBOOST_DEVICE inline T CalcGain(const TrainingParams &p, T sum_grad, T sum_hess) {
if (sum_hess < p.min_child_weight || sum_hess <= 0.0) {
return T(0.0);
return static_cast<T>(0.0);
}
if (p.max_delta_step == 0.0f) {
if (p.reg_alpha == 0.0f) {

View File

@@ -1069,8 +1069,8 @@ bool LoadModelImpl(Json const& in, TreeParam* param, std::vector<RTreeNodeStat>*
split_types = std::remove_reference_t<decltype(split_types)>(n_nodes);
split_categories_segments = std::remove_reference_t<decltype(split_categories_segments)>(n_nodes);
static_assert(std::is_integral<decltype(GetElem<Integer>(lefts, 0))>::value, "");
static_assert(std::is_floating_point<decltype(GetElem<Number>(loss_changes, 0))>::value, "");
static_assert(std::is_integral<decltype(GetElem<Integer>(lefts, 0))>::value);
static_assert(std::is_floating_point<decltype(GetElem<Number>(loss_changes, 0))>::value);
CHECK_EQ(n_nodes, split_categories_segments.size());
// Set node

View File

@@ -23,8 +23,7 @@
#include "xgboost/tree_model.h"
#include "xgboost/tree_updater.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
DMLC_REGISTRY_FILE_TAG(updater_approx);
@@ -41,7 +40,7 @@ auto BatchSpec(TrainParam const &p, common::Span<float> hess) {
class GloablApproxBuilder {
protected:
TrainParam param_;
TrainParam const* param_;
std::shared_ptr<common::ColumnSampler> col_sampler_;
HistEvaluator<CPUExpandEntry> evaluator_;
HistogramBuilder<CPUExpandEntry> histogram_builder_;
@@ -64,19 +63,19 @@ class GloablApproxBuilder {
bst_bin_t n_total_bins = 0;
partitioner_.clear();
// Generating the GHistIndexMatrix is quite slow, is there a way to speed it up?
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(param_, hess, task_))) {
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(*param_, hess, task_))) {
if (n_total_bins == 0) {
n_total_bins = page.cut.TotalBins();
feature_values_ = page.cut;
} else {
CHECK_EQ(n_total_bins, page.cut.TotalBins());
}
partitioner_.emplace_back(this->ctx_, page.Size(), page.base_rowid);
partitioner_.emplace_back(this->ctx_, page.Size(), page.base_rowid, p_fmat->IsColumnSplit());
n_batches_++;
}
histogram_builder_.Reset(n_total_bins, BatchSpec(param_, hess), ctx_->Threads(), n_batches_,
collective::IsDistributed());
histogram_builder_.Reset(n_total_bins, BatchSpec(*param_, hess), ctx_->Threads(), n_batches_,
collective::IsDistributed(), p_fmat->IsColumnSplit());
monitor_->Stop(__func__);
}
@@ -90,11 +89,13 @@ class GloablApproxBuilder {
for (auto const &g : gpair) {
root_sum.Add(g);
}
collective::Allreduce<collective::Operation::kSum>(reinterpret_cast<double *>(&root_sum), 2);
if (p_fmat->IsRowSplit()) {
collective::Allreduce<collective::Operation::kSum>(reinterpret_cast<double *>(&root_sum), 2);
}
std::vector<CPUExpandEntry> nodes{best};
size_t i = 0;
auto space = ConstructHistSpace(partitioner_, nodes);
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(param_, hess))) {
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(*param_, hess))) {
histogram_builder_.BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(), nodes,
{}, gpair);
i++;
@@ -103,7 +104,7 @@ class GloablApproxBuilder {
auto weight = evaluator_.InitRoot(root_sum);
p_tree->Stat(RegTree::kRoot).sum_hess = root_sum.GetHess();
p_tree->Stat(RegTree::kRoot).base_weight = weight;
(*p_tree)[RegTree::kRoot].SetLeaf(param_.learning_rate * weight);
(*p_tree)[RegTree::kRoot].SetLeaf(param_->learning_rate * weight);
auto const &histograms = histogram_builder_.Histogram();
auto ft = p_fmat->Info().feature_types.ConstHostSpan();
@@ -145,7 +146,7 @@ class GloablApproxBuilder {
size_t i = 0;
auto space = ConstructHistSpace(partitioner_, nodes_to_build);
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(param_, hess))) {
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(*param_, hess))) {
histogram_builder_.BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(),
nodes_to_build, nodes_to_sub, gpair);
i++;
@@ -166,12 +167,12 @@ class GloablApproxBuilder {
}
public:
explicit GloablApproxBuilder(TrainParam param, MetaInfo const &info, Context const *ctx,
explicit GloablApproxBuilder(TrainParam const *param, MetaInfo const &info, Context const *ctx,
std::shared_ptr<common::ColumnSampler> column_sampler, ObjInfo task,
common::Monitor *monitor)
: param_{std::move(param)},
: param_{param},
col_sampler_{std::move(column_sampler)},
evaluator_{param_, info, ctx->Threads(), col_sampler_},
evaluator_{ctx, param_, info, col_sampler_},
ctx_{ctx},
task_{task},
monitor_{monitor} {}
@@ -181,7 +182,7 @@ class GloablApproxBuilder {
p_last_tree_ = p_tree;
this->InitData(p_fmat, hess);
Driver<CPUExpandEntry> driver(param_);
Driver<CPUExpandEntry> driver(*param_);
auto &tree = *p_tree;
driver.Push({this->InitRoot(p_fmat, gpair, hess, p_tree)});
auto expand_set = driver.Pop();
@@ -211,7 +212,7 @@ class GloablApproxBuilder {
monitor_->Start("UpdatePosition");
size_t page_id = 0;
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(param_, hess))) {
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchSpec(*param_, hess))) {
partitioner_.at(page_id).UpdatePosition(ctx_, page, applied, p_tree);
page_id++;
}
@@ -248,7 +249,6 @@ class GloablApproxBuilder {
* iteration.
*/
class GlobalApproxUpdater : public TreeUpdater {
TrainParam param_;
common::Monitor monitor_;
// specializations for different histogram precision.
std::unique_ptr<GloablApproxBuilder> pimpl_;
@@ -263,15 +263,9 @@ class GlobalApproxUpdater : public TreeUpdater {
monitor_.Init(__func__);
}
void Configure(const Args &args) override { param_.UpdateAllowUnknown(args); }
void LoadConfig(Json const &in) override {
auto const &config = get<Object const>(in);
FromJson(config.at("train_param"), &this->param_);
}
void SaveConfig(Json *p_out) const override {
auto &out = *p_out;
out["train_param"] = ToJson(param_);
}
void Configure(Args const &) override {}
void LoadConfig(Json const &) override {}
void SaveConfig(Json *) const override {}
void InitData(TrainParam const &param, HostDeviceVector<GradientPair> const *gpair,
linalg::Matrix<GradientPair> *sampled) {
@@ -281,20 +275,17 @@ class GlobalApproxUpdater : public TreeUpdater {
SampleGradient(ctx_, param, sampled->HostView());
}
char const *Name() const override { return "grow_histmaker"; }
[[nodiscard]] char const *Name() const override { return "grow_histmaker"; }
void Update(HostDeviceVector<GradientPair> *gpair, DMatrix *m,
void Update(TrainParam const *param, HostDeviceVector<GradientPair> *gpair, DMatrix *m,
common::Span<HostDeviceVector<bst_node_t>> out_position,
const std::vector<RegTree *> &trees) override {
float lr = param_.learning_rate;
param_.learning_rate = lr / trees.size();
pimpl_ = std::make_unique<GloablApproxBuilder>(param_, m->Info(), ctx_, column_sampler_, task_,
pimpl_ = std::make_unique<GloablApproxBuilder>(param, m->Info(), ctx_, column_sampler_, task_,
&monitor_);
linalg::Matrix<GradientPair> h_gpair;
// Obtain the hessian values for weighted sketching
InitData(param_, gpair, &h_gpair);
InitData(*param, gpair, &h_gpair);
std::vector<float> hess(h_gpair.Size());
auto const &s_gpair = h_gpair.Data()->ConstHostVector();
std::transform(s_gpair.begin(), s_gpair.end(), hess.begin(),
@@ -302,12 +293,11 @@ class GlobalApproxUpdater : public TreeUpdater {
cached_ = m;
size_t t_idx = 0;
std::size_t t_idx = 0;
for (auto p_tree : trees) {
this->pimpl_->UpdateTree(m, s_gpair, hess, p_tree, &out_position[t_idx]);
++t_idx;
}
param_.learning_rate = lr;
}
bool UpdatePredictionCache(const DMatrix *data, linalg::VectorView<float> out_preds) override {
@@ -318,7 +308,7 @@ class GlobalApproxUpdater : public TreeUpdater {
return true;
}
bool HasNodePosition() const override { return true; }
[[nodiscard]] bool HasNodePosition() const override { return true; }
};
DMLC_REGISTRY_FILE_TAG(grow_histmaker);
@@ -328,5 +318,4 @@ XGBOOST_REGISTER_TREE_UPDATER(GlobalHistMaker, "grow_histmaker")
"Tree constructor that uses approximate histogram construction "
"for each node.")
.set_body([](Context const *ctx, ObjInfo task) { return new GlobalApproxUpdater(ctx, task); });
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2014-2022 by XGBoost Contributors
/**
* Copyright 2014-2023 by XGBoost Contributors
* \file updater_colmaker.cc
* \brief use columnwise update to construct a tree
* \author Tianqi Chen
@@ -17,8 +17,7 @@
#include "../common/random.h"
#include "split_evaluator.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
DMLC_REGISTRY_FILE_TAG(updater_colmaker);
@@ -57,18 +56,15 @@ class ColMaker: public TreeUpdater {
public:
explicit ColMaker(Context const *ctx) : TreeUpdater(ctx) {}
void Configure(const Args &args) override {
param_.UpdateAllowUnknown(args);
colmaker_param_.UpdateAllowUnknown(args);
}
void LoadConfig(Json const& in) override {
auto const& config = get<Object const>(in);
FromJson(config.at("train_param"), &this->param_);
FromJson(config.at("colmaker_train_param"), &this->colmaker_param_);
}
void SaveConfig(Json* p_out) const override {
auto& out = *p_out;
out["train_param"] = ToJson(param_);
void SaveConfig(Json *p_out) const override {
auto &out = *p_out;
out["colmaker_train_param"] = ToJson(colmaker_param_);
}
@@ -95,7 +91,7 @@ class ColMaker: public TreeUpdater {
}
}
void Update(HostDeviceVector<GradientPair> *gpair, DMatrix *dmat,
void Update(TrainParam const *param, HostDeviceVector<GradientPair> *gpair, DMatrix *dmat,
common::Span<HostDeviceVector<bst_node_t>> /*out_position*/,
const std::vector<RegTree *> &trees) override {
if (collective::IsDistributed()) {
@@ -108,22 +104,16 @@ class ColMaker: public TreeUpdater {
}
this->LazyGetColumnDensity(dmat);
// rescale learning rate according to size of trees
float lr = param_.learning_rate;
param_.learning_rate = lr / trees.size();
interaction_constraints_.Configure(param_, dmat->Info().num_row_);
interaction_constraints_.Configure(*param, dmat->Info().num_row_);
// build tree
for (auto tree : trees) {
CHECK(ctx_);
Builder builder(param_, colmaker_param_, interaction_constraints_, ctx_,
column_densities_);
Builder builder(*param, colmaker_param_, interaction_constraints_, ctx_, column_densities_);
builder.Update(gpair->ConstHostVector(), dmat, tree);
}
param_.learning_rate = lr;
}
protected:
// training parameter
TrainParam param_;
ColMakerTrainParam colmaker_param_;
// SplitEvaluator that will be cloned for each Builder
std::vector<float> column_densities_;
@@ -234,9 +224,9 @@ class ColMaker: public TreeUpdater {
}
}
{
column_sampler_.Init(fmat.Info().num_col_, fmat.Info().feature_weights.ConstHostVector(),
param_.colsample_bynode, param_.colsample_bylevel,
param_.colsample_bytree);
column_sampler_.Init(ctx_, fmat.Info().num_col_,
fmat.Info().feature_weights.ConstHostVector(), param_.colsample_bynode,
param_.colsample_bylevel, param_.colsample_bytree);
}
{
// setup temp space for each thread
@@ -614,5 +604,4 @@ class ColMaker: public TreeUpdater {
XGBOOST_REGISTER_TREE_UPDATER(ColMaker, "grow_colmaker")
.describe("Grow tree with parallelization over columns.")
.set_body([](Context const *ctx, ObjInfo) { return new ColMaker(ctx); });
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2017-2022 XGBoost contributors
/**
* Copyright 2017-2023 by XGBoost contributors
*/
#include <thrust/copy.h>
#include <thrust/reduce.h>
@@ -160,11 +160,11 @@ class DeviceHistogramStorage {
if (nidx_map_.find(nidx) != nidx_map_.cend()) {
// Fetch from normal cache
auto ptr = data_.data().get() + nidx_map_.at(nidx);
return common::Span<GradientSumT>(reinterpret_cast<GradientSumT*>(ptr), n_bins_);
return {reinterpret_cast<GradientSumT*>(ptr), static_cast<std::size_t>(n_bins_)};
} else {
// Fetch from overflow
auto ptr = overflow_.data().get() + overflow_nidx_map_.at(nidx);
return common::Span<GradientSumT>(reinterpret_cast<GradientSumT*>(ptr), n_bins_);
return {reinterpret_cast<GradientSumT*>(ptr), static_cast<std::size_t>(n_bins_)};
}
}
};
@@ -243,7 +243,7 @@ struct GPUHistMakerDevice {
// thread safe
void Reset(HostDeviceVector<GradientPair>* dh_gpair, DMatrix* dmat, int64_t num_columns) {
auto const& info = dmat->Info();
this->column_sampler.Init(num_columns, info.feature_weights.HostVector(),
this->column_sampler.Init(ctx_, num_columns, info.feature_weights.HostVector(),
param.colsample_bynode, param.colsample_bylevel,
param.colsample_bytree);
dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
@@ -306,6 +306,8 @@ struct GPUHistMakerDevice {
matrix.is_dense
};
dh::TemporaryArray<GPUExpandEntry> entries(2 * candidates.size());
// Store the feature set ptrs so they dont go out of scope before the kernel is called
std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> feature_sets;
for (size_t i = 0; i < candidates.size(); i++) {
auto candidate = candidates.at(i);
int left_nidx = tree[candidate.nid].LeftChild();
@@ -314,10 +316,12 @@ struct GPUHistMakerDevice {
nidx[i * 2 + 1] = right_nidx;
auto left_sampled_features = column_sampler.GetFeatureSet(tree.GetDepth(left_nidx));
left_sampled_features->SetDevice(ctx_->gpu_id);
feature_sets.emplace_back(left_sampled_features);
common::Span<bst_feature_t> left_feature_set =
interaction_constraints.Query(left_sampled_features->DeviceSpan(), left_nidx);
auto right_sampled_features = column_sampler.GetFeatureSet(tree.GetDepth(right_nidx));
right_sampled_features->SetDevice(ctx_->gpu_id);
feature_sets.emplace_back(right_sampled_features);
common::Span<bst_feature_t> right_feature_set =
interaction_constraints.Query(right_sampled_features->DeviceSpan(),
right_nidx);
@@ -330,8 +334,8 @@ struct GPUHistMakerDevice {
}
bst_feature_t max_active_features = 0;
for (auto input : h_node_inputs) {
max_active_features = std::max(max_active_features,
bst_feature_t(input.feature_set.size()));
max_active_features =
std::max(max_active_features, static_cast<bst_feature_t>(input.feature_set.size()));
}
dh::safe_cuda(cudaMemcpyAsync(
d_node_inputs.data().get(), h_node_inputs.data(),
@@ -752,7 +756,6 @@ class GPUHistMaker : public TreeUpdater {
void Configure(const Args& args) override {
// Used in test to count how many configurations are performed
LOG(DEBUG) << "[GPU Hist]: Configure";
param_.UpdateAllowUnknown(args);
hist_maker_param_.UpdateAllowUnknown(args);
dh::CheckComputeCapability();
initialised_ = false;
@@ -764,32 +767,26 @@ class GPUHistMaker : public TreeUpdater {
auto const& config = get<Object const>(in);
FromJson(config.at("gpu_hist_train_param"), &this->hist_maker_param_);
initialised_ = false;
FromJson(config.at("train_param"), &param_);
}
void SaveConfig(Json* p_out) const override {
auto& out = *p_out;
out["gpu_hist_train_param"] = ToJson(hist_maker_param_);
out["train_param"] = ToJson(param_);
}
~GPUHistMaker() { // NOLINT
dh::GlobalMemoryLogger().Log();
}
void Update(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
void Update(TrainParam const* param, HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
common::Span<HostDeviceVector<bst_node_t>> out_position,
const std::vector<RegTree*>& trees) override {
monitor_.Start("Update");
// rescale learning rate according to size of trees
float lr = param_.learning_rate;
param_.learning_rate = lr / trees.size();
// build tree
try {
size_t t_idx{0};
for (xgboost::RegTree* tree : trees) {
this->UpdateTree(gpair, dmat, tree, &out_position[t_idx]);
this->UpdateTree(param, gpair, dmat, tree, &out_position[t_idx]);
if (hist_maker_param_.debug_synchronize) {
this->CheckTreesSynchronized(tree);
@@ -800,12 +797,10 @@ class GPUHistMaker : public TreeUpdater {
} catch (const std::exception& e) {
LOG(FATAL) << "Exception in gpu_hist: " << e.what() << std::endl;
}
param_.learning_rate = lr;
monitor_.Stop("Update");
}
void InitDataOnce(DMatrix* dmat) {
void InitDataOnce(TrainParam const* param, DMatrix* dmat) {
CHECK_GE(ctx_->gpu_id, 0) << "Must have at least one device";
info_ = &dmat->Info();
@@ -814,24 +809,24 @@ class GPUHistMaker : public TreeUpdater {
collective::Broadcast(&column_sampling_seed, sizeof(column_sampling_seed), 0);
BatchParam batch_param{
ctx_->gpu_id,
param_.max_bin,
ctx_->gpu_id,
param->max_bin,
};
auto page = (*dmat->GetBatches<EllpackPage>(batch_param).begin()).Impl();
dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
info_->feature_types.SetDevice(ctx_->gpu_id);
maker.reset(new GPUHistMakerDevice<GradientSumT>(
ctx_, page, info_->feature_types.ConstDeviceSpan(), info_->num_row_, param_,
ctx_, page, info_->feature_types.ConstDeviceSpan(), info_->num_row_, *param,
column_sampling_seed, info_->num_col_, batch_param));
p_last_fmat_ = dmat;
initialised_ = true;
}
void InitData(DMatrix* dmat, RegTree const* p_tree) {
void InitData(TrainParam const* param, DMatrix* dmat, RegTree const* p_tree) {
if (!initialised_) {
monitor_.Start("InitDataOnce");
this->InitDataOnce(dmat);
this->InitDataOnce(param, dmat);
monitor_.Stop("InitDataOnce");
}
p_last_tree_ = p_tree;
@@ -852,10 +847,10 @@ class GPUHistMaker : public TreeUpdater {
CHECK(*local_tree == reference_tree);
}
void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat, RegTree* p_tree,
HostDeviceVector<bst_node_t>* p_out_position) {
void UpdateTree(TrainParam const* param, HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat,
RegTree* p_tree, HostDeviceVector<bst_node_t>* p_out_position) {
monitor_.Start("InitData");
this->InitData(p_fmat, p_tree);
this->InitData(param, p_fmat, p_tree);
monitor_.Stop("InitData");
gpair->SetDevice(ctx_->gpu_id);
@@ -874,7 +869,6 @@ class GPUHistMaker : public TreeUpdater {
return result;
}
TrainParam param_; // NOLINT
MetaInfo* info_{}; // NOLINT
std::unique_ptr<GPUHistMakerDevice<GradientSumT>> maker; // NOLINT

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2014-2022 by XGBoost Contributors
/**
* Copyright 2014-2023 by XGBoost Contributors
* \file updater_prune.cc
* \brief prune a tree given the statistics
* \author Tianqi Chen
@@ -8,13 +8,11 @@
#include <memory>
#include "../common/timer.h"
#include "./param.h"
#include "xgboost/base.h"
#include "xgboost/json.h"
#include "./param.h"
#include "../common/timer.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
DMLC_REGISTRY_FILE_TAG(updater_prune);
/*! \brief pruner that prunes a tree after growing finishes */
@@ -24,47 +22,31 @@ class TreePruner : public TreeUpdater {
syncher_.reset(TreeUpdater::Create("sync", ctx_, task));
pruner_monitor_.Init("TreePruner");
}
char const* Name() const override {
return "prune";
}
[[nodiscard]] char const* Name() const override { return "prune"; }
// set training parameter
void Configure(const Args& args) override {
param_.UpdateAllowUnknown(args);
syncher_->Configure(args);
}
void Configure(const Args& args) override { syncher_->Configure(args); }
void LoadConfig(Json const& in) override {
auto const& config = get<Object const>(in);
FromJson(config.at("train_param"), &this->param_);
}
void SaveConfig(Json* p_out) const override {
auto& out = *p_out;
out["train_param"] = ToJson(param_);
}
bool CanModifyTree() const override {
return true;
}
void LoadConfig(Json const&) override {}
void SaveConfig(Json*) const override {}
[[nodiscard]] bool CanModifyTree() const override { return true; }
// update the tree, do pruning
void Update(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat,
void Update(TrainParam const* param, HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat,
common::Span<HostDeviceVector<bst_node_t>> out_position,
const std::vector<RegTree*>& trees) override {
pruner_monitor_.Start("PrunerUpdate");
// rescale learning rate according to size of trees
float lr = param_.learning_rate;
param_.learning_rate = lr / trees.size();
for (auto tree : trees) {
this->DoPrune(tree);
this->DoPrune(param, tree);
}
param_.learning_rate = lr;
syncher_->Update(gpair, p_fmat, out_position, trees);
syncher_->Update(param, gpair, p_fmat, out_position, trees);
pruner_monitor_.Stop("PrunerUpdate");
}
private:
// try to prune off current leaf
bst_node_t TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) { // NOLINT(*)
bst_node_t TryPruneLeaf(TrainParam const* param, RegTree* p_tree, int nid, int depth,
int npruned) {
auto& tree = *p_tree;
CHECK(tree[nid].IsLeaf());
if (tree[nid].IsRoot()) {
return npruned;
@@ -77,22 +59,22 @@ class TreePruner : public TreeUpdater {
auto right = tree[pid].RightChild();
bool balanced = tree[left].IsLeaf() &&
right != RegTree::kInvalidNodeId && tree[right].IsLeaf();
if (balanced && param_.NeedPrune(s.loss_chg, depth)) {
if (balanced && param->NeedPrune(s.loss_chg, depth)) {
// need to be pruned
tree.ChangeToLeaf(pid, param_.learning_rate * s.base_weight);
tree.ChangeToLeaf(pid, param->learning_rate * s.base_weight);
// tail recursion
return this->TryPruneLeaf(tree, pid, depth - 1, npruned + 2);
return this->TryPruneLeaf(param, p_tree, pid, depth - 1, npruned + 2);
} else {
return npruned;
}
}
/*! \brief do pruning of a tree */
void DoPrune(RegTree* p_tree) {
void DoPrune(TrainParam const* param, RegTree* p_tree) {
auto& tree = *p_tree;
bst_node_t npruned = 0;
for (int nid = 0; nid < tree.param.num_nodes; ++nid) {
if (tree[nid].IsLeaf() && !tree[nid].IsDeleted()) {
npruned = this->TryPruneLeaf(tree, nid, tree.GetDepth(nid), npruned);
npruned = this->TryPruneLeaf(param, p_tree, nid, tree.GetDepth(nid), npruned);
}
}
LOG(INFO) << "tree pruning end, "
@@ -103,13 +85,10 @@ class TreePruner : public TreeUpdater {
private:
// synchronizer
std::unique_ptr<TreeUpdater> syncher_;
// training parameter
TrainParam param_;
common::Monitor pruner_monitor_;
};
XGBOOST_REGISTER_TREE_UPDATER(TreePruner, "prune")
.describe("Pruner that prune the tree according to statistics.")
.set_body([](Context const* ctx, ObjInfo task) { return new TreePruner(ctx, task); });
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree

View File

@@ -28,21 +28,14 @@ namespace tree {
DMLC_REGISTRY_FILE_TAG(updater_quantile_hist);
void QuantileHistMaker::Configure(const Args &args) {
param_.UpdateAllowUnknown(args);
}
void QuantileHistMaker::Update(HostDeviceVector<GradientPair> *gpair, DMatrix *dmat,
void QuantileHistMaker::Update(TrainParam const *param, HostDeviceVector<GradientPair> *gpair,
DMatrix *dmat,
common::Span<HostDeviceVector<bst_node_t>> out_position,
const std::vector<RegTree *> &trees) {
// rescale learning rate according to size of trees
float lr = param_.learning_rate;
param_.learning_rate = lr / trees.size();
// build tree
const size_t n_trees = trees.size();
if (!pimpl_) {
pimpl_.reset(new Builder(n_trees, param_, dmat, task_, ctx_));
pimpl_.reset(new Builder(n_trees, param, dmat, task_, ctx_));
}
size_t t_idx{0};
@@ -51,8 +44,6 @@ void QuantileHistMaker::Update(HostDeviceVector<GradientPair> *gpair, DMatrix *d
this->pimpl_->UpdateTree(gpair, dmat, p_tree, &t_row_position);
++t_idx;
}
param_.learning_rate = lr;
}
bool QuantileHistMaker::UpdatePredictionCache(const DMatrix *data,
@@ -107,7 +98,7 @@ CPUExpandEntry QuantileHistMaker::Builder::InitRoot(
auto weight = evaluator_->InitRoot(GradStats{grad_stat});
p_tree->Stat(RegTree::kRoot).sum_hess = grad_stat.GetHess();
p_tree->Stat(RegTree::kRoot).base_weight = weight;
(*p_tree)[RegTree::kRoot].SetLeaf(param_.learning_rate * weight);
(*p_tree)[RegTree::kRoot].SetLeaf(param_->learning_rate * weight);
std::vector<CPUExpandEntry> entries{node};
monitor_->Start("EvaluateSplits");
@@ -173,7 +164,7 @@ void QuantileHistMaker::Builder::ExpandTree(DMatrix *p_fmat, RegTree *p_tree,
HostDeviceVector<bst_node_t> *p_out_position) {
monitor_->Start(__func__);
Driver<CPUExpandEntry> driver(param_);
Driver<CPUExpandEntry> driver(*param_);
driver.Push(this->InitRoot(p_fmat, p_tree, gpair_h));
auto const &tree = *p_tree;
auto expand_set = driver.Pop();
@@ -277,21 +268,19 @@ void QuantileHistMaker::Builder::InitData(DMatrix *fmat, const RegTree &tree,
} else {
CHECK_EQ(n_total_bins, page.cut.TotalBins());
}
partitioner_.emplace_back(this->ctx_, page.Size(), page.base_rowid);
partitioner_.emplace_back(this->ctx_, page.Size(), page.base_rowid, fmat->IsColumnSplit());
++page_id;
}
histogram_builder_->Reset(n_total_bins, HistBatch(param_), ctx_->Threads(), page_id,
collective::IsDistributed());
collective::IsDistributed(), fmat->IsColumnSplit());
auto m_gpair =
linalg::MakeTensorView(*gpair, {gpair->size(), static_cast<std::size_t>(1)}, ctx_->gpu_id);
SampleGradient(ctx_, param_, m_gpair);
auto m_gpair = linalg::MakeTensorView(ctx_, *gpair, gpair->size(), static_cast<std::size_t>(1));
SampleGradient(ctx_, *param_, m_gpair);
}
// store a pointer to the tree
p_last_tree_ = &tree;
evaluator_.reset(
new HistEvaluator<CPUExpandEntry>{param_, info, this->ctx_->Threads(), column_sampler_});
evaluator_.reset(new HistEvaluator<CPUExpandEntry>{ctx_, param_, info, column_sampler_});
monitor_->Stop(__func__);
}

View File

@@ -35,49 +35,36 @@
#include "../common/partition_builder.h"
#include "../common/column_matrix.h"
namespace xgboost {
namespace tree {
inline BatchParam HistBatch(TrainParam const& param) {
return {param.max_bin, param.sparse_threshold};
namespace xgboost::tree {
inline BatchParam HistBatch(TrainParam const* param) {
return {param->max_bin, param->sparse_threshold};
}
/*! \brief construct a tree using quantized feature values */
class QuantileHistMaker: public TreeUpdater {
public:
explicit QuantileHistMaker(Context const* ctx, ObjInfo task) : TreeUpdater(ctx), task_{task} {}
void Configure(const Args& args) override;
void Configure(const Args&) override {}
void Update(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
void Update(TrainParam const* param, HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
common::Span<HostDeviceVector<bst_node_t>> out_position,
const std::vector<RegTree*>& trees) override;
bool UpdatePredictionCache(const DMatrix *data,
linalg::VectorView<float> out_preds) override;
void LoadConfig(Json const& in) override {
auto const& config = get<Object const>(in);
FromJson(config.at("train_param"), &this->param_);
}
void SaveConfig(Json* p_out) const override {
auto& out = *p_out;
out["train_param"] = ToJson(param_);
}
void LoadConfig(Json const&) override {}
void SaveConfig(Json*) const override {}
char const* Name() const override {
return "grow_quantile_histmaker";
}
bool HasNodePosition() const override { return true; }
[[nodiscard]] char const* Name() const override { return "grow_quantile_histmaker"; }
[[nodiscard]] bool HasNodePosition() const override { return true; }
protected:
// training parameter
TrainParam param_;
// actual builder that runs the algorithm
struct Builder {
public:
// constructor
explicit Builder(const size_t n_trees, const TrainParam& param, DMatrix const* fmat,
explicit Builder(const size_t n_trees, TrainParam const* param, DMatrix const* fmat,
ObjInfo task, Context const* ctx)
: n_trees_(n_trees),
param_(param),
@@ -115,7 +102,7 @@ class QuantileHistMaker: public TreeUpdater {
private:
const size_t n_trees_;
const TrainParam& param_;
TrainParam const* param_;
std::shared_ptr<common::ColumnSampler> column_sampler_{
std::make_shared<common::ColumnSampler>()};
@@ -140,7 +127,6 @@ class QuantileHistMaker: public TreeUpdater {
std::unique_ptr<Builder> pimpl_;
ObjInfo task_;
};
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree
#endif // XGBOOST_TREE_UPDATER_QUANTILE_HIST_H_

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2014-2022 by XGBoost Contributors
/**
* Copyright 2014-2023 by XGBoost Contributors
* \file updater_refresh.cc
* \brief refresh the statistics and leaf value on the tree on the dataset
* \author Tianqi Chen
@@ -16,8 +16,7 @@
#include "./param.h"
#include "xgboost/json.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
DMLC_REGISTRY_FILE_TAG(updater_refresh);
@@ -25,23 +24,14 @@ DMLC_REGISTRY_FILE_TAG(updater_refresh);
class TreeRefresher : public TreeUpdater {
public:
explicit TreeRefresher(Context const *ctx) : TreeUpdater(ctx) {}
void Configure(const Args &args) override { param_.UpdateAllowUnknown(args); }
void LoadConfig(Json const& in) override {
auto const& config = get<Object const>(in);
FromJson(config.at("train_param"), &this->param_);
}
void SaveConfig(Json* p_out) const override {
auto& out = *p_out;
out["train_param"] = ToJson(param_);
}
char const* Name() const override {
return "refresh";
}
bool CanModifyTree() const override {
return true;
}
void Configure(const Args &) override {}
void LoadConfig(Json const &) override {}
void SaveConfig(Json *) const override {}
[[nodiscard]] char const *Name() const override { return "refresh"; }
[[nodiscard]] bool CanModifyTree() const override { return true; }
// update the tree, do pruning
void Update(HostDeviceVector<GradientPair> *gpair, DMatrix *p_fmat,
void Update(TrainParam const *param, HostDeviceVector<GradientPair> *gpair, DMatrix *p_fmat,
common::Span<HostDeviceVector<bst_node_t>> /*out_position*/,
const std::vector<RegTree *> &trees) override {
if (trees.size() == 0) return;
@@ -103,16 +93,11 @@ class TreeRefresher : public TreeUpdater {
lazy_get_stats();
collective::Allreduce<collective::Operation::kSum>(&dmlc::BeginPtr(stemp[0])->sum_grad,
stemp[0].size() * 2);
// rescale learning rate according to size of trees
float lr = param_.learning_rate;
param_.learning_rate = lr / trees.size();
int offset = 0;
for (auto tree : trees) {
this->Refresh(dmlc::BeginPtr(stemp[0]) + offset, 0, tree);
this->Refresh(param, dmlc::BeginPtr(stemp[0]) + offset, 0, tree);
offset += tree->param.num_nodes;
}
// set learning rate back
param_.learning_rate = lr;
}
private:
@@ -135,31 +120,27 @@ class TreeRefresher : public TreeUpdater {
gstats[pid].Add(gpair[ridx]);
}
}
inline void Refresh(const GradStats *gstats,
int nid, RegTree *p_tree) {
inline void Refresh(TrainParam const *param, const GradStats *gstats, int nid, RegTree *p_tree) {
RegTree &tree = *p_tree;
tree.Stat(nid).base_weight =
static_cast<bst_float>(CalcWeight(param_, gstats[nid]));
static_cast<bst_float>(CalcWeight(*param, gstats[nid]));
tree.Stat(nid).sum_hess = static_cast<bst_float>(gstats[nid].sum_hess);
if (tree[nid].IsLeaf()) {
if (param_.refresh_leaf) {
tree[nid].SetLeaf(tree.Stat(nid).base_weight * param_.learning_rate);
if (param->refresh_leaf) {
tree[nid].SetLeaf(tree.Stat(nid).base_weight * param->learning_rate);
}
} else {
tree.Stat(nid).loss_chg = static_cast<bst_float>(
xgboost::tree::CalcGain(param_, gstats[tree[nid].LeftChild()]) +
xgboost::tree::CalcGain(param_, gstats[tree[nid].RightChild()]) -
xgboost::tree::CalcGain(param_, gstats[nid]));
this->Refresh(gstats, tree[nid].LeftChild(), p_tree);
this->Refresh(gstats, tree[nid].RightChild(), p_tree);
tree.Stat(nid).loss_chg =
static_cast<bst_float>(xgboost::tree::CalcGain(*param, gstats[tree[nid].LeftChild()]) +
xgboost::tree::CalcGain(*param, gstats[tree[nid].RightChild()]) -
xgboost::tree::CalcGain(*param, gstats[nid]));
this->Refresh(param, gstats, tree[nid].LeftChild(), p_tree);
this->Refresh(param, gstats, tree[nid].RightChild(), p_tree);
}
}
// training parameter
TrainParam param_;
};
XGBOOST_REGISTER_TREE_UPDATER(TreeRefresher, "refresh")
.describe("Refresher that refreshes the weight and statistics according to data.")
.set_body([](Context const *ctx, ObjInfo) { return new TreeRefresher(ctx); });
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2014-2019 by Contributors
/**
* Copyright 2014-2013 by XBGoost Contributors
* \file updater_sync.cc
* \brief synchronize the tree in all distributed nodes
*/
@@ -13,8 +13,7 @@
#include "../common/io.h"
#include "xgboost/json.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
DMLC_REGISTRY_FILE_TAG(updater_sync);
@@ -30,11 +29,9 @@ class TreeSyncher : public TreeUpdater {
void LoadConfig(Json const&) override {}
void SaveConfig(Json*) const override {}
char const* Name() const override {
return "prune";
}
[[nodiscard]] char const* Name() const override { return "prune"; }
void Update(HostDeviceVector<GradientPair>*, DMatrix*,
void Update(TrainParam const*, HostDeviceVector<GradientPair>*, DMatrix*,
common::Span<HostDeviceVector<bst_node_t>> /*out_position*/,
const std::vector<RegTree*>& trees) override {
if (collective::GetWorldSize() == 1) return;
@@ -57,5 +54,4 @@ class TreeSyncher : public TreeUpdater {
XGBOOST_REGISTER_TREE_UPDATER(TreeSyncher, "sync")
.describe("Syncher that synchronize the tree in all distributed nodes.")
.set_body([](Context const* ctx, ObjInfo) { return new TreeSyncher(ctx); });
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree