Bound the size of the histogram cache. (#9440)
- A new histogram collection with a limit in size. - Unify histogram building logic between hist, multi-hist, and approx.
This commit is contained in:
@@ -4,13 +4,13 @@
|
||||
#ifndef XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_
|
||||
#define XGBOOST_TREE_HIST_EVALUATE_SPLITS_H_
|
||||
|
||||
#include <algorithm> // for copy
|
||||
#include <cstddef> // for size_t
|
||||
#include <limits> // for numeric_limits
|
||||
#include <memory> // for shared_ptr
|
||||
#include <numeric> // for accumulate
|
||||
#include <utility> // for move
|
||||
#include <vector> // for vector
|
||||
#include <algorithm> // for copy
|
||||
#include <cstddef> // for size_t
|
||||
#include <limits> // for numeric_limits
|
||||
#include <memory> // for shared_ptr
|
||||
#include <numeric> // for accumulate
|
||||
#include <utility> // for move
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../common/categorical.h" // for CatBitField
|
||||
#include "../../common/hist_util.h" // for GHistRow, HistogramCuts
|
||||
@@ -20,6 +20,7 @@
|
||||
#include "../param.h" // for TrainParam
|
||||
#include "../split_evaluator.h" // for TreeEvaluator
|
||||
#include "expand_entry.h" // for MultiExpandEntry
|
||||
#include "hist_cache.h" // for BoundedHistCollection
|
||||
#include "xgboost/base.h" // for bst_node_t, bst_target_t, bst_feature_t
|
||||
#include "xgboost/context.h"                     // for Context
|
||||
#include "xgboost/linalg.h" // for Constants, Vector
|
||||
@@ -317,7 +318,7 @@ class HistEvaluator {
|
||||
}
|
||||
|
||||
public:
|
||||
void EvaluateSplits(const common::HistCollection &hist, common::HistogramCuts const &cut,
|
||||
void EvaluateSplits(const BoundedHistCollection &hist, common::HistogramCuts const &cut,
|
||||
common::Span<FeatureType const> feature_types, const RegTree &tree,
|
||||
std::vector<CPUExpandEntry> *p_entries) {
|
||||
auto n_threads = ctx_->Threads();
|
||||
@@ -623,7 +624,7 @@ class HistMultiEvaluator {
|
||||
}
|
||||
|
||||
public:
|
||||
void EvaluateSplits(RegTree const &tree, common::Span<const common::HistCollection *> hist,
|
||||
void EvaluateSplits(RegTree const &tree, common::Span<const BoundedHistCollection *> hist,
|
||||
common::HistogramCuts const &cut, std::vector<MultiExpandEntry> *p_entries) {
|
||||
auto &entries = *p_entries;
|
||||
std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> features(entries.size());
|
||||
|
||||
@@ -18,8 +18,8 @@ namespace xgboost::tree {
|
||||
*/
|
||||
template <typename Impl>
|
||||
struct ExpandEntryImpl {
|
||||
bst_node_t nid;
|
||||
bst_node_t depth;
|
||||
bst_node_t nid{0};
|
||||
bst_node_t depth{0};
|
||||
|
||||
[[nodiscard]] float GetLossChange() const {
|
||||
return static_cast<Impl const*>(this)->split.loss_chg;
|
||||
|
||||
109
src/tree/hist/hist_cache.h
Normal file
109
src/tree/hist/hist_cache.h
Normal file
@@ -0,0 +1,109 @@
|
||||
/**
|
||||
* Copyright 2023 by XGBoost Contributors
|
||||
*/
|
||||
#ifndef XGBOOST_TREE_HIST_HIST_CACHE_H_
|
||||
#define XGBOOST_TREE_HIST_HIST_CACHE_H_
|
||||
#include <cstddef> // for size_t
|
||||
#include <map> // for map
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../common/hist_util.h" // for GHistRow, ConstGHistRow
|
||||
#include "xgboost/base.h" // for bst_node_t, bst_bin_t
|
||||
#include "xgboost/logging.h" // for CHECK_GT
|
||||
#include "xgboost/span.h" // for Span
|
||||
|
||||
namespace xgboost::tree {
|
||||
/**
|
||||
* @brief A persistent cache for CPU histogram.
|
||||
*
|
||||
* The size of the cache is first bounded by the `Driver` class then by this cache
|
||||
* implementaiton. The former limits the number of nodes that can be built for each node
|
||||
* batch, while this cache limits the number of all nodes up to the size of
|
||||
* max(|node_batch|, n_cached_node).
|
||||
*
|
||||
* The caller is responsible for clearing up the cache as it needs to rearrange the
|
||||
* nodes before making overflowed allocations. The strcut only reports whether the size
|
||||
* limit has benn reached.
|
||||
*/
|
||||
class BoundedHistCollection {
|
||||
// maps node index to offset in `data_`.
|
||||
std::map<bst_node_t, std::size_t> node_map_;
|
||||
// currently allocated bins, used for tracking consistentcy.
|
||||
std::size_t current_size_{0};
|
||||
|
||||
// stores the histograms in a contiguous buffer
|
||||
std::vector<GradientPairPrecise> data_;
|
||||
|
||||
// number of histogram bins across all features
|
||||
bst_bin_t n_total_bins_{0};
|
||||
// limits the number of nodes that can be in the cache for each tree
|
||||
std::size_t n_cached_nodes_{0};
|
||||
// whether the tree has grown beyond the cache limit
|
||||
bool has_exceeded_{false};
|
||||
|
||||
public:
|
||||
common::GHistRow operator[](std::size_t idx) {
|
||||
auto offset = node_map_.at(idx);
|
||||
return common::Span{data_.data(), data_.size()}.subspan(offset, n_total_bins_);
|
||||
}
|
||||
common::ConstGHistRow operator[](std::size_t idx) const {
|
||||
auto offset = node_map_.at(idx);
|
||||
return common::Span{data_.data(), data_.size()}.subspan(offset, n_total_bins_);
|
||||
}
|
||||
void Reset(bst_bin_t n_total_bins, std::size_t n_cached_nodes) {
|
||||
n_total_bins_ = n_total_bins;
|
||||
n_cached_nodes_ = n_cached_nodes;
|
||||
this->Clear(false);
|
||||
}
|
||||
/**
|
||||
* @brief Clear the cache, mark whether the cache is exceeded the limit.
|
||||
*/
|
||||
void Clear(bool exceeded) {
|
||||
node_map_.clear();
|
||||
current_size_ = 0;
|
||||
has_exceeded_ = exceeded;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool CanHost(common::Span<bst_node_t const> nodes_to_build,
|
||||
common::Span<bst_node_t const> nodes_to_sub) const {
|
||||
auto n_new_nodes = nodes_to_build.size() + nodes_to_sub.size();
|
||||
return n_new_nodes + node_map_.size() <= n_cached_nodes_;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Allocate histogram buffers for all nodes.
|
||||
*
|
||||
* The resulting histogram buffer is contiguous for all nodes in the order of
|
||||
* allocation.
|
||||
*/
|
||||
void AllocateHistograms(common::Span<bst_node_t const> nodes_to_build,
|
||||
common::Span<bst_node_t const> nodes_to_sub) {
|
||||
auto n_new_nodes = nodes_to_build.size() + nodes_to_sub.size();
|
||||
auto alloc_size = n_new_nodes * n_total_bins_;
|
||||
auto new_size = alloc_size + current_size_;
|
||||
if (new_size > data_.size()) {
|
||||
data_.resize(new_size);
|
||||
}
|
||||
for (auto nidx : nodes_to_build) {
|
||||
node_map_[nidx] = current_size_;
|
||||
current_size_ += n_total_bins_;
|
||||
}
|
||||
for (auto nidx : nodes_to_sub) {
|
||||
node_map_[nidx] = current_size_;
|
||||
current_size_ += n_total_bins_;
|
||||
}
|
||||
CHECK_EQ(current_size_, new_size);
|
||||
}
|
||||
void AllocateHistograms(std::vector<bst_node_t> const& nodes) {
|
||||
this->AllocateHistograms(common::Span<bst_node_t const>{nodes},
|
||||
common::Span<bst_node_t const>{});
|
||||
}
|
||||
|
||||
[[nodiscard]] bool HasExceeded() const { return has_exceeded_; }
|
||||
[[nodiscard]] bool HistogramExists(bst_node_t nidx) const {
|
||||
return node_map_.find(nidx) != node_map_.cend();
|
||||
}
|
||||
[[nodiscard]] std::size_t Size() const { return current_size_; }
|
||||
};
|
||||
} // namespace xgboost::tree
|
||||
#endif // XGBOOST_TREE_HIST_HIST_CACHE_H_
|
||||
63
src/tree/hist/histogram.cc
Normal file
63
src/tree/hist/histogram.cc
Normal file
@@ -0,0 +1,63 @@
|
||||
/**
|
||||
* Copyright 2023 by XGBoost Contributors
|
||||
*/
|
||||
#include "histogram.h"
|
||||
|
||||
#include <cstddef> // for size_t
|
||||
#include <numeric> // for accumulate
|
||||
#include <utility> // for swap
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../common/transform_iterator.h" // for MakeIndexTransformIter
|
||||
#include "expand_entry.h" // for MultiExpandEntry, CPUExpandEntry
|
||||
#include "xgboost/logging.h" // for CHECK_NE
|
||||
#include "xgboost/span.h" // for Span
|
||||
#include "xgboost/tree_model.h" // for RegTree
|
||||
|
||||
namespace xgboost::tree {
|
||||
/**
 * @brief Decide which child to build directly and which to derive via the
 *        subtraction trick, for multi-target trees.
 *
 * The child whose hessian sum (over all targets) is smaller is built directly;
 * its sibling is obtained by subtracting the built histogram from the parent's.
 */
void AssignNodes(RegTree const *p_tree, std::vector<MultiExpandEntry> const &valid_candidates,
                 common::Span<bst_node_t> nodes_to_build, common::Span<bst_node_t> nodes_to_sub) {
  CHECK_EQ(nodes_to_build.size(), valid_candidates.size());

  for (std::size_t i = 0; i < valid_candidates.size(); ++i) {
    auto const &candidate = valid_candidates[i];
    // Accumulate the hessian across targets for each child.
    auto left_it = common::MakeIndexTransformIter(
        [&](auto t) { return candidate.split.left_sum[t].GetHess(); });
    auto right_it = common::MakeIndexTransformIter(
        [&](auto t) { return candidate.split.right_sum[t].GetHess(); });
    auto total_left = std::accumulate(left_it, left_it + candidate.split.left_sum.size(), .0);
    auto total_right = std::accumulate(right_it, right_it + candidate.split.right_sum.size(), .0);

    auto build = p_tree->LeftChild(candidate.nid);
    auto subtract = p_tree->RightChild(candidate.nid);
    // Build the cheaper (smaller hessian) child directly.
    if (total_right < total_left) {
      std::swap(build, subtract);
    }
    nodes_to_build[i] = build;
    nodes_to_sub[i] = subtract;
  }
}
|
||||
|
||||
/**
 * @brief Decide which child to build directly and which to derive via the
 *        subtraction trick.
 *
 * The child with the smaller hessian is built directly; the sibling is obtained
 * by subtracting the built histogram from the parent's.
 */
void AssignNodes(RegTree const *p_tree, std::vector<CPUExpandEntry> const &candidates,
                 common::Span<bst_node_t> nodes_to_build, common::Span<bst_node_t> nodes_to_sub) {
  for (std::size_t i = 0; i < candidates.size(); ++i) {
    auto const &candidate = candidates[i];
    auto build = (*p_tree)[candidate.nid].LeftChild();
    auto subtract = (*p_tree)[candidate.nid].RightChild();
    // Build the cheaper (smaller hessian) child directly.
    if (candidate.split.right_sum.GetHess() < candidate.split.left_sum.GetHess()) {
      std::swap(build, subtract);
    }
    nodes_to_build[i] = build;
    nodes_to_sub[i] = subtract;
  }
}
|
||||
} // namespace xgboost::tree
|
||||
@@ -4,80 +4,85 @@
|
||||
#ifndef XGBOOST_TREE_HIST_HISTOGRAM_H_
|
||||
#define XGBOOST_TREE_HIST_HISTOGRAM_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
#include <algorithm> // for max
|
||||
#include <cstddef> // for size_t
|
||||
#include <cstdint> // for int32_t
|
||||
#include <functional> // for function
|
||||
#include <utility> // for move
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../collective/communicator-inl.h"
|
||||
#include "../../common/hist_util.h"
|
||||
#include "../../data/gradient_index.h"
|
||||
#include "expand_entry.h"
|
||||
#include "xgboost/tree_model.h" // for RegTree
|
||||
#include "../../collective/communicator-inl.h" // for Allreduce
|
||||
#include "../../collective/communicator.h" // for Operation
|
||||
#include "../../common/hist_util.h" // for GHistRow, ParallelGHi...
|
||||
#include "../../common/row_set.h" // for RowSetCollection
|
||||
#include "../../common/threading_utils.h" // for ParallelFor2d, Range1d, BlockedSpace2d
|
||||
#include "../../data/gradient_index.h" // for GHistIndexMatrix
|
||||
#include "expand_entry.h" // for MultiExpandEntry, CPUExpandEntry
|
||||
#include "hist_cache.h" // for BoundedHistCollection
|
||||
#include "param.h" // for HistMakerTrainParam
|
||||
#include "xgboost/base.h" // for bst_node_t, bst_target_t, bst_bin_t
|
||||
#include "xgboost/context.h" // for Context
|
||||
#include "xgboost/data.h" // for BatchIterator, BatchSet
|
||||
#include "xgboost/linalg.h" // for MatrixView, All, Vect...
|
||||
#include "xgboost/logging.h" // for CHECK_GE
|
||||
#include "xgboost/span.h" // for Span
|
||||
#include "xgboost/tree_model.h" // for RegTree
|
||||
|
||||
namespace xgboost::tree {
|
||||
template <typename ExpandEntry>
|
||||
/**
|
||||
* @brief Decide which node as the build node for multi-target trees.
|
||||
*/
|
||||
void AssignNodes(RegTree const *p_tree, std::vector<MultiExpandEntry> const &valid_candidates,
|
||||
common::Span<bst_node_t> nodes_to_build, common::Span<bst_node_t> nodes_to_sub);
|
||||
|
||||
/**
|
||||
* @brief Decide which node as the build node.
|
||||
*/
|
||||
void AssignNodes(RegTree const *p_tree, std::vector<CPUExpandEntry> const &candidates,
|
||||
common::Span<bst_node_t> nodes_to_build, common::Span<bst_node_t> nodes_to_sub);
|
||||
|
||||
class HistogramBuilder {
|
||||
/*! \brief cumulative histogram of gradients. */
|
||||
common::HistCollection hist_;
|
||||
BoundedHistCollection hist_;
|
||||
common::ParallelGHistBuilder buffer_;
|
||||
BatchParam param_;
|
||||
int32_t n_threads_{-1};
|
||||
size_t n_batches_{0};
|
||||
// Whether XGBoost is running in distributed environment.
|
||||
bool is_distributed_{false};
|
||||
bool is_col_split_{false};
|
||||
|
||||
public:
|
||||
/**
|
||||
* \param total_bins Total number of bins across all features
|
||||
* \param max_bin_per_feat Maximum number of bins per feature, same as the `max_bin`
|
||||
* training parameter.
|
||||
* \param n_threads Number of threads.
|
||||
* \param is_distributed Mostly used for testing to allow injecting parameters instead
|
||||
* @brief Reset the builder, should be called before growing a new tree.
|
||||
*
|
||||
* @param total_bins Total number of bins across all features
|
||||
* @param is_distributed Mostly used for testing to allow injecting parameters instead
|
||||
* of using global rabit variable.
|
||||
*/
|
||||
void Reset(uint32_t total_bins, BatchParam p, int32_t n_threads, size_t n_batches,
|
||||
bool is_distributed, bool is_col_split) {
|
||||
CHECK_GE(n_threads, 1);
|
||||
n_threads_ = n_threads;
|
||||
n_batches_ = n_batches;
|
||||
void Reset(Context const *ctx, bst_bin_t total_bins, BatchParam const &p, bool is_distributed,
|
||||
bool is_col_split, HistMakerTrainParam const *param) {
|
||||
n_threads_ = ctx->Threads();
|
||||
param_ = p;
|
||||
hist_.Init(total_bins);
|
||||
hist_.Reset(total_bins, param->internal_max_cached_hist_node);
|
||||
buffer_.Init(total_bins);
|
||||
is_distributed_ = is_distributed;
|
||||
is_col_split_ = is_col_split;
|
||||
}
|
||||
|
||||
template <bool any_missing>
|
||||
void BuildLocalHistograms(size_t page_idx, common::BlockedSpace2d space,
|
||||
GHistIndexMatrix const &gidx,
|
||||
std::vector<ExpandEntry> const &nodes_for_explicit_hist_build,
|
||||
void BuildLocalHistograms(common::BlockedSpace2d const &space, GHistIndexMatrix const &gidx,
|
||||
std::vector<bst_node_t> const &nodes_to_build,
|
||||
common::RowSetCollection const &row_set_collection,
|
||||
common::Span<GradientPair const> gpair_h, bool force_read_by_column) {
|
||||
const size_t n_nodes = nodes_for_explicit_hist_build.size();
|
||||
CHECK_GT(n_nodes, 0);
|
||||
|
||||
std::vector<common::GHistRow> target_hists(n_nodes);
|
||||
for (size_t i = 0; i < n_nodes; ++i) {
|
||||
auto const nidx = nodes_for_explicit_hist_build[i].nid;
|
||||
target_hists[i] = hist_[nidx];
|
||||
}
|
||||
if (page_idx == 0) {
|
||||
// FIXME(jiamingy): Handle different size of space. Right now we use the maximum
|
||||
// partition size for the buffer, which might not be efficient if partition sizes
|
||||
// has significant variance.
|
||||
buffer_.Reset(this->n_threads_, n_nodes, space, target_hists);
|
||||
}
|
||||
|
||||
// Parallel processing by nodes and data in each node
|
||||
common::ParallelFor2d(space, this->n_threads_, [&](size_t nid_in_set, common::Range1d r) {
|
||||
const auto tid = static_cast<unsigned>(omp_get_thread_num());
|
||||
const int32_t nid = nodes_for_explicit_hist_build[nid_in_set].nid;
|
||||
auto elem = row_set_collection[nid];
|
||||
bst_node_t const nidx = nodes_to_build[nid_in_set];
|
||||
auto elem = row_set_collection[nidx];
|
||||
auto start_of_row_set = std::min(r.begin(), elem.Size());
|
||||
auto end_of_row_set = std::min(r.end(), elem.Size());
|
||||
auto rid_set = common::RowSetCollection::Elem(elem.begin + start_of_row_set,
|
||||
elem.begin + end_of_row_set, nid);
|
||||
elem.begin + end_of_row_set, nidx);
|
||||
auto hist = buffer_.GetInitializedHist(tid, nid_in_set);
|
||||
if (rid_set.Size() != 0) {
|
||||
common::BuildHist<any_missing>(gpair_h, rid_set, gidx, hist, force_read_by_column);
|
||||
@@ -85,117 +90,143 @@ class HistogramBuilder {
|
||||
});
|
||||
}
|
||||
|
||||
void AddHistRows(int *starting_index,
|
||||
std::vector<ExpandEntry> const &nodes_for_explicit_hist_build,
|
||||
std::vector<ExpandEntry> const &nodes_for_subtraction_trick) {
|
||||
for (auto const &entry : nodes_for_explicit_hist_build) {
|
||||
int nid = entry.nid;
|
||||
this->hist_.AddHistRow(nid);
|
||||
(*starting_index) = std::min(nid, (*starting_index));
|
||||
/**
|
||||
* @brief Allocate histogram, rearrange the nodes if `rearrange` is true and the tree
|
||||
* has reached the cache size limit.
|
||||
*/
|
||||
void AddHistRows(RegTree const *p_tree, std::vector<bst_node_t> *p_nodes_to_build,
|
||||
std::vector<bst_node_t> *p_nodes_to_sub, bool rearrange) {
|
||||
CHECK(p_nodes_to_build);
|
||||
auto &nodes_to_build = *p_nodes_to_build;
|
||||
CHECK(p_nodes_to_sub);
|
||||
auto &nodes_to_sub = *p_nodes_to_sub;
|
||||
|
||||
// We first check whether the cache size is already exceeded or about to be exceeded.
|
||||
// If not, then we can allocate histograms without clearing the cache and without
|
||||
// worrying about missing parent histogram.
|
||||
//
|
||||
// Otherwise, we need to rearrange the nodes before the allocation to make sure the
|
||||
// resulting buffer is contiguous. This is to facilitate efficient allreduce.
|
||||
|
||||
bool can_host = this->hist_.CanHost(nodes_to_build, nodes_to_sub);
|
||||
// True if the tree is still within the size of cache limit. Allocate histogram as
|
||||
// usual.
|
||||
auto cache_is_valid = can_host && !this->hist_.HasExceeded();
|
||||
|
||||
if (!can_host) {
|
||||
this->hist_.Clear(true);
|
||||
}
|
||||
|
||||
for (auto const &node : nodes_for_subtraction_trick) {
|
||||
this->hist_.AddHistRow(node.nid);
|
||||
}
|
||||
this->hist_.AllocateAllData();
|
||||
}
|
||||
|
||||
/** Main entry point of this class, build histogram for tree nodes. */
|
||||
void BuildHist(size_t page_id, common::BlockedSpace2d space, GHistIndexMatrix const &gidx,
|
||||
RegTree const *p_tree, common::RowSetCollection const &row_set_collection,
|
||||
std::vector<ExpandEntry> const &nodes_for_explicit_hist_build,
|
||||
std::vector<ExpandEntry> const &nodes_for_subtraction_trick,
|
||||
common::Span<GradientPair const> gpair, bool force_read_by_column = false) {
|
||||
int starting_index = std::numeric_limits<int>::max();
|
||||
if (page_id == 0) {
|
||||
this->AddHistRows(&starting_index, nodes_for_explicit_hist_build,
|
||||
nodes_for_subtraction_trick);
|
||||
}
|
||||
if (gidx.IsDense()) {
|
||||
this->BuildLocalHistograms<false>(page_id, space, gidx, nodes_for_explicit_hist_build,
|
||||
row_set_collection, gpair, force_read_by_column);
|
||||
} else {
|
||||
this->BuildLocalHistograms<true>(page_id, space, gidx, nodes_for_explicit_hist_build,
|
||||
row_set_collection, gpair, force_read_by_column);
|
||||
}
|
||||
|
||||
CHECK_GE(n_batches_, 1);
|
||||
if (page_id != n_batches_ - 1) {
|
||||
if (!rearrange || cache_is_valid) {
|
||||
// If not rearrange, we allocate the histogram as usual, assuming the nodes have
|
||||
// been properly arranged by other builders.
|
||||
this->hist_.AllocateHistograms(nodes_to_build, nodes_to_sub);
|
||||
if (rearrange) {
|
||||
CHECK(!this->hist_.HasExceeded());
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
this->SyncHistogram(p_tree, nodes_for_explicit_hist_build,
|
||||
nodes_for_subtraction_trick, starting_index);
|
||||
}
|
||||
/** same as the other build hist but handles only single batch data (in-core) */
|
||||
void BuildHist(size_t page_id, GHistIndexMatrix const &gidx, RegTree *p_tree,
|
||||
common::RowSetCollection const &row_set_collection,
|
||||
std::vector<ExpandEntry> const &nodes_for_explicit_hist_build,
|
||||
std::vector<ExpandEntry> const &nodes_for_subtraction_trick,
|
||||
common::Span<GradientPair const> gpair, bool force_read_by_column = false) {
|
||||
const size_t n_nodes = nodes_for_explicit_hist_build.size();
|
||||
// create space of size (# rows in each node)
|
||||
common::BlockedSpace2d space(
|
||||
n_nodes,
|
||||
[&](size_t nidx_in_set) {
|
||||
const int32_t nidx = nodes_for_explicit_hist_build[nidx_in_set].nid;
|
||||
return row_set_collection[nidx].Size();
|
||||
},
|
||||
256);
|
||||
this->BuildHist(page_id, space, gidx, p_tree, row_set_collection, nodes_for_explicit_hist_build,
|
||||
nodes_for_subtraction_trick, gpair, force_read_by_column);
|
||||
}
|
||||
|
||||
void SyncHistogram(RegTree const *p_tree,
|
||||
std::vector<ExpandEntry> const &nodes_for_explicit_hist_build,
|
||||
std::vector<ExpandEntry> const &nodes_for_subtraction_trick,
|
||||
int starting_index) {
|
||||
auto n_bins = buffer_.TotalBins();
|
||||
common::BlockedSpace2d space(
|
||||
nodes_for_explicit_hist_build.size(), [&](size_t) { return n_bins; }, 1024);
|
||||
CHECK(hist_.IsContiguous());
|
||||
common::ParallelFor2d(space, this->n_threads_, [&](size_t node, common::Range1d r) {
|
||||
const auto &entry = nodes_for_explicit_hist_build[node];
|
||||
auto this_hist = this->hist_[entry.nid];
|
||||
// Merging histograms from each thread into once
|
||||
this->buffer_.ReduceHist(node, r.begin(), r.end());
|
||||
});
|
||||
|
||||
if (is_distributed_ && !is_col_split_) {
|
||||
collective::Allreduce<collective::Operation::kSum>(
|
||||
reinterpret_cast<double *>(this->hist_[starting_index].data()),
|
||||
n_bins * nodes_for_explicit_hist_build.size() * 2);
|
||||
// The cache is full, parent histogram might be removed in previous iterations to
|
||||
// save memory.
|
||||
std::vector<bst_node_t> can_subtract;
|
||||
for (auto const &v : nodes_to_sub) {
|
||||
if (this->hist_.HistogramExists(p_tree->Parent(v))) {
|
||||
// We can still use the subtraction trick for this node
|
||||
can_subtract.push_back(v);
|
||||
} else {
|
||||
// This node requires a full build
|
||||
nodes_to_build.push_back(v);
|
||||
}
|
||||
}
|
||||
|
||||
common::ParallelFor2d(space, this->n_threads_, [&](std::size_t nidx_in_set, common::Range1d r) {
|
||||
const auto &entry = nodes_for_explicit_hist_build[nidx_in_set];
|
||||
auto this_hist = this->hist_[entry.nid];
|
||||
if (!p_tree->IsRoot(entry.nid)) {
|
||||
auto const parent_id = p_tree->Parent(entry.nid);
|
||||
auto const subtraction_node_id = nodes_for_subtraction_trick[nidx_in_set].nid;
|
||||
auto parent_hist = this->hist_[parent_id];
|
||||
auto sibling_hist = this->hist_[subtraction_node_id];
|
||||
common::SubtractionHist(sibling_hist, parent_hist, this_hist, r.begin(), r.end());
|
||||
nodes_to_sub = std::move(can_subtract);
|
||||
this->hist_.AllocateHistograms(nodes_to_build, nodes_to_sub);
|
||||
}
|
||||
|
||||
/** Main entry point of this class, build histogram for tree nodes. */
|
||||
void BuildHist(std::size_t page_idx, common::BlockedSpace2d const &space,
|
||||
GHistIndexMatrix const &gidx, common::RowSetCollection const &row_set_collection,
|
||||
std::vector<bst_node_t> const &nodes_to_build,
|
||||
linalg::VectorView<GradientPair const> gpair, bool force_read_by_column = false) {
|
||||
CHECK(gpair.Contiguous());
|
||||
|
||||
if (page_idx == 0) {
|
||||
// Add the local histogram cache to the parallel buffer before processing the first page.
|
||||
auto n_nodes = nodes_to_build.size();
|
||||
std::vector<common::GHistRow> target_hists(n_nodes);
|
||||
for (size_t i = 0; i < n_nodes; ++i) {
|
||||
auto const nidx = nodes_to_build[i];
|
||||
target_hists[i] = hist_[nidx];
|
||||
}
|
||||
buffer_.Reset(this->n_threads_, n_nodes, space, target_hists);
|
||||
}
|
||||
|
||||
if (gidx.IsDense()) {
|
||||
this->BuildLocalHistograms<false>(space, gidx, nodes_to_build, row_set_collection,
|
||||
gpair.Values(), force_read_by_column);
|
||||
} else {
|
||||
this->BuildLocalHistograms<true>(space, gidx, nodes_to_build, row_set_collection,
|
||||
gpair.Values(), force_read_by_column);
|
||||
}
|
||||
}
|
||||
|
||||
void SyncHistogram(RegTree const *p_tree, std::vector<bst_node_t> const &nodes_to_build,
|
||||
std::vector<bst_node_t> const &nodes_to_trick) {
|
||||
auto n_total_bins = buffer_.TotalBins();
|
||||
common::BlockedSpace2d space(
|
||||
nodes_to_build.size(), [&](std::size_t) { return n_total_bins; }, 1024);
|
||||
common::ParallelFor2d(space, this->n_threads_, [&](size_t node, common::Range1d r) {
|
||||
// Merging histograms from each thread.
|
||||
this->buffer_.ReduceHist(node, r.begin(), r.end());
|
||||
});
|
||||
if (is_distributed_ && !is_col_split_) {
|
||||
// The cache is contiguous, we can perform allreduce for all nodes in one go.
|
||||
CHECK(!nodes_to_build.empty());
|
||||
auto first_nidx = nodes_to_build.front();
|
||||
std::size_t n = n_total_bins * nodes_to_build.size() * 2;
|
||||
collective::Allreduce<collective::Operation::kSum>(
|
||||
reinterpret_cast<double *>(this->hist_[first_nidx].data()), n);
|
||||
}
|
||||
|
||||
common::BlockedSpace2d const &subspace =
|
||||
nodes_to_trick.size() == nodes_to_build.size()
|
||||
? space
|
||||
: common::BlockedSpace2d{nodes_to_trick.size(),
|
||||
[&](std::size_t) { return n_total_bins; }, 1024};
|
||||
common::ParallelFor2d(
|
||||
subspace, this->n_threads_, [&](std::size_t nidx_in_set, common::Range1d r) {
|
||||
auto subtraction_nidx = nodes_to_trick[nidx_in_set];
|
||||
auto parent_id = p_tree->Parent(subtraction_nidx);
|
||||
auto sibling_nidx = p_tree->IsLeftChild(subtraction_nidx) ? p_tree->RightChild(parent_id)
|
||||
: p_tree->LeftChild(parent_id);
|
||||
auto sibling_hist = this->hist_[sibling_nidx];
|
||||
auto parent_hist = this->hist_[parent_id];
|
||||
auto subtract_hist = this->hist_[subtraction_nidx];
|
||||
common::SubtractionHist(subtract_hist, parent_hist, sibling_hist, r.begin(), r.end());
|
||||
});
|
||||
}
|
||||
|
||||
public:
|
||||
/* Getters for tests. */
|
||||
common::HistCollection const &Histogram() { return hist_; }
|
||||
[[nodiscard]] BoundedHistCollection const &Histogram() const { return hist_; }
|
||||
[[nodiscard]] BoundedHistCollection &Histogram() { return hist_; }
|
||||
auto &Buffer() { return buffer_; }
|
||||
};
|
||||
|
||||
// Construct a work space for building histogram. Eventually we should move this
|
||||
// function into histogram builder once hist tree method supports external memory.
|
||||
template <typename Partitioner, typename ExpandEntry = CPUExpandEntry>
|
||||
template <typename Partitioner>
|
||||
common::BlockedSpace2d ConstructHistSpace(Partitioner const &partitioners,
|
||||
std::vector<ExpandEntry> const &nodes_to_build) {
|
||||
std::vector<size_t> partition_size(nodes_to_build.size(), 0);
|
||||
std::vector<bst_node_t> const &nodes_to_build) {
|
||||
// FIXME(jiamingy): Handle different size of space. Right now we use the maximum
|
||||
// partition size for the buffer, which might not be efficient if partition sizes
|
||||
// has significant variance.
|
||||
std::vector<std::size_t> partition_size(nodes_to_build.size(), 0);
|
||||
for (auto const &partition : partitioners) {
|
||||
size_t k = 0;
|
||||
for (auto node : nodes_to_build) {
|
||||
auto n_rows_in_node = partition.Partitions()[node.nid].Size();
|
||||
for (auto nidx : nodes_to_build) {
|
||||
auto n_rows_in_node = partition.Partitions()[nidx].Size();
|
||||
partition_size[k] = std::max(partition_size[k], n_rows_in_node);
|
||||
k++;
|
||||
}
|
||||
@@ -204,5 +235,107 @@ common::BlockedSpace2d ConstructHistSpace(Partitioner const &partitioners,
|
||||
nodes_to_build.size(), [&](size_t nidx_in_set) { return partition_size[nidx_in_set]; }, 256};
|
||||
return space;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Histogram builder that can handle multiple targets.
|
||||
*/
|
||||
class MultiHistogramBuilder {
|
||||
std::vector<HistogramBuilder> target_builders_;
|
||||
Context const *ctx_;
|
||||
|
||||
public:
|
||||
/**
|
||||
* @brief Build the histogram for root node.
|
||||
*/
|
||||
template <typename Partitioner, typename ExpandEntry>
|
||||
void BuildRootHist(DMatrix *p_fmat, RegTree const *p_tree,
|
||||
std::vector<Partitioner> const &partitioners,
|
||||
linalg::MatrixView<GradientPair const> gpair, ExpandEntry const &best,
|
||||
BatchParam const ¶m, bool force_read_by_column = false) {
|
||||
auto n_targets = p_tree->NumTargets();
|
||||
CHECK_EQ(gpair.Shape(1), n_targets);
|
||||
CHECK_EQ(p_fmat->Info().num_row_, gpair.Shape(0));
|
||||
CHECK_EQ(target_builders_.size(), n_targets);
|
||||
std::vector<bst_node_t> nodes{best.nid};
|
||||
std::vector<bst_node_t> dummy_sub;
|
||||
|
||||
auto space = ConstructHistSpace(partitioners, nodes);
|
||||
for (bst_target_t t{0}; t < n_targets; ++t) {
|
||||
this->target_builders_[t].AddHistRows(p_tree, &nodes, &dummy_sub, false);
|
||||
}
|
||||
CHECK(dummy_sub.empty());
|
||||
|
||||
std::size_t page_idx{0};
|
||||
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, param)) {
|
||||
for (bst_target_t t{0}; t < n_targets; ++t) {
|
||||
auto t_gpair = gpair.Slice(linalg::All(), t);
|
||||
this->target_builders_[t].BuildHist(page_idx, space, gidx,
|
||||
partitioners[page_idx].Partitions(), nodes, t_gpair,
|
||||
force_read_by_column);
|
||||
}
|
||||
++page_idx;
|
||||
}
|
||||
|
||||
for (bst_target_t t = 0; t < p_tree->NumTargets(); ++t) {
|
||||
this->target_builders_[t].SyncHistogram(p_tree, nodes, dummy_sub);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* @brief Build histogram for left and right child of valid candidates
|
||||
*/
|
||||
template <typename Partitioner, typename ExpandEntry>
|
||||
void BuildHistLeftRight(DMatrix *p_fmat, RegTree const *p_tree,
|
||||
std::vector<Partitioner> const &partitioners,
|
||||
std::vector<ExpandEntry> const &valid_candidates,
|
||||
linalg::MatrixView<GradientPair const> gpair, BatchParam const ¶m,
|
||||
bool force_read_by_column = false) {
|
||||
std::vector<bst_node_t> nodes_to_build(valid_candidates.size());
|
||||
std::vector<bst_node_t> nodes_to_sub(valid_candidates.size());
|
||||
AssignNodes(p_tree, valid_candidates, nodes_to_build, nodes_to_sub);
|
||||
|
||||
// use the first builder for getting number of valid nodes.
|
||||
target_builders_.front().AddHistRows(p_tree, &nodes_to_build, &nodes_to_sub, true);
|
||||
CHECK_GE(nodes_to_build.size(), nodes_to_sub.size());
|
||||
CHECK_EQ(nodes_to_sub.size() + nodes_to_build.size(), valid_candidates.size() * 2);
|
||||
|
||||
// allocate storage for the rest of the builders
|
||||
for (bst_target_t t = 1; t < target_builders_.size(); ++t) {
|
||||
target_builders_[t].AddHistRows(p_tree, &nodes_to_build, &nodes_to_sub, false);
|
||||
}
|
||||
|
||||
auto space = ConstructHistSpace(partitioners, nodes_to_build);
|
||||
std::size_t page_idx{0};
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, param)) {
|
||||
CHECK_EQ(gpair.Shape(1), p_tree->NumTargets());
|
||||
for (bst_target_t t = 0; t < p_tree->NumTargets(); ++t) {
|
||||
auto t_gpair = gpair.Slice(linalg::All(), t);
|
||||
CHECK_EQ(t_gpair.Shape(0), p_fmat->Info().num_row_);
|
||||
this->target_builders_[t].BuildHist(page_idx, space, page,
|
||||
partitioners[page_idx].Partitions(), nodes_to_build,
|
||||
t_gpair, force_read_by_column);
|
||||
}
|
||||
page_idx++;
|
||||
}
|
||||
|
||||
for (bst_target_t t = 0; t < p_tree->NumTargets(); ++t) {
|
||||
this->target_builders_[t].SyncHistogram(p_tree, nodes_to_build, nodes_to_sub);
|
||||
}
|
||||
}
|
||||
|
||||
[[nodiscard]] auto const &Histogram(bst_target_t t) const {
|
||||
return target_builders_[t].Histogram();
|
||||
}
|
||||
[[nodiscard]] auto &Histogram(bst_target_t t) { return target_builders_[t].Histogram(); }
|
||||
|
||||
void Reset(Context const *ctx, bst_bin_t total_bins, bst_target_t n_targets, BatchParam const &p,
|
||||
bool is_distributed, bool is_col_split, HistMakerTrainParam const *param) {
|
||||
ctx_ = ctx;
|
||||
target_builders_.resize(n_targets);
|
||||
CHECK_GE(n_targets, 1);
|
||||
for (auto &v : target_builders_) {
|
||||
v.Reset(ctx, total_bins, p, is_distributed, is_col_split, param);
|
||||
}
|
||||
}
|
||||
};
|
||||
} // namespace xgboost::tree
|
||||
#endif // XGBOOST_TREE_HIST_HISTOGRAM_H_
|
||||
|
||||
@@ -2,12 +2,19 @@
|
||||
* Copyright 2021-2023, XGBoost Contributors
|
||||
*/
|
||||
#pragma once
|
||||
#include "xgboost/parameter.h"
|
||||
|
||||
#include <cstddef> // for size_t
|
||||
|
||||
#include "xgboost/parameter.h" // for XGBoostParameter
|
||||
#include "xgboost/tree_model.h" // for RegTree
|
||||
|
||||
namespace xgboost::tree {
|
||||
struct HistMakerTrainParam : public XGBoostParameter<HistMakerTrainParam> {
|
||||
bool debug_synchronize;
|
||||
constexpr static std::size_t DefaultNodes() { return static_cast<std::size_t>(1) << 16; }
|
||||
|
||||
bool debug_synchronize{false};
|
||||
std::size_t internal_max_cached_hist_node{DefaultNodes()};
|
||||
|
||||
void CheckTreesSynchronized(RegTree const* local_tree) const;
|
||||
|
||||
// declare parameters
|
||||
@@ -15,6 +22,10 @@ struct HistMakerTrainParam : public XGBoostParameter<HistMakerTrainParam> {
|
||||
DMLC_DECLARE_FIELD(debug_synchronize)
|
||||
.set_default(false)
|
||||
.describe("Check if all distributed tree are identical after tree construction.");
|
||||
DMLC_DECLARE_FIELD(internal_max_cached_hist_node)
|
||||
.set_default(DefaultNodes())
|
||||
.set_lower_bound(1)
|
||||
.describe("Maximum number of nodes in CPU histogram cache. Only for internal usage.");
|
||||
}
|
||||
};
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -526,7 +526,7 @@ struct SplitEntryContainer {
|
||||
* \return whether the proposed split is better and can replace current split
|
||||
*/
|
||||
template <typename GradientSumT>
|
||||
bool Update(bst_float new_loss_chg, unsigned split_index, bst_float new_split_value,
|
||||
bool Update(bst_float new_loss_chg, bst_feature_t split_index, float new_split_value,
|
||||
bool default_left, bool is_cat, GradientSumT const &left_sum,
|
||||
GradientSumT const &right_sum) {
|
||||
if (this->NeedReplace(new_loss_chg, split_index)) {
|
||||
|
||||
@@ -3,27 +3,39 @@
|
||||
*
|
||||
* \brief Implementation for the approx tree method.
|
||||
*/
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <algorithm> // for max, transform, fill_n
|
||||
#include <cstddef> // for size_t
|
||||
#include <map> // for map
|
||||
#include <memory> // for allocator, unique_ptr, make_shared, make_unique
|
||||
#include <utility> // for move
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../collective/aggregator.h"
|
||||
#include "../common/random.h"
|
||||
#include "../data/gradient_index.h"
|
||||
#include "common_row_partitioner.h"
|
||||
#include "driver.h"
|
||||
#include "hist/evaluate_splits.h"
|
||||
#include "hist/histogram.h"
|
||||
#include "hist/param.h"
|
||||
#include "hist/sampler.h" // for SampleGradient
|
||||
#include "param.h" // for HistMakerTrainParam
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/data.h"
|
||||
#include "xgboost/json.h"
|
||||
#include "xgboost/linalg.h"
|
||||
#include "xgboost/task.h" // for ObjInfo
|
||||
#include "xgboost/tree_model.h"
|
||||
#include "xgboost/tree_updater.h" // for TreeUpdater
|
||||
#include "../collective/aggregator.h" // for GlobalSum
|
||||
#include "../collective/communicator-inl.h" // for IsDistributed
|
||||
#include "../common/hist_util.h" // for HistogramCuts
|
||||
#include "../common/random.h" // for ColumnSampler
|
||||
#include "../common/timer.h" // for Monitor
|
||||
#include "../data/gradient_index.h" // for GHistIndexMatrix
|
||||
#include "common_row_partitioner.h" // for CommonRowPartitioner
|
||||
#include "dmlc/registry.h" // for DMLC_REGISTRY_FILE_TAG
|
||||
#include "driver.h" // for Driver
|
||||
#include "hist/evaluate_splits.h" // for HistEvaluator, UpdatePredictionCacheImpl
|
||||
#include "hist/expand_entry.h" // for CPUExpandEntry
|
||||
#include "hist/histogram.h" // for MultiHistogramBuilder
|
||||
#include "hist/param.h" // for HistMakerTrainParam
|
||||
#include "hist/sampler.h" // for SampleGradient
|
||||
#include "param.h" // for GradStats, TrainParam
|
||||
#include "xgboost/base.h" // for Args, GradientPair, bst_node_t, bst_bin_t
|
||||
#include "xgboost/context.h" // for Context
|
||||
#include "xgboost/data.h" // for DMatrix, BatchSet, BatchIterator, MetaInfo
|
||||
#include "xgboost/host_device_vector.h" // for HostDeviceVector
|
||||
#include "xgboost/json.h" // for Object, Json, FromJson, ToJson, get
|
||||
#include "xgboost/linalg.h" // for Matrix, MakeTensorView, Empty, MatrixView
|
||||
#include "xgboost/logging.h" // for LogCheck_EQ, CHECK_EQ, CHECK
|
||||
#include "xgboost/span.h" // for Span
|
||||
#include "xgboost/task.h" // for ObjInfo
|
||||
#include "xgboost/tree_model.h" // for RegTree, RTreeNodeStat
|
||||
#include "xgboost/tree_updater.h" // for TreeUpdater, TreeUpdaterReg, XGBOOST_REGISTE...
|
||||
|
||||
namespace xgboost::tree {
|
||||
|
||||
@@ -46,7 +58,7 @@ class GloablApproxBuilder {
|
||||
HistMakerTrainParam const *hist_param_{nullptr};
|
||||
std::shared_ptr<common::ColumnSampler> col_sampler_;
|
||||
HistEvaluator evaluator_;
|
||||
HistogramBuilder<CPUExpandEntry> histogram_builder_;
|
||||
MultiHistogramBuilder histogram_builder_;
|
||||
Context const *ctx_;
|
||||
ObjInfo const *const task_;
|
||||
|
||||
@@ -59,7 +71,7 @@ class GloablApproxBuilder {
|
||||
common::HistogramCuts feature_values_;
|
||||
|
||||
public:
|
||||
void InitData(DMatrix *p_fmat, common::Span<float> hess) {
|
||||
void InitData(DMatrix *p_fmat, RegTree const *p_tree, common::Span<float> hess) {
|
||||
monitor_->Start(__func__);
|
||||
|
||||
n_batches_ = 0;
|
||||
@@ -79,8 +91,9 @@ class GloablApproxBuilder {
|
||||
n_batches_++;
|
||||
}
|
||||
|
||||
histogram_builder_.Reset(n_total_bins, BatchSpec(*param_, hess), ctx_->Threads(), n_batches_,
|
||||
collective::IsDistributed(), p_fmat->Info().IsColumnSplit());
|
||||
histogram_builder_.Reset(ctx_, n_total_bins, p_tree->NumTargets(), BatchSpec(*param_, hess),
|
||||
collective::IsDistributed(), p_fmat->Info().IsColumnSplit(),
|
||||
hist_param_);
|
||||
monitor_->Stop(__func__);
|
||||
}
|
||||
|
||||
@@ -96,20 +109,16 @@ class GloablApproxBuilder {
|
||||
}
|
||||
collective::GlobalSum(p_fmat->Info(), reinterpret_cast<double *>(&root_sum), 2);
|
||||
std::vector<CPUExpandEntry> nodes{best};
|
||||
size_t i = 0;
|
||||
auto space = ConstructHistSpace(partitioner_, nodes);
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, BatchSpec(*param_, hess))) {
|
||||
histogram_builder_.BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(), nodes,
|
||||
{}, gpair);
|
||||
i++;
|
||||
}
|
||||
this->histogram_builder_.BuildRootHist(p_fmat, p_tree, partitioner_,
|
||||
linalg::MakeTensorView(ctx_, gpair, gpair.size(), 1),
|
||||
best, BatchSpec(*param_, hess));
|
||||
|
||||
auto weight = evaluator_.InitRoot(root_sum);
|
||||
p_tree->Stat(RegTree::kRoot).sum_hess = root_sum.GetHess();
|
||||
p_tree->Stat(RegTree::kRoot).base_weight = weight;
|
||||
(*p_tree)[RegTree::kRoot].SetLeaf(param_->learning_rate * weight);
|
||||
|
||||
auto const &histograms = histogram_builder_.Histogram();
|
||||
auto const &histograms = histogram_builder_.Histogram(0);
|
||||
auto ft = p_fmat->Info().feature_types.ConstHostSpan();
|
||||
evaluator_.EvaluateSplits(histograms, feature_values_, ft, *p_tree, &nodes);
|
||||
monitor_->Stop(__func__);
|
||||
@@ -130,30 +139,9 @@ class GloablApproxBuilder {
|
||||
std::vector<CPUExpandEntry> const &valid_candidates,
|
||||
std::vector<GradientPair> const &gpair, common::Span<float> hess) {
|
||||
monitor_->Start(__func__);
|
||||
std::vector<CPUExpandEntry> nodes_to_build;
|
||||
std::vector<CPUExpandEntry> nodes_to_sub;
|
||||
|
||||
for (auto const &c : valid_candidates) {
|
||||
auto left_nidx = (*p_tree)[c.nid].LeftChild();
|
||||
auto right_nidx = (*p_tree)[c.nid].RightChild();
|
||||
auto fewer_right = c.split.right_sum.GetHess() < c.split.left_sum.GetHess();
|
||||
|
||||
auto build_nidx = left_nidx;
|
||||
auto subtract_nidx = right_nidx;
|
||||
if (fewer_right) {
|
||||
std::swap(build_nidx, subtract_nidx);
|
||||
}
|
||||
nodes_to_build.push_back(CPUExpandEntry{build_nidx, p_tree->GetDepth(build_nidx), {}});
|
||||
nodes_to_sub.push_back(CPUExpandEntry{subtract_nidx, p_tree->GetDepth(subtract_nidx), {}});
|
||||
}
|
||||
|
||||
size_t i = 0;
|
||||
auto space = ConstructHistSpace(partitioner_, nodes_to_build);
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, BatchSpec(*param_, hess))) {
|
||||
histogram_builder_.BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(),
|
||||
nodes_to_build, nodes_to_sub, gpair);
|
||||
i++;
|
||||
}
|
||||
this->histogram_builder_.BuildHistLeftRight(
|
||||
p_fmat, p_tree, partitioner_, valid_candidates,
|
||||
linalg::MakeTensorView(ctx_, gpair, gpair.size(), 1), BatchSpec(*param_, hess));
|
||||
monitor_->Stop(__func__);
|
||||
}
|
||||
|
||||
@@ -185,7 +173,7 @@ class GloablApproxBuilder {
|
||||
void UpdateTree(DMatrix *p_fmat, std::vector<GradientPair> const &gpair, common::Span<float> hess,
|
||||
RegTree *p_tree, HostDeviceVector<bst_node_t> *p_out_position) {
|
||||
p_last_tree_ = p_tree;
|
||||
this->InitData(p_fmat, hess);
|
||||
this->InitData(p_fmat, p_tree, hess);
|
||||
|
||||
Driver<CPUExpandEntry> driver(*param_);
|
||||
auto &tree = *p_tree;
|
||||
@@ -235,7 +223,7 @@ class GloablApproxBuilder {
|
||||
best_splits.push_back(l_best);
|
||||
best_splits.push_back(r_best);
|
||||
}
|
||||
auto const &histograms = histogram_builder_.Histogram();
|
||||
auto const &histograms = histogram_builder_.Histogram(0);
|
||||
auto ft = p_fmat->Info().feature_types.ConstHostSpan();
|
||||
monitor_->Start("EvaluateSplits");
|
||||
evaluator_.EvaluateSplits(histograms, feature_values_, ft, *p_tree, &best_splits);
|
||||
|
||||
@@ -7,35 +7,37 @@
|
||||
#include <algorithm> // for max, copy, transform
|
||||
#include <cstddef> // for size_t
|
||||
#include <cstdint> // for uint32_t, int32_t
|
||||
#include <memory> // for unique_ptr, allocator, make_unique, shared_ptr
|
||||
#include <numeric> // for accumulate
|
||||
#include <ostream> // for basic_ostream, char_traits, operator<<
|
||||
#include <utility> // for move, swap
|
||||
#include <exception> // for exception
|
||||
#include <memory> // for allocator, unique_ptr, make_unique, shared_ptr
|
||||
#include <ostream> // for operator<<, basic_ostream, char_traits
|
||||
#include <utility> // for move
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../collective/aggregator.h" // for GlobalSum
|
||||
#include "../collective/communicator-inl.h" // for Allreduce, IsDistributed
|
||||
#include "../common/hist_util.h" // for HistogramCuts, HistCollection
|
||||
#include "../collective/communicator-inl.h" // for IsDistributed
|
||||
#include "../common/hist_util.h" // for HistogramCuts, GHistRow
|
||||
#include "../common/linalg_op.h" // for begin, cbegin, cend
|
||||
#include "../common/random.h" // for ColumnSampler
|
||||
#include "../common/threading_utils.h" // for ParallelFor
|
||||
#include "../common/timer.h" // for Monitor
|
||||
#include "../common/transform_iterator.h" // for IndexTransformIter, MakeIndexTransformIter
|
||||
#include "../common/transform_iterator.h" // for IndexTransformIter
|
||||
#include "../data/gradient_index.h" // for GHistIndexMatrix
|
||||
#include "common_row_partitioner.h" // for CommonRowPartitioner
|
||||
#include "dmlc/registry.h" // for DMLC_REGISTRY_FILE_TAG
|
||||
#include "driver.h" // for Driver
|
||||
#include "hist/evaluate_splits.h" // for HistEvaluator, HistMultiEvaluator, UpdatePre...
|
||||
#include "hist/expand_entry.h" // for MultiExpandEntry, CPUExpandEntry
|
||||
#include "hist/histogram.h" // for HistogramBuilder, ConstructHistSpace
|
||||
#include "hist/hist_cache.h" // for BoundedHistCollection
|
||||
#include "hist/histogram.h" // for MultiHistogramBuilder
|
||||
#include "hist/param.h" // for HistMakerTrainParam
|
||||
#include "hist/sampler.h" // for SampleGradient
|
||||
#include "param.h" // for TrainParam, SplitEntryContainer, GradStats
|
||||
#include "xgboost/base.h" // for GradientPairInternal, GradientPair, bst_targ...
|
||||
#include "param.h" // for TrainParam, GradStats
|
||||
#include "xgboost/base.h" // for Args, GradientPairPrecise, GradientPair, Gra...
|
||||
#include "xgboost/context.h" // for Context
|
||||
#include "xgboost/data.h" // for BatchIterator, BatchSet, DMatrix, MetaInfo
|
||||
#include "xgboost/data.h" // for BatchSet, DMatrix, BatchIterator, MetaInfo
|
||||
#include "xgboost/host_device_vector.h" // for HostDeviceVector
|
||||
#include "xgboost/linalg.h" // for All, MatrixView, TensorView, Matrix, Empty
|
||||
#include "xgboost/json.h" // for Object, Json, FromJson, ToJson, get
|
||||
#include "xgboost/linalg.h" // for MatrixView, TensorView, All, Matrix, Empty
|
||||
#include "xgboost/logging.h" // for LogCheck_EQ, CHECK_EQ, CHECK, LogCheck_GE
|
||||
#include "xgboost/span.h" // for Span, operator!=, SpanIterator
|
||||
#include "xgboost/string_view.h" // for operator<<
|
||||
@@ -120,7 +122,7 @@ class MultiTargetHistBuilder {
|
||||
std::shared_ptr<common::ColumnSampler> col_sampler_;
|
||||
std::unique_ptr<HistMultiEvaluator> evaluator_;
|
||||
// Histogram builder for each target.
|
||||
std::vector<HistogramBuilder<MultiExpandEntry>> histogram_builder_;
|
||||
std::unique_ptr<MultiHistogramBuilder> histogram_builder_;
|
||||
Context const *ctx_{nullptr};
|
||||
// Partitioner for each data batch.
|
||||
std::vector<CommonRowPartitioner> partitioner_;
|
||||
@@ -150,7 +152,6 @@ class MultiTargetHistBuilder {
|
||||
monitor_->Start(__func__);
|
||||
|
||||
p_last_fmat_ = p_fmat;
|
||||
std::size_t page_id = 0;
|
||||
bst_bin_t n_total_bins = 0;
|
||||
partitioner_.clear();
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
|
||||
@@ -160,16 +161,13 @@ class MultiTargetHistBuilder {
|
||||
CHECK_EQ(n_total_bins, page.cut.TotalBins());
|
||||
}
|
||||
partitioner_.emplace_back(ctx_, page.Size(), page.base_rowid, p_fmat->Info().IsColumnSplit());
|
||||
page_id++;
|
||||
}
|
||||
|
||||
bst_target_t n_targets = p_tree->NumTargets();
|
||||
histogram_builder_.clear();
|
||||
for (std::size_t i = 0; i < n_targets; ++i) {
|
||||
histogram_builder_.emplace_back();
|
||||
histogram_builder_.back().Reset(n_total_bins, HistBatch(param_), ctx_->Threads(), page_id,
|
||||
collective::IsDistributed(), p_fmat->Info().IsColumnSplit());
|
||||
}
|
||||
histogram_builder_ = std::make_unique<MultiHistogramBuilder>();
|
||||
histogram_builder_->Reset(ctx_, n_total_bins, n_targets, HistBatch(param_),
|
||||
collective::IsDistributed(), p_fmat->Info().IsColumnSplit(),
|
||||
hist_param_);
|
||||
|
||||
evaluator_ = std::make_unique<HistMultiEvaluator>(ctx_, p_fmat->Info(), param_, col_sampler_);
|
||||
p_last_tree_ = p_tree;
|
||||
@@ -204,17 +202,7 @@ class MultiTargetHistBuilder {
|
||||
collective::GlobalSum(p_fmat->Info(), reinterpret_cast<double *>(root_sum.Values().data()),
|
||||
root_sum.Size() * 2);
|
||||
|
||||
std::vector<MultiExpandEntry> nodes{best};
|
||||
std::size_t i = 0;
|
||||
auto space = ConstructHistSpace(partitioner_, nodes);
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
|
||||
for (bst_target_t t{0}; t < n_targets; ++t) {
|
||||
auto t_gpair = gpair.Slice(linalg::All(), t);
|
||||
histogram_builder_[t].BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(),
|
||||
nodes, {}, t_gpair.Values());
|
||||
}
|
||||
i++;
|
||||
}
|
||||
histogram_builder_->BuildRootHist(p_fmat, p_tree, partitioner_, gpair, best, HistBatch(param_));
|
||||
|
||||
auto weight = evaluator_->InitRoot(root_sum);
|
||||
auto weight_t = weight.HostView();
|
||||
@@ -222,9 +210,10 @@ class MultiTargetHistBuilder {
|
||||
[&](float w) { return w * param_->learning_rate; });
|
||||
|
||||
p_tree->SetLeaf(RegTree::kRoot, weight_t);
|
||||
std::vector<common::HistCollection const *> hists;
|
||||
std::vector<BoundedHistCollection const *> hists;
|
||||
std::vector<MultiExpandEntry> nodes{{RegTree::kRoot, 0}};
|
||||
for (bst_target_t t{0}; t < p_tree->NumTargets(); ++t) {
|
||||
hists.push_back(&histogram_builder_[t].Histogram());
|
||||
hists.push_back(&(*histogram_builder_).Histogram(t));
|
||||
}
|
||||
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
|
||||
evaluator_->EvaluateSplits(*p_tree, hists, gmat.cut, &nodes);
|
||||
@@ -239,50 +228,17 @@ class MultiTargetHistBuilder {
|
||||
std::vector<MultiExpandEntry> const &valid_candidates,
|
||||
linalg::MatrixView<GradientPair const> gpair) {
|
||||
monitor_->Start(__func__);
|
||||
std::vector<MultiExpandEntry> nodes_to_build;
|
||||
std::vector<MultiExpandEntry> nodes_to_sub;
|
||||
|
||||
for (auto const &c : valid_candidates) {
|
||||
auto left_nidx = p_tree->LeftChild(c.nid);
|
||||
auto right_nidx = p_tree->RightChild(c.nid);
|
||||
|
||||
auto build_nidx = left_nidx;
|
||||
auto subtract_nidx = right_nidx;
|
||||
auto lit =
|
||||
common::MakeIndexTransformIter([&](auto i) { return c.split.left_sum[i].GetHess(); });
|
||||
auto left_sum = std::accumulate(lit, lit + c.split.left_sum.size(), .0);
|
||||
auto rit =
|
||||
common::MakeIndexTransformIter([&](auto i) { return c.split.right_sum[i].GetHess(); });
|
||||
auto right_sum = std::accumulate(rit, rit + c.split.right_sum.size(), .0);
|
||||
auto fewer_right = right_sum < left_sum;
|
||||
if (fewer_right) {
|
||||
std::swap(build_nidx, subtract_nidx);
|
||||
}
|
||||
nodes_to_build.emplace_back(build_nidx, p_tree->GetDepth(build_nidx));
|
||||
nodes_to_sub.emplace_back(subtract_nidx, p_tree->GetDepth(subtract_nidx));
|
||||
}
|
||||
|
||||
std::size_t i = 0;
|
||||
auto space = ConstructHistSpace(partitioner_, nodes_to_build);
|
||||
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
|
||||
for (std::size_t t = 0; t < p_tree->NumTargets(); ++t) {
|
||||
auto t_gpair = gpair.Slice(linalg::All(), t);
|
||||
// Make sure the gradient matrix is f-order.
|
||||
CHECK(t_gpair.Contiguous());
|
||||
histogram_builder_[t].BuildHist(i, space, page, p_tree, partitioner_.at(i).Partitions(),
|
||||
nodes_to_build, nodes_to_sub, t_gpair.Values());
|
||||
}
|
||||
i++;
|
||||
}
|
||||
histogram_builder_->BuildHistLeftRight(p_fmat, p_tree, partitioner_, valid_candidates, gpair,
|
||||
HistBatch(param_));
|
||||
monitor_->Stop(__func__);
|
||||
}
|
||||
|
||||
void EvaluateSplits(DMatrix *p_fmat, RegTree const *p_tree,
|
||||
std::vector<MultiExpandEntry> *best_splits) {
|
||||
monitor_->Start(__func__);
|
||||
std::vector<common::HistCollection const *> hists;
|
||||
std::vector<BoundedHistCollection const *> hists;
|
||||
for (bst_target_t t{0}; t < p_tree->NumTargets(); ++t) {
|
||||
hists.push_back(&histogram_builder_[t].Histogram());
|
||||
hists.push_back(&(*histogram_builder_).Histogram(t));
|
||||
}
|
||||
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
|
||||
evaluator_->EvaluateSplits(*p_tree, hists, gmat.cut, best_splits);
|
||||
@@ -349,7 +305,7 @@ class HistUpdater {
|
||||
const RegTree *p_last_tree_{nullptr};
|
||||
DMatrix const *const p_last_fmat_{nullptr};
|
||||
|
||||
std::unique_ptr<HistogramBuilder<CPUExpandEntry>> histogram_builder_;
|
||||
std::unique_ptr<MultiHistogramBuilder> histogram_builder_;
|
||||
ObjInfo const *task_{nullptr};
|
||||
// Context for number of threads
|
||||
Context const *ctx_{nullptr};
|
||||
@@ -364,7 +320,7 @@ class HistUpdater {
|
||||
col_sampler_{std::move(column_sampler)},
|
||||
evaluator_{std::make_unique<HistEvaluator>(ctx, param, fmat->Info(), col_sampler_)},
|
||||
p_last_fmat_(fmat),
|
||||
histogram_builder_{new HistogramBuilder<CPUExpandEntry>},
|
||||
histogram_builder_{new MultiHistogramBuilder},
|
||||
task_{task},
|
||||
ctx_{ctx} {
|
||||
monitor_->Init(__func__);
|
||||
@@ -387,7 +343,6 @@ class HistUpdater {
|
||||
// initialize temp data structure
|
||||
void InitData(DMatrix *fmat, RegTree const *p_tree) {
|
||||
monitor_->Start(__func__);
|
||||
std::size_t page_id{0};
|
||||
bst_bin_t n_total_bins{0};
|
||||
partitioner_.clear();
|
||||
for (auto const &page : fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
|
||||
@@ -398,10 +353,9 @@ class HistUpdater {
|
||||
}
|
||||
partitioner_.emplace_back(this->ctx_, page.Size(), page.base_rowid,
|
||||
fmat->Info().IsColumnSplit());
|
||||
++page_id;
|
||||
}
|
||||
histogram_builder_->Reset(n_total_bins, HistBatch(param_), ctx_->Threads(), page_id,
|
||||
collective::IsDistributed(), fmat->Info().IsColumnSplit());
|
||||
histogram_builder_->Reset(ctx_, n_total_bins, 1, HistBatch(param_), collective::IsDistributed(),
|
||||
fmat->Info().IsColumnSplit(), hist_param_);
|
||||
evaluator_ = std::make_unique<HistEvaluator>(ctx_, this->param_, fmat->Info(), col_sampler_);
|
||||
p_last_tree_ = p_tree;
|
||||
monitor_->Stop(__func__);
|
||||
@@ -410,7 +364,7 @@ class HistUpdater {
|
||||
void EvaluateSplits(DMatrix *p_fmat, RegTree const *p_tree,
|
||||
std::vector<CPUExpandEntry> *best_splits) {
|
||||
monitor_->Start(__func__);
|
||||
auto const &histograms = histogram_builder_->Histogram();
|
||||
auto const &histograms = histogram_builder_->Histogram(0);
|
||||
auto ft = p_fmat->Info().feature_types.ConstHostSpan();
|
||||
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
|
||||
evaluator_->EvaluateSplits(histograms, gmat.cut, ft, *p_tree, best_splits);
|
||||
@@ -428,16 +382,8 @@ class HistUpdater {
|
||||
monitor_->Start(__func__);
|
||||
CPUExpandEntry node(RegTree::kRoot, p_tree->GetDepth(0));
|
||||
|
||||
std::size_t page_id = 0;
|
||||
auto space = ConstructHistSpace(partitioner_, {node});
|
||||
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
|
||||
std::vector<CPUExpandEntry> nodes_to_build{node};
|
||||
std::vector<CPUExpandEntry> nodes_to_sub;
|
||||
this->histogram_builder_->BuildHist(page_id, space, gidx, p_tree,
|
||||
partitioner_.at(page_id).Partitions(), nodes_to_build,
|
||||
nodes_to_sub, gpair.Slice(linalg::All(), 0).Values());
|
||||
++page_id;
|
||||
}
|
||||
this->histogram_builder_->BuildRootHist(p_fmat, p_tree, partitioner_, gpair, node,
|
||||
HistBatch(param_));
|
||||
|
||||
{
|
||||
GradientPairPrecise grad_stat;
|
||||
@@ -451,7 +397,7 @@ class HistUpdater {
|
||||
CHECK_GE(row_ptr.size(), 2);
|
||||
std::uint32_t const ibegin = row_ptr[0];
|
||||
std::uint32_t const iend = row_ptr[1];
|
||||
auto hist = this->histogram_builder_->Histogram()[RegTree::kRoot];
|
||||
auto hist = this->histogram_builder_->Histogram(0)[RegTree::kRoot];
|
||||
auto begin = hist.data();
|
||||
for (std::uint32_t i = ibegin; i < iend; ++i) {
|
||||
GradientPairPrecise const &et = begin[i];
|
||||
@@ -474,7 +420,7 @@ class HistUpdater {
|
||||
monitor_->Start("EvaluateSplits");
|
||||
auto ft = p_fmat->Info().feature_types.ConstHostSpan();
|
||||
for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
|
||||
evaluator_->EvaluateSplits(histogram_builder_->Histogram(), gmat.cut, ft, *p_tree,
|
||||
evaluator_->EvaluateSplits(histogram_builder_->Histogram(0), gmat.cut, ft, *p_tree,
|
||||
&entries);
|
||||
break;
|
||||
}
|
||||
@@ -490,33 +436,8 @@ class HistUpdater {
|
||||
std::vector<CPUExpandEntry> const &valid_candidates,
|
||||
linalg::MatrixView<GradientPair const> gpair) {
|
||||
monitor_->Start(__func__);
|
||||
std::vector<CPUExpandEntry> nodes_to_build(valid_candidates.size());
|
||||
std::vector<CPUExpandEntry> nodes_to_sub(valid_candidates.size());
|
||||
|
||||
std::size_t n_idx = 0;
|
||||
for (auto const &c : valid_candidates) {
|
||||
auto left_nidx = (*p_tree)[c.nid].LeftChild();
|
||||
auto right_nidx = (*p_tree)[c.nid].RightChild();
|
||||
auto fewer_right = c.split.right_sum.GetHess() < c.split.left_sum.GetHess();
|
||||
|
||||
auto build_nidx = left_nidx;
|
||||
auto subtract_nidx = right_nidx;
|
||||
if (fewer_right) {
|
||||
std::swap(build_nidx, subtract_nidx);
|
||||
}
|
||||
nodes_to_build[n_idx] = CPUExpandEntry{build_nidx, p_tree->GetDepth(build_nidx), {}};
|
||||
nodes_to_sub[n_idx] = CPUExpandEntry{subtract_nidx, p_tree->GetDepth(subtract_nidx), {}};
|
||||
n_idx++;
|
||||
}
|
||||
|
||||
std::size_t page_id{0};
|
||||
auto space = ConstructHistSpace(partitioner_, nodes_to_build);
|
||||
for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
|
||||
histogram_builder_->BuildHist(page_id, space, gidx, p_tree,
|
||||
partitioner_.at(page_id).Partitions(), nodes_to_build,
|
||||
nodes_to_sub, gpair.Values());
|
||||
++page_id;
|
||||
}
|
||||
this->histogram_builder_->BuildHistLeftRight(p_fmat, p_tree, partitioner_, valid_candidates,
|
||||
gpair, HistBatch(param_));
|
||||
monitor_->Stop(__func__);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user