Use realloc for histogram cache and expose the cache limit. (#9455)
This commit is contained in:
parent
a57371ef7c
commit
1caa93221a
@ -226,6 +226,15 @@ Parameters for Tree Booster
|
||||
- ``one_output_per_tree``: One model for each target.
|
||||
- ``multi_output_tree``: Use multi-target trees.
|
||||
|
||||
* ``max_cached_hist_node``, [default = 65536]
|
||||
|
||||
Maximum number of cached nodes for CPU histogram.
|
||||
|
||||
.. versionadded:: 2.0.0
|
||||
|
||||
- In most cases this parameter should not be set, except when growing deep trees
|
||||
on CPU.
|
||||
|
||||
.. _cat-param:
|
||||
|
||||
Parameters for Categorical Feature
|
||||
|
||||
@ -42,7 +42,7 @@ hist_parameter_strategy = strategies.fixed_dictionaries(
|
||||
)
|
||||
|
||||
hist_cache_strategy = strategies.fixed_dictionaries(
|
||||
{"internal_max_cached_hist_node": strategies.sampled_from([1, 4, 1024, 2**31])}
|
||||
{"max_cached_hist_node": strategies.sampled_from([1, 4, 1024, 2**31])}
|
||||
)
|
||||
|
||||
hist_multi_parameter_strategy = strategies.fixed_dictionaries(
|
||||
|
||||
@ -35,6 +35,13 @@ class RefResourceView {
|
||||
size_type size_{0};
|
||||
std::shared_ptr<common::ResourceHandler> mem_{nullptr};
|
||||
|
||||
protected:
|
||||
void Init(value_type* ptr, size_type size, std::shared_ptr<common::ResourceHandler> mem) {
|
||||
ptr_ = ptr;
|
||||
size_ = size;
|
||||
mem_ = std::move(mem);
|
||||
}
|
||||
|
||||
public:
|
||||
RefResourceView(value_type* ptr, size_type n, std::shared_ptr<common::ResourceHandler> mem)
|
||||
: ptr_{ptr}, size_{n}, mem_{std::move(mem)} {
|
||||
@ -60,11 +67,11 @@ class RefResourceView {
|
||||
|
||||
RefResourceView() = default;
|
||||
RefResourceView(RefResourceView const& that) = delete;
|
||||
RefResourceView(RefResourceView&& that) = delete;
|
||||
RefResourceView& operator=(RefResourceView const& that) = delete;
|
||||
/**
|
||||
* @brief We allow move construction and move assignment for lazy initialization.
|
||||
*/
|
||||
RefResourceView(RefResourceView&& that) = default;
|
||||
RefResourceView& operator=(RefResourceView&& that) = default;
|
||||
|
||||
[[nodiscard]] size_type size() const { return size_; } // NOLINT
|
||||
@ -154,5 +161,33 @@ template <typename T>
|
||||
auto resource = std::make_shared<common::MallocResource>(n_elements * sizeof(T));
|
||||
return RefResourceView{resource->DataAs<T>(), n_elements, resource, init};
|
||||
}
|
||||
|
||||
/**
 * @brief A RefResourceView whose element count can be changed after construction
 *        through the MallocResource that backs it.
 *
 * Copy and move are both disabled for this type; the underlying buffer is
 * created with MakeFixedVecWithMalloc.
 */
template <typename T>
|
||||
class ReallocVector : public RefResourceView<T> {
|
||||
// Restrict T to non-reference, non-const, trivially copyable types.
// NOTE(review): presumably required because the buffer is resized as raw bytes;
// confirm against MallocResource::Resize.
static_assert(!std::is_reference_v<T>);
|
||||
static_assert(!std::is_const_v<T>);
|
||||
static_assert(std::is_trivially_copyable_v<T>);
|
||||
|
||||
using Upper = RefResourceView<T>;
|
||||
using size_type = typename Upper::size_type; // NOLINT
|
||||
using value_type = typename Upper::value_type; // NOLINT
|
||||
|
||||
public:
|
||||
// Construct an empty vector: zero elements obtained from MakeFixedVecWithMalloc.
ReallocVector() : RefResourceView<T>{MakeFixedVecWithMalloc(0, T{})} {}
|
||||
|
||||
// Construct with n elements, forwarding n and init to MakeFixedVecWithMalloc.
ReallocVector(size_type n, value_type const& init)
|
||||
: RefResourceView<T>{MakeFixedVecWithMalloc(n, init)} {}
|
||||
// Neither copy nor move is permitted for this type.
ReallocVector(ReallocVector const& that) = delete;
|
||||
ReallocVector(ReallocVector&& that) = delete;
|
||||
ReallocVector& operator=(ReallocVector const& that) = delete;
|
||||
ReallocVector& operator=(ReallocVector&& that) = delete;
|
||||
|
||||
/**
 * @brief Change the number of elements held by this vector.
 *
 * @param new_size New size in number of elements (converted to bytes internally).
 *
 * The backing resource must be a MallocResource; the CHECK fails otherwise.
 * NOTE(review): the values of retained and newly added elements are decided by
 * MallocResource::Resize, which is not visible here - confirm before relying on them.
 */
void Resize(typename Upper::size_type new_size) {
|
||||
auto resource = std::dynamic_pointer_cast<common::MallocResource>(this->Resource());
|
||||
CHECK(resource);
|
||||
resource->Resize(new_size * sizeof(T));
|
||||
// Re-read the data pointer from the resource rather than reusing the old one,
// and update the stored size and resource handle.
this->Init(resource->template DataAs<T>(), new_size, resource);
|
||||
}
|
||||
};
|
||||
} // namespace xgboost::common
|
||||
#endif // XGBOOST_COMMON_REF_RESOURCE_VIEW_H_
|
||||
|
||||
@ -5,9 +5,11 @@
|
||||
#define XGBOOST_TREE_HIST_HIST_CACHE_H_
|
||||
#include <cstddef> // for size_t
|
||||
#include <map> // for map
|
||||
#include <memory> // for unique_ptr
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../common/hist_util.h" // for GHistRow, ConstGHistRow
|
||||
#include "../../common/ref_resource_view.h" // for ReallocVector
|
||||
#include "xgboost/base.h" // for bst_node_t, bst_bin_t
|
||||
#include "xgboost/logging.h" // for CHECK_GT
|
||||
#include "xgboost/span.h" // for Span
|
||||
@ -32,7 +34,8 @@ class BoundedHistCollection {
|
||||
std::size_t current_size_{0};
|
||||
|
||||
// stores the histograms in a contiguous buffer
|
||||
std::vector<GradientPairPrecise> data_;
|
||||
using Vec = common::ReallocVector<GradientPairPrecise>;
|
||||
std::unique_ptr<Vec> data_{new Vec{}}; // nvcc 12.1 trips over std::make_unique
|
||||
|
||||
// number of histogram bins across all features
|
||||
bst_bin_t n_total_bins_{0};
|
||||
@ -42,13 +45,14 @@ class BoundedHistCollection {
|
||||
bool has_exceeded_{false};
|
||||
|
||||
public:
|
||||
BoundedHistCollection() = default;
|
||||
common::GHistRow operator[](std::size_t idx) {
|
||||
auto offset = node_map_.at(idx);
|
||||
return common::Span{data_.data(), data_.size()}.subspan(offset, n_total_bins_);
|
||||
return common::Span{data_->data(), data_->size()}.subspan(offset, n_total_bins_);
|
||||
}
|
||||
common::ConstGHistRow operator[](std::size_t idx) const {
|
||||
auto offset = node_map_.at(idx);
|
||||
return common::Span{data_.data(), data_.size()}.subspan(offset, n_total_bins_);
|
||||
return common::Span{data_->data(), data_->size()}.subspan(offset, n_total_bins_);
|
||||
}
|
||||
void Reset(bst_bin_t n_total_bins, std::size_t n_cached_nodes) {
|
||||
n_total_bins_ = n_total_bins;
|
||||
@ -81,8 +85,8 @@ class BoundedHistCollection {
|
||||
auto n_new_nodes = nodes_to_build.size() + nodes_to_sub.size();
|
||||
auto alloc_size = n_new_nodes * n_total_bins_;
|
||||
auto new_size = alloc_size + current_size_;
|
||||
if (new_size > data_.size()) {
|
||||
data_.resize(new_size);
|
||||
if (new_size > data_->size()) {
|
||||
data_->Resize(new_size);
|
||||
}
|
||||
for (auto nidx : nodes_to_build) {
|
||||
node_map_[nidx] = current_size_;
|
||||
|
||||
@ -63,7 +63,7 @@ class HistogramBuilder {
|
||||
bool is_col_split, HistMakerTrainParam const *param) {
|
||||
n_threads_ = ctx->Threads();
|
||||
param_ = p;
|
||||
hist_.Reset(total_bins, param->internal_max_cached_hist_node);
|
||||
hist_.Reset(total_bins, param->max_cached_hist_node);
|
||||
buffer_.Init(total_bins);
|
||||
is_distributed_ = is_distributed;
|
||||
is_col_split_ = is_col_split;
|
||||
|
||||
@ -13,7 +13,7 @@ struct HistMakerTrainParam : public XGBoostParameter<HistMakerTrainParam> {
|
||||
constexpr static std::size_t DefaultNodes() { return static_cast<std::size_t>(1) << 16; }
|
||||
|
||||
bool debug_synchronize{false};
|
||||
std::size_t internal_max_cached_hist_node{DefaultNodes()};
|
||||
std::size_t max_cached_hist_node{DefaultNodes()};
|
||||
|
||||
void CheckTreesSynchronized(RegTree const* local_tree) const;
|
||||
|
||||
@ -22,7 +22,7 @@ struct HistMakerTrainParam : public XGBoostParameter<HistMakerTrainParam> {
|
||||
DMLC_DECLARE_FIELD(debug_synchronize)
|
||||
.set_default(false)
|
||||
.describe("Check if all distributed tree are identical after tree construction.");
|
||||
DMLC_DECLARE_FIELD(internal_max_cached_hist_node)
|
||||
// Upper bound on the number of nodes kept in the CPU histogram cache. The field is
// exposed as a regular user parameter, so its description must not call it internal.
DMLC_DECLARE_FIELD(max_cached_hist_node)
|
||||
.set_default(DefaultNodes())
|
||||
.set_lower_bound(1)
|
||||
.describe("Maximum number of nodes in CPU histogram cache.");
|
||||
|
||||
@ -866,6 +866,9 @@ class GPUGlobalApproxMaker : public TreeUpdater {
|
||||
// Used in test to count how many configurations are performed
|
||||
LOG(DEBUG) << "[GPU Approx]: Configure";
|
||||
hist_maker_param_.UpdateAllowUnknown(args);
|
||||
if (hist_maker_param_.max_cached_hist_node != HistMakerTrainParam::DefaultNodes()) {
|
||||
LOG(WARNING) << "The `max_cached_hist_node` is ignored in GPU.";
|
||||
}
|
||||
dh::CheckComputeCapability();
|
||||
initialised_ = false;
|
||||
|
||||
|
||||
@ -51,7 +51,7 @@ void TestEvaluateSplits(bool force_read_by_column) {
|
||||
row_set_collection.Init();
|
||||
|
||||
HistMakerTrainParam hist_param;
|
||||
hist.Reset(gmat.cut.Ptrs().back(), hist_param.internal_max_cached_hist_node);
|
||||
hist.Reset(gmat.cut.Ptrs().back(), hist_param.max_cached_hist_node);
|
||||
hist.AllocateHistograms({0});
|
||||
common::BuildHist<false>(row_gpairs, row_set_collection[0], gmat, hist[0], force_read_by_column);
|
||||
|
||||
@ -118,7 +118,7 @@ TEST(HistMultiEvaluator, Evaluate) {
|
||||
linalg::Vector<GradientPairPrecise> root_sum({2}, Context::kCpuId);
|
||||
for (bst_target_t t{0}; t < n_targets; ++t) {
|
||||
auto &hist = histogram[t];
|
||||
hist.Reset(n_bins * n_features, hist_param.internal_max_cached_hist_node);
|
||||
hist.Reset(n_bins * n_features, hist_param.max_cached_hist_node);
|
||||
hist.AllocateHistograms({0});
|
||||
auto node_hist = hist[0];
|
||||
node_hist[0] = {-0.5, 0.5};
|
||||
@ -235,7 +235,7 @@ auto CompareOneHotAndPartition(bool onehot) {
|
||||
entries.front().nid = 0;
|
||||
entries.front().depth = 0;
|
||||
|
||||
hist.Reset(gmat.cut.TotalBins(), hist_param.internal_max_cached_hist_node);
|
||||
hist.Reset(gmat.cut.TotalBins(), hist_param.max_cached_hist_node);
|
||||
hist.AllocateHistograms({0});
|
||||
auto node_hist = hist[0];
|
||||
|
||||
@ -265,7 +265,7 @@ TEST(HistEvaluator, Categorical) {
|
||||
TEST_F(TestCategoricalSplitWithMissing, HistEvaluator) {
|
||||
BoundedHistCollection hist;
|
||||
HistMakerTrainParam hist_param;
|
||||
hist.Reset(cuts_.TotalBins(), hist_param.internal_max_cached_hist_node);
|
||||
hist.Reset(cuts_.TotalBins(), hist_param.max_cached_hist_node);
|
||||
hist.AllocateHistograms({0});
|
||||
auto node_hist = hist[0];
|
||||
ASSERT_EQ(node_hist.size(), feature_histogram_.size());
|
||||
|
||||
@ -516,7 +516,7 @@ class OverflowTest : public ::testing::TestWithParam<std::tuple<bool, bool>> {
|
||||
Context ctx;
|
||||
HistMakerTrainParam hist_param;
|
||||
if (limit) {
|
||||
hist_param.Init(Args{{"internal_max_cached_hist_node", "1"}});
|
||||
hist_param.Init(Args{{"max_cached_hist_node", "1"}});
|
||||
}
|
||||
|
||||
std::shared_ptr<DMatrix> Xy =
|
||||
|
||||
@ -59,7 +59,7 @@ class TestPartitionBasedSplit : public ::testing::Test {
|
||||
cuts_.min_vals_.Resize(1);
|
||||
|
||||
HistMakerTrainParam hist_param;
|
||||
hist_.Reset(cuts_.TotalBins(), hist_param.internal_max_cached_hist_node);
|
||||
hist_.Reset(cuts_.TotalBins(), hist_param.max_cached_hist_node);
|
||||
hist_.AllocateHistograms({0});
|
||||
auto node_hist = hist_[0];
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user