Use realloc for histogram cache and expose the cache limit. (#9455)
This commit is contained in:
@@ -5,12 +5,14 @@
|
||||
#define XGBOOST_TREE_HIST_HIST_CACHE_H_
|
||||
#include <cstddef> // for size_t
|
||||
#include <map> // for map
|
||||
#include <memory> // for unique_ptr
|
||||
#include <vector> // for vector
|
||||
|
||||
#include "../../common/hist_util.h" // for GHistRow, ConstGHistRow
|
||||
#include "xgboost/base.h" // for bst_node_t, bst_bin_t
|
||||
#include "xgboost/logging.h" // for CHECK_GT
|
||||
#include "xgboost/span.h" // for Span
|
||||
#include "../../common/hist_util.h" // for GHistRow, ConstGHistRow
|
||||
#include "../../common/ref_resource_view.h" // for ReallocVector
|
||||
#include "xgboost/base.h" // for bst_node_t, bst_bin_t
|
||||
#include "xgboost/logging.h" // for CHECK_GT
|
||||
#include "xgboost/span.h" // for Span
|
||||
|
||||
namespace xgboost::tree {
|
||||
/**
|
||||
@@ -32,7 +34,8 @@ class BoundedHistCollection {
|
||||
std::size_t current_size_{0};
|
||||
|
||||
// stores the histograms in a contiguous buffer
|
||||
std::vector<GradientPairPrecise> data_;
|
||||
using Vec = common::ReallocVector<GradientPairPrecise>;
|
||||
std::unique_ptr<Vec> data_{new Vec{}}; // nvcc 12.1 trips over std::make_unique
|
||||
|
||||
// number of histogram bins across all features
|
||||
bst_bin_t n_total_bins_{0};
|
||||
@@ -42,13 +45,14 @@ class BoundedHistCollection {
|
||||
bool has_exceeded_{false};
|
||||
|
||||
public:
|
||||
BoundedHistCollection() = default;
|
||||
/**
 * @brief Mutable access to the histogram stored for one node.
 *
 * Looks up the starting offset recorded for `idx` in node_map_ via `.at()`, then
 * returns a view of n_total_bins_ consecutive entries of the histogram buffer
 * beginning at that offset.
 *
 * NOTE(review): node_map_ is presumably a std::map (the header includes <map>), in
 * which case `.at()` throws for an unknown `idx` -- confirm against its declaration.
 * NOTE(review): this is a diff rendering, so both the pre-change return (`data_.`,
 * value-held buffer) and the post-change return (`data_->`, pointer-held buffer)
 * appear in the body; only one of them belongs to any given revision of the file.
 *
 * @param idx Key of the histogram to fetch, as stored in node_map_.
 * @return Span covering the n_total_bins_ entries starting at the recorded offset.
 */
common::GHistRow operator[](std::size_t idx) {
|
||||
auto offset = node_map_.at(idx);
|
||||
return common::Span{data_.data(), data_.size()}.subspan(offset, n_total_bins_);
|
||||
return common::Span{data_->data(), data_->size()}.subspan(offset, n_total_bins_);
|
||||
}
|
||||
/**
 * @brief Read-only access to the histogram stored for one node.
 *
 * Const counterpart of the mutable accessor: reads the offset recorded for `idx`
 * in node_map_ via `.at()` and returns a view of n_total_bins_ consecutive entries
 * of the histogram buffer starting there.
 *
 * NOTE(review): node_map_ is presumably a std::map (the header includes <map>), in
 * which case `.at()` throws for an unknown `idx` -- confirm against its declaration.
 * NOTE(review): this is a diff rendering, so both the pre-change return (`data_.`)
 * and the post-change return (`data_->`) appear in the body; only one of them
 * belongs to any given revision of the file.
 *
 * @param idx Key of the histogram to fetch, as stored in node_map_.
 * @return Const span covering the n_total_bins_ entries at the recorded offset.
 */
common::ConstGHistRow operator[](std::size_t idx) const {
|
||||
auto offset = node_map_.at(idx);
|
||||
return common::Span{data_.data(), data_.size()}.subspan(offset, n_total_bins_);
|
||||
return common::Span{data_->data(), data_->size()}.subspan(offset, n_total_bins_);
|
||||
}
|
||||
void Reset(bst_bin_t n_total_bins, std::size_t n_cached_nodes) {
|
||||
n_total_bins_ = n_total_bins;
|
||||
@@ -81,8 +85,8 @@ class BoundedHistCollection {
|
||||
auto n_new_nodes = nodes_to_build.size() + nodes_to_sub.size();
|
||||
auto alloc_size = n_new_nodes * n_total_bins_;
|
||||
auto new_size = alloc_size + current_size_;
|
||||
if (new_size > data_.size()) {
|
||||
data_.resize(new_size);
|
||||
if (new_size > data_->size()) {
|
||||
data_->Resize(new_size);
|
||||
}
|
||||
for (auto nidx : nodes_to_build) {
|
||||
node_map_[nidx] = current_size_;
|
||||
|
||||
@@ -63,7 +63,7 @@ class HistogramBuilder {
|
||||
bool is_col_split, HistMakerTrainParam const *param) {
|
||||
n_threads_ = ctx->Threads();
|
||||
param_ = p;
|
||||
hist_.Reset(total_bins, param->internal_max_cached_hist_node);
|
||||
hist_.Reset(total_bins, param->max_cached_hist_node);
|
||||
buffer_.Init(total_bins);
|
||||
is_distributed_ = is_distributed;
|
||||
is_col_split_ = is_col_split;
|
||||
|
||||
@@ -13,7 +13,7 @@ struct HistMakerTrainParam : public XGBoostParameter<HistMakerTrainParam> {
|
||||
/** @brief Compile-time constant 2^16 (65536), returned as std::size_t. */
constexpr static std::size_t DefaultNodes() {
  constexpr std::size_t kLog2Nodes = 16;
  return std::size_t{1} << kLog2Nodes;
}
|
||||
|
||||
bool debug_synchronize{false};
|
||||
std::size_t internal_max_cached_hist_node{DefaultNodes()};
|
||||
std::size_t max_cached_hist_node{DefaultNodes()};
|
||||
|
||||
void CheckTreesSynchronized(RegTree const* local_tree) const;
|
||||
|
||||
@@ -22,7 +22,7 @@ struct HistMakerTrainParam : public XGBoostParameter<HistMakerTrainParam> {
|
||||
DMLC_DECLARE_FIELD(debug_synchronize)
|
||||
.set_default(false)
|
||||
.describe("Check if all distributed tree are identical after tree construction.");
|
||||
DMLC_DECLARE_FIELD(internal_max_cached_hist_node)
|
||||
DMLC_DECLARE_FIELD(max_cached_hist_node)
|
||||
.set_default(DefaultNodes())
|
||||
.set_lower_bound(1)
|
||||
.describe("Maximum number of nodes in CPU histogram cache. Only for internal usage.");
|
||||
|
||||
@@ -866,6 +866,9 @@ class GPUGlobalApproxMaker : public TreeUpdater {
|
||||
// Used in test to count how many configurations are performed
|
||||
LOG(DEBUG) << "[GPU Approx]: Configure";
|
||||
hist_maker_param_.UpdateAllowUnknown(args);
|
||||
if (hist_maker_param_.max_cached_hist_node != HistMakerTrainParam::DefaultNodes()) {
|
||||
LOG(WARNING) << "The `max_cached_hist_node` is ignored in GPU.";
|
||||
}
|
||||
dh::CheckComputeCapability();
|
||||
initialised_ = false;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user