Bound the size of the histogram cache. (#9440)
- A new histogram collection with a limit in size. - Unify histogram building logic between hist, multi-hist, and approx.
This commit is contained in:
@@ -4,13 +4,13 @@
|
||||
#include "../test_evaluate_splits.h"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/base.h> // for GradientPairPrecise, Args, Gradie...
|
||||
#include <xgboost/context.h> // for Context
|
||||
#include <xgboost/data.h> // for FeatureType, DMatrix, MetaInfo
|
||||
#include <xgboost/logging.h> // for CHECK_EQ
|
||||
#include <xgboost/tree_model.h> // for RegTree, RTreeNodeStat
|
||||
#include <xgboost/base.h> // for GradientPairPrecise, Args, Gradie...
|
||||
#include <xgboost/context.h> // for Context
|
||||
#include <xgboost/data.h> // for FeatureType, DMatrix, MetaInfo
|
||||
#include <xgboost/logging.h> // for CHECK_EQ
|
||||
#include <xgboost/tree_model.h> // for RegTree, RTreeNodeStat
|
||||
|
||||
#include <memory> // for make_shared, shared_ptr, addressof
|
||||
#include <memory> // for make_shared, shared_ptr, addressof
|
||||
|
||||
#include "../../../../src/common/hist_util.h" // for HistCollection, HistogramCuts
|
||||
#include "../../../../src/common/random.h" // for ColumnSampler
|
||||
@@ -18,6 +18,8 @@
|
||||
#include "../../../../src/data/gradient_index.h" // for GHistIndexMatrix
|
||||
#include "../../../../src/tree/hist/evaluate_splits.h" // for HistEvaluator
|
||||
#include "../../../../src/tree/hist/expand_entry.h" // for CPUExpandEntry
|
||||
#include "../../../../src/tree/hist/hist_cache.h" // for BoundedHistCollection
|
||||
#include "../../../../src/tree/hist/param.h" // for HistMakerTrainParam
|
||||
#include "../../../../src/tree/param.h" // for GradStats, TrainParam
|
||||
#include "../../helpers.h" // for RandomDataGenerator, AllThreadsFo...
|
||||
|
||||
@@ -34,7 +36,7 @@ void TestEvaluateSplits(bool force_read_by_column) {
|
||||
auto dmat = RandomDataGenerator(kRows, kCols, 0).Seed(3).GenerateDMatrix();
|
||||
|
||||
auto evaluator = HistEvaluator{&ctx, ¶m, dmat->Info(), sampler};
|
||||
common::HistCollection hist;
|
||||
BoundedHistCollection hist;
|
||||
std::vector<GradientPair> row_gpairs = {
|
||||
{1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f},
|
||||
{0.27f, 0.29f}, {0.37f, 0.39f}, {-0.47f, 0.49f}, {0.57f, 0.59f}};
|
||||
@@ -48,9 +50,9 @@ void TestEvaluateSplits(bool force_read_by_column) {
|
||||
std::iota(row_indices.begin(), row_indices.end(), 0);
|
||||
row_set_collection.Init();
|
||||
|
||||
hist.Init(gmat.cut.Ptrs().back());
|
||||
hist.AddHistRow(0);
|
||||
hist.AllocateAllData();
|
||||
HistMakerTrainParam hist_param;
|
||||
hist.Reset(gmat.cut.Ptrs().back(), hist_param.internal_max_cached_hist_node);
|
||||
hist.AllocateHistograms({0});
|
||||
common::BuildHist<false>(row_gpairs, row_set_collection[0], gmat, hist[0], force_read_by_column);
|
||||
|
||||
// Compute total gradient for all data points
|
||||
@@ -111,13 +113,13 @@ TEST(HistMultiEvaluator, Evaluate) {
|
||||
RandomDataGenerator{n_samples, n_features, 0.5}.Targets(n_targets).GenerateDMatrix(true);
|
||||
|
||||
HistMultiEvaluator evaluator{&ctx, p_fmat->Info(), ¶m, sampler};
|
||||
std::vector<common::HistCollection> histogram(n_targets);
|
||||
HistMakerTrainParam hist_param;
|
||||
std::vector<BoundedHistCollection> histogram(n_targets);
|
||||
linalg::Vector<GradientPairPrecise> root_sum({2}, Context::kCpuId);
|
||||
for (bst_target_t t{0}; t < n_targets; ++t) {
|
||||
auto &hist = histogram[t];
|
||||
hist.Init(n_bins * n_features);
|
||||
hist.AddHistRow(0);
|
||||
hist.AllocateAllData();
|
||||
hist.Reset(n_bins * n_features, hist_param.internal_max_cached_hist_node);
|
||||
hist.AllocateHistograms({0});
|
||||
auto node_hist = hist[0];
|
||||
node_hist[0] = {-0.5, 0.5};
|
||||
node_hist[1] = {2.0, 0.5};
|
||||
@@ -143,7 +145,7 @@ TEST(HistMultiEvaluator, Evaluate) {
|
||||
|
||||
std::vector<MultiExpandEntry> entries(1, {/*nidx=*/0, /*depth=*/0});
|
||||
|
||||
std::vector<common::HistCollection const *> ptrs;
|
||||
std::vector<BoundedHistCollection const *> ptrs;
|
||||
std::transform(histogram.cbegin(), histogram.cend(), std::back_inserter(ptrs),
|
||||
[](auto const &h) { return std::addressof(h); });
|
||||
|
||||
@@ -225,16 +227,16 @@ auto CompareOneHotAndPartition(bool onehot) {
|
||||
auto sampler = std::make_shared<common::ColumnSampler>();
|
||||
auto evaluator = HistEvaluator{&ctx, ¶m, dmat->Info(), sampler};
|
||||
std::vector<CPUExpandEntry> entries(1);
|
||||
HistMakerTrainParam hist_param;
|
||||
|
||||
for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>(&ctx, {32, param.sparse_threshold})) {
|
||||
common::HistCollection hist;
|
||||
BoundedHistCollection hist;
|
||||
|
||||
entries.front().nid = 0;
|
||||
entries.front().depth = 0;
|
||||
|
||||
hist.Init(gmat.cut.TotalBins());
|
||||
hist.AddHistRow(0);
|
||||
hist.AllocateAllData();
|
||||
hist.Reset(gmat.cut.TotalBins(), hist_param.internal_max_cached_hist_node);
|
||||
hist.AllocateHistograms({0});
|
||||
auto node_hist = hist[0];
|
||||
|
||||
CHECK_EQ(node_hist.size(), n_cats);
|
||||
@@ -261,10 +263,10 @@ TEST(HistEvaluator, Categorical) {
|
||||
}
|
||||
|
||||
TEST_F(TestCategoricalSplitWithMissing, HistEvaluator) {
|
||||
common::HistCollection hist;
|
||||
hist.Init(cuts_.TotalBins());
|
||||
hist.AddHistRow(0);
|
||||
hist.AllocateAllData();
|
||||
BoundedHistCollection hist;
|
||||
HistMakerTrainParam hist_param;
|
||||
hist.Reset(cuts_.TotalBins(), hist_param.internal_max_cached_hist_node);
|
||||
hist.AllocateHistograms({0});
|
||||
auto node_hist = hist[0];
|
||||
ASSERT_EQ(node_hist.size(), feature_histogram_.size());
|
||||
std::copy(feature_histogram_.cbegin(), feature_histogram_.cend(), node_hist.begin());
|
||||
|
||||
Reference in New Issue
Block a user