Prepare external memory support for hist. (#7638)

This PR prepares the GHistIndexMatrix to host the column matrix used by the hist tree method, by having it accept a sparse_threshold parameter.
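
A minimal sketch of the resulting call shape (not an excerpt from the commit; the include paths and the RandomDataGenerator helper are assumptions based on the in-tree test utilities): the GHistIndexMatrix constructor gains a sparse threshold argument, and BatchParam pairs max_bin with sparse_thresh in place of a device ordinal.

#include "../../src/common/threading_utils.h"  // common::OmpGetNumThreads (path assumed)
#include "../../src/data/gradient_index.h"     // GHistIndexMatrix (path assumed)
#include "../helpers.h"                        // RandomDataGenerator test helper (assumed)

namespace xgboost {
void SketchSparseThreshold() {
  int32_t constexpr kMaxBins = 32;
  auto dmat = RandomDataGenerator(256, 16, 0.5).GenerateDMatrix();
  // In-core construction: the new third argument is the sparse threshold that
  // decides below which density a feature column is stored as sparse.
  GHistIndexMatrix gmat{dmat.get(), kMaxBins, /*sparse_thresh=*/0.2, false,
                        common::OmpGetNumThreads(0)};
  // Batch access: BatchParam now carries {max_bin, sparse_thresh}.
  for (auto const &page : dmat->GetBatches<GHistIndexMatrix>({kMaxBins, 0.2})) {
    CHECK_EQ(page.cut.TotalBins(), gmat.cut.TotalBins());
  }
}
}  // namespace xgboost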

Some cleanups are made to ensure the correct batch param is passed into DMatrix, along with additional tests for the correctness of SimpleDMatrix.
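
The regeneration tests added below hinge on one caching rule: a cached page is rebuilt only when the requested batch parameter differs from the one the cache was built with. A self-contained sketch of that rule follows (all names here are invented for illustration, not xgboost internals):

#include <memory>

struct BatchParam {
  int max_bin{0};
  double sparse_thresh{0.0};
  bool operator!=(BatchParam const &that) const {
    return max_bin != that.max_bin || sparse_thresh != that.sparse_thresh;
  }
};

struct Page {
  BatchParam built_with;
};

class PageCache {
  std::shared_ptr<Page> page_;

 public:
  // Rebuild only on a parameter change; each rebuild is one "regen" in the
  // terminology of the tests below.
  std::shared_ptr<Page> Get(BatchParam param) {
    if (!page_ || page_->built_with != param) {
      page_ = std::make_shared<Page>(Page{param});  // regeneration
    }
    return page_;
  }
};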
Jiaming Yuan
2022-02-10 16:58:02 +08:00
committed by GitHub
parent 87c01f49d8
commit 2775c2a1ab
24 changed files with 368 additions and 201 deletions

View File

@@ -31,8 +31,7 @@ template <typename GradientSumT> void TestEvaluateSplits() {
size_t constexpr kMaxBins = 4;
// dense, no missing values
-GHistIndexMatrix gmat(dmat.get(), kMaxBins, false, common::OmpGetNumThreads(0));
+GHistIndexMatrix gmat(dmat.get(), kMaxBins, 0.5, false, common::OmpGetNumThreads(0));
common::RowSetCollection row_set_collection;
std::vector<size_t> &row_indices = *row_set_collection.Data();
row_indices.resize(kRows);
@@ -127,7 +126,7 @@ TEST(HistEvaluator, CategoricalPartition) {
auto evaluator = HistEvaluator<GradientSumT, CPUExpandEntry>{
param, dmat->Info(), n_threads, sampler, ObjInfo{ObjInfo::kRegression}};
-for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>({GenericParameter::kCpuId, 32})) {
+for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>({32, param.sparse_threshold})) {
common::HistCollection<GradientSumT> hist;
std::vector<CPUExpandEntry> entries(1);
@@ -212,7 +211,7 @@ auto CompareOneHotAndPartition(bool onehot) {
param, dmat->Info(), n_threads, sampler, ObjInfo{ObjInfo::kRegression}};
std::vector<CPUExpandEntry> entries(1);
-for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>({GenericParameter::kCpuId, 32})) {
+for (auto const &gmat : dmat->GetBatches<GHistIndexMatrix>({32, param.sparse_threshold})) {
common::HistCollection<GradientSumT> hist;
entries.front().nid = 0;

View File

@@ -1,20 +1,20 @@
/*!
- * Copyright 2018-2021 by Contributors
+ * Copyright 2018-2022 by Contributors
*/
#include <gtest/gtest.h>
#include "../../helpers.h"
#include "../../categorical_helpers.h"
#include <limits>
#include "../../../../src/common/categorical.h"
#include "../../../../src/tree/hist/histogram.h"
#include "../../../../src/tree/updater_quantile_hist.h"
#include "../../categorical_helpers.h"
#include "../../helpers.h"
namespace xgboost {
namespace tree {
namespace {
-void InitRowPartitionForTest(RowSetCollection *row_set, size_t n_samples,
-                             size_t base_rowid = 0) {
+void InitRowPartitionForTest(RowSetCollection *row_set, size_t n_samples, size_t base_rowid = 0) {
auto &row_indices = *row_set->Data();
row_indices.resize(n_samples);
std::iota(row_indices.begin(), row_indices.end(), base_rowid);
@@ -33,10 +33,7 @@ void TestAddHistRows(bool is_distributed) {
int32_t constexpr kMaxBins = 4;
auto p_fmat =
RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
-auto const &gmat = *(p_fmat
-                         ->GetBatches<GHistIndexMatrix>(
-                             BatchParam{GenericParameter::kCpuId, kMaxBins})
-                         .begin());
+auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());
RegTree tree;
@@ -49,9 +46,8 @@ void TestAddHistRows(bool is_distributed) {
nodes_for_subtraction_trick_.emplace_back(6, tree.GetDepth(6), 0.0f);
HistogramBuilder<GradientSumT, CPUExpandEntry> histogram_builder;
-histogram_builder.Reset(gmat.cut.TotalBins(),
-                        {GenericParameter::kCpuId, kMaxBins},
-                        omp_get_max_threads(), 1, is_distributed);
+histogram_builder.Reset(gmat.cut.TotalBins(), {kMaxBins, 0.5}, omp_get_max_threads(), 1,
+                        is_distributed);
histogram_builder.AddHistRows(&starting_index, &sync_count,
nodes_for_explicit_hist_build_,
nodes_for_subtraction_trick_, &tree);
@@ -89,15 +85,11 @@ void TestSyncHist(bool is_distributed) {
auto p_fmat =
RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
-auto const &gmat = *(p_fmat
-                         ->GetBatches<GHistIndexMatrix>(
-                             BatchParam{GenericParameter::kCpuId, kMaxBins})
-                         .begin());
+auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());
HistogramBuilder<GradientSumT, CPUExpandEntry> histogram;
uint32_t total_bins = gmat.cut.Ptrs().back();
-histogram.Reset(total_bins, {GenericParameter::kCpuId, kMaxBins},
-                omp_get_max_threads(), 1, is_distributed);
+histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed);
RowSetCollection row_set_collection_;
{
@@ -250,10 +242,7 @@ void TestBuildHistogram(bool is_distributed) {
int32_t constexpr kMaxBins = 4;
auto p_fmat =
RandomDataGenerator(kNRows, kNCols, 0.8).Seed(3).GenerateDMatrix();
-auto const &gmat = *(p_fmat
-                         ->GetBatches<GHistIndexMatrix>(
-                             BatchParam{GenericParameter::kCpuId, kMaxBins})
-                         .begin());
+auto const &gmat = *(p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{kMaxBins, 0.5}).begin());
uint32_t total_bins = gmat.cut.Ptrs().back();
static double constexpr kEps = 1e-6;
@@ -263,8 +252,7 @@ void TestBuildHistogram(bool is_distributed) {
bst_node_t nid = 0;
HistogramBuilder<GradientSumT, CPUExpandEntry> histogram;
-histogram.Reset(total_bins, {GenericParameter::kCpuId, kMaxBins},
-                omp_get_max_threads(), 1, is_distributed);
+histogram.Reset(total_bins, {kMaxBins, 0.5}, omp_get_max_threads(), 1, is_distributed);
RegTree tree;
@@ -278,8 +266,7 @@ void TestBuildHistogram(bool is_distributed) {
CPUExpandEntry node(RegTree::kRoot, tree.GetDepth(0), 0.0f);
std::vector<CPUExpandEntry> nodes_for_explicit_hist_build;
nodes_for_explicit_hist_build.push_back(node);
-for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>(
-         {GenericParameter::kCpuId, kMaxBins})) {
+for (auto const &gidx : p_fmat->GetBatches<GHistIndexMatrix>({kMaxBins, 0.5})) {
histogram.BuildHist(0, gidx, &tree, row_set_collection,
nodes_for_explicit_hist_build, {}, gpair);
}
@@ -342,11 +329,9 @@ void TestHistogramCategorical(size_t n_categories) {
* Generate hist with cat data.
*/
HistogramBuilder<double, CPUExpandEntry> cat_hist;
-for (auto const &gidx : cat_m->GetBatches<GHistIndexMatrix>(
-         {GenericParameter::kCpuId, kBins})) {
+for (auto const &gidx : cat_m->GetBatches<GHistIndexMatrix>({kBins, 0.5})) {
auto total_bins = gidx.cut.TotalBins();
-cat_hist.Reset(total_bins, {GenericParameter::kCpuId, kBins},
-               omp_get_max_threads(), 1, false);
+cat_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false);
cat_hist.BuildHist(0, gidx, &tree, row_set_collection,
nodes_for_explicit_hist_build, {}, gpair.HostVector());
}
@@ -357,13 +342,10 @@ void TestHistogramCategorical(size_t n_categories) {
auto x_encoded = OneHotEncodeFeature(x, n_categories);
auto encode_m = GetDMatrixFromData(x_encoded, kRows, n_categories);
HistogramBuilder<double, CPUExpandEntry> onehot_hist;
-for (auto const &gidx : encode_m->GetBatches<GHistIndexMatrix>(
-         {GenericParameter::kCpuId, kBins})) {
+for (auto const &gidx : encode_m->GetBatches<GHistIndexMatrix>({kBins, 0.5})) {
auto total_bins = gidx.cut.TotalBins();
-onehot_hist.Reset(total_bins, {GenericParameter::kCpuId, kBins},
-                  omp_get_max_threads(), 1, false);
-onehot_hist.BuildHist(0, gidx, &tree, row_set_collection,
-                      nodes_for_explicit_hist_build, {},
+onehot_hist.Reset(total_bins, {kBins, 0.5}, omp_get_max_threads(), 1, false);
+onehot_hist.BuildHist(0, gidx, &tree, row_set_collection, nodes_for_explicit_hist_build, {},
gpair.HostVector());
}
@@ -378,11 +360,16 @@ TEST(CPUHistogram, Categorical) {
TestHistogramCategorical(n_categories);
}
}
-TEST(CPUHistogram, ExternalMemory) {
+namespace {
+void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx) {
size_t constexpr kEntries = 1 << 16;
-int32_t constexpr kBins = 32;
auto m = CreateSparsePageDMatrix(kEntries, "cache");
+std::vector<float> hess(m->Info().num_row_, 1.0);
+if (is_approx) {
+  batch_param.hess = hess;
+}
std::vector<size_t> partition_size(1, 0);
size_t total_bins{0};
size_t n_samples{0};
@@ -401,9 +388,7 @@ TEST(CPUHistogram, ExternalMemory) {
* Multi page
*/
std::vector<RowSetCollection> rows_set;
-std::vector<float> hess(m->Info().num_row_, 1.0);
-for (auto const &page : m->GetBatches<GHistIndexMatrix>(
-         {GenericParameter::kCpuId, kBins, hess})) {
+for (auto const &page : m->GetBatches<GHistIndexMatrix>(batch_param)) {
CHECK_LT(page.base_rowid, m->Info().num_row_);
auto n_rows_in_node = page.Size();
partition_size[0] = std::max(partition_size[0], n_rows_in_node);
@@ -419,12 +404,10 @@ TEST(CPUHistogram, ExternalMemory) {
1, [&](size_t nidx_in_set) { return partition_size.at(nidx_in_set); },
256};
-multi_build.Reset(total_bins, {GenericParameter::kCpuId, kBins},
-                  omp_get_max_threads(), rows_set.size(), false);
+multi_build.Reset(total_bins, batch_param, common::OmpGetNumThreads(0), rows_set.size(), false);
size_t page_idx{0};
-for (auto const &page : m->GetBatches<GHistIndexMatrix>(
-         {GenericParameter::kCpuId, kBins, hess})) {
+for (auto const &page : m->GetBatches<GHistIndexMatrix>(batch_param)) {
multi_build.BuildHist(page_idx, space, page, &tree, rows_set.at(page_idx), nodes, {},
h_gpair);
++page_idx;
@@ -442,16 +425,13 @@ TEST(CPUHistogram, ExternalMemory) {
RowSetCollection row_set_collection;
InitRowPartitionForTest(&row_set_collection, n_samples);
-single_build.Reset(total_bins, {GenericParameter::kCpuId, kBins},
-                   omp_get_max_threads(), 1, false);
-size_t n_batches{0};
-for (auto const &page :
-     m->GetBatches<GHistIndexMatrix>({GenericParameter::kCpuId, kBins})) {
-  single_build.BuildHist(0, page, &tree, row_set_collection, nodes, {},
-                         h_gpair);
-  n_batches++;
-}
-ASSERT_EQ(n_batches, 1);
+single_build.Reset(total_bins, batch_param, common::OmpGetNumThreads(0), 1, false);
+SparsePage concat;
+GHistIndexMatrix gmat;
+std::vector<float> hess(m->Info().num_row_, 1.0f);
+gmat.Init(m.get(), batch_param.max_bin, std::numeric_limits<double>::quiet_NaN(), false,
+          common::OmpGetNumThreads(0), hess);
+single_build.BuildHist(0, gmat, &tree, row_set_collection, nodes, {}, h_gpair);
single_page = single_build.Histogram()[0];
}
@@ -460,5 +440,11 @@ TEST(CPUHistogram, ExternalMemory) {
ASSERT_NEAR(single_page[i].GetHess(), multi_page[i].GetHess(), kRtEps);
}
}
+} // anonymous namespace
+TEST(CPUHistogram, ExternalMemory) {
+  int32_t constexpr kBins = 256;
+  TestHistogramExternalMemory(BatchParam{kBins, common::Span<float>{}, false}, true);
+}
} // namespace tree
} // namespace xgboost

View File

@@ -8,6 +8,19 @@
namespace xgboost {
namespace tree {
+namespace {
+void GetSplit(RegTree *tree, float split_value, std::vector<CPUExpandEntry> *candidates) {
+  tree->ExpandNode(
+      /*nid=*/RegTree::kRoot, /*split_index=*/0, /*split_value=*/split_value,
+      /*default_left=*/true, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+      /*left_sum=*/0.0f,
+      /*right_sum=*/0.0f);
+  candidates->front().split.split_value = split_value;
+  candidates->front().split.sindex = 0;
+  candidates->front().split.sindex |= (1U << 31);
+}
+} // anonymous namespace
TEST(Approx, Partitioner) {
size_t n_samples = 1024, n_features = 1, base_rowid = 0;
ApproxRowPartitioner partitioner{n_samples, base_rowid};
@@ -20,20 +33,18 @@ TEST(Approx, Partitioner) {
ctx.InitAllowUnknown(Args{});
std::vector<CPUExpandEntry> candidates{{0, 0, 0.4}};
-for (auto const &page : Xy->GetBatches<GHistIndexMatrix>({GenericParameter::kCpuId, 64})) {
-  bst_feature_t split_ind = 0;
+auto grad = GenerateRandomGradients(n_samples);
+std::vector<float> hess(grad.Size());
+std::transform(grad.HostVector().cbegin(), grad.HostVector().cend(), hess.begin(),
+               [](auto gpair) { return gpair.GetHess(); });
+for (auto const &page : Xy->GetBatches<GHistIndexMatrix>({64, hess, true})) {
+  bst_feature_t const split_ind = 0;
{
auto min_value = page.cut.MinValues()[split_ind];
RegTree tree;
-tree.ExpandNode(
-    /*nid=*/0, /*split_index=*/0, /*split_value=*/min_value,
-    /*default_left=*/true, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
-    /*left_sum=*/0.0f,
-    /*right_sum=*/0.0f);
ApproxRowPartitioner partitioner{n_samples, base_rowid};
-candidates.front().split.split_value = min_value;
-candidates.front().split.sindex = 0;
-candidates.front().split.sindex |= (1U << 31);
+GetSplit(&tree, min_value, &candidates);
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
ASSERT_EQ(partitioner.Size(), 3);
ASSERT_EQ(partitioner[1].Size(), 0);
@@ -44,16 +55,8 @@ TEST(Approx, Partitioner) {
auto ptr = page.cut.Ptrs()[split_ind + 1];
float split_value = page.cut.Values().at(ptr / 2);
RegTree tree;
-tree.ExpandNode(
-    /*nid=*/RegTree::kRoot, /*split_index=*/split_ind,
-    /*split_value=*/split_value,
-    /*default_left=*/true, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
-    /*left_sum=*/0.0f,
-    /*right_sum=*/0.0f);
+GetSplit(&tree, split_value, &candidates);
auto left_nidx = tree[RegTree::kRoot].LeftChild();
-candidates.front().split.split_value = split_value;
-candidates.front().split.sindex = 0;
-candidates.front().split.sindex |= (1U << 31);
partitioner.UpdatePosition(&ctx, page, candidates, &tree);
auto elem = partitioner[left_nidx];

View File

@@ -155,18 +155,19 @@ class QuantileHistMock : public QuantileHistMaker {
std::vector<GradientPair> row_gpairs =
{ {1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f},
{0.27f, 0.29f}, {0.37f, 0.39f}, {-0.47f, 0.49f}, {0.57f, 0.59f} };
-size_t constexpr kMaxBins = 4;
+int32_t constexpr kMaxBins = 4;
// try out different sparsity to get different number of missing values
for (double sparsity : {0.0, 0.1, 0.2}) {
// kNRows samples with kNCols features
auto dmat = RandomDataGenerator(kNRows, kNCols, sparsity).Seed(3).GenerateDMatrix();
-GHistIndexMatrix gmat(dmat.get(), kMaxBins, false, common::OmpGetNumThreads(0));
+float sparse_th = 0.0;
+GHistIndexMatrix gmat{dmat.get(), kMaxBins, sparse_th, false, common::OmpGetNumThreads(0)};
ColumnMatrix cm;
// treat everything as dense, as this is what we intend to test here
-cm.Init(gmat, 0.0, common::OmpGetNumThreads(0));
+cm.Init(gmat, sparse_th, common::OmpGetNumThreads(0));
RealImpl::InitData(gmat, *dmat, tree, &row_gpairs);
const size_t num_row = dmat->Info().num_row_;
// split by feature 0
@@ -247,8 +248,8 @@ class QuantileHistMock : public QuantileHistMaker {
static size_t GetNumColumns() { return kNCols; }
void TestInitData() {
-size_t constexpr kMaxBins = 4;
-GHistIndexMatrix gmat(dmat_.get(), kMaxBins, false, common::OmpGetNumThreads(0));
+int32_t constexpr kMaxBins = 4;
+GHistIndexMatrix gmat{dmat_.get(), kMaxBins, 0.0f, false, common::OmpGetNumThreads(0)};
RegTree tree = RegTree();
tree.param.UpdateAllowUnknown(cfg_);
@@ -264,8 +265,8 @@ class QuantileHistMock : public QuantileHistMaker {
}
void TestInitDataSampling() {
-size_t constexpr kMaxBins = 4;
-GHistIndexMatrix gmat(dmat_.get(), kMaxBins, false, common::OmpGetNumThreads(0));
+int32_t constexpr kMaxBins = 4;
+GHistIndexMatrix gmat{dmat_.get(), kMaxBins, 0.0f, false, common::OmpGetNumThreads(0)};
RegTree tree = RegTree();
tree.param.UpdateAllowUnknown(cfg_);

View File

@@ -0,0 +1,124 @@
/*!
* Copyright 2022 XGBoost contributors
*/
#include <gtest/gtest.h>
#include "../../../src/data/adapter.h"
#include "../../../src/data/simple_dmatrix.h"
#include "../helpers.h"
namespace xgboost {
namespace {
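// Subclass that detects page regeneration: it snapshots the cached page
// pointer before delegating to SimpleDMatrix and bumps a counter whenever the
// pointer changed, i.e. whenever the page was rebuilt.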
class DMatrixForTest : public data::SimpleDMatrix {
size_t n_regen_{0};
public:
using SimpleDMatrix::SimpleDMatrix;
BatchSet<GHistIndexMatrix> GetGradientIndex(const BatchParam& param) override {
auto backup = this->gradient_index_;
auto iter = SimpleDMatrix::GetGradientIndex(param);
n_regen_ += (backup != this->gradient_index_);
return iter;
}
BatchSet<EllpackPage> GetEllpackBatches(const BatchParam& param) override {
auto backup = this->ellpack_page_;
auto iter = SimpleDMatrix::GetEllpackBatches(param);
n_regen_ += (backup != this->ellpack_page_);
return iter;
}
auto NumRegen() const { return n_regen_; }
void Reset() {
this->gradient_index_.reset();
this->ellpack_page_.reset();
n_regen_ = 0;
}
};
/**
* \brief Test for whether the gradient index is correctly regenerated.
*/
class RegenTest : public ::testing::Test {
protected:
std::shared_ptr<DMatrix> p_fmat_;
void SetUp() override {
size_t constexpr kRows = 256, kCols = 10;
HostDeviceVector<float> storage;
auto dense = RandomDataGenerator{kRows, kCols, 0.5}.GenerateArrayInterface(&storage);
auto adapter = data::ArrayAdapter(StringView{dense});
p_fmat_ = std::shared_ptr<DMatrix>(new DMatrixForTest{
&adapter, std::numeric_limits<float>::quiet_NaN(), common::OmpGetNumThreads(0)});
p_fmat_->Info().labels.Reshape(256, 1);
auto labels = p_fmat_->Info().labels.Data();
RandomDataGenerator{kRows, 1, 0}.GenerateDense(labels);
}
auto constexpr Iter() const { return 4; }
template <typename Page>
size_t TestTreeMethod(std::string tree_method, std::string obj, bool reset = true) const {
auto learner = std::unique_ptr<Learner>{Learner::Create({p_fmat_})};
learner->SetParam("tree_method", tree_method);
learner->SetParam("objective", obj);
learner->Configure();
for (auto i = 0; i < Iter(); ++i) {
learner->UpdateOneIter(i, p_fmat_);
}
auto for_test = dynamic_cast<DMatrixForTest*>(p_fmat_.get());
CHECK(for_test);
auto backup = for_test->NumRegen();
for_test->GetBatches<Page>(BatchParam{});
CHECK_EQ(for_test->NumRegen(), backup);
if (reset) {
for_test->Reset();
}
return backup;
}
};
} // anonymous namespace
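// Explanatory note on the expected counts (editorial, not from the commit):
// hist quantizes once and reuses the index, so each run costs one
// regeneration. approx builds hessian-weighted sketches, so an objective with
// a non-constant hessian (reg:logistic) forces a rebuild every iteration,
// while reg:squarederror's constant hessian allows a single build.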
TEST_F(RegenTest, Approx) {
auto n = this->TestTreeMethod<GHistIndexMatrix>("approx", "reg:squarederror");
ASSERT_EQ(n, 1);
n = this->TestTreeMethod<GHistIndexMatrix>("approx", "reg:logistic");
ASSERT_EQ(n, this->Iter());
}
TEST_F(RegenTest, Hist) {
auto n = this->TestTreeMethod<GHistIndexMatrix>("hist", "reg:squarederror");
ASSERT_EQ(n, 1);
n = this->TestTreeMethod<GHistIndexMatrix>("hist", "reg:logistic");
ASSERT_EQ(n, 1);
}
TEST_F(RegenTest, Mixed) {
auto n = this->TestTreeMethod<GHistIndexMatrix>("hist", "reg:squarederror", false);
ASSERT_EQ(n, 1);
n = this->TestTreeMethod<GHistIndexMatrix>("approx", "reg:logistic", true);
ASSERT_EQ(n, this->Iter() + 1);
n = this->TestTreeMethod<GHistIndexMatrix>("approx", "reg:logistic", false);
ASSERT_EQ(n, this->Iter());
n = this->TestTreeMethod<GHistIndexMatrix>("hist", "reg:squarederror", true);
ASSERT_EQ(n, this->Iter() + 1);
}
#if defined(XGBOOST_USE_CUDA)
TEST_F(RegenTest, GpuHist) {
auto n = this->TestTreeMethod<EllpackPage>("gpu_hist", "reg:squarederror");
ASSERT_EQ(n, 1);
n = this->TestTreeMethod<EllpackPage>("gpu_hist", "reg:logistic", false);
ASSERT_EQ(n, 1);
n = this->TestTreeMethod<EllpackPage>("hist", "reg:logistic");
ASSERT_EQ(n, 2);
}
#endif // defined(XGBOOST_USE_CUDA)
} // namespace xgboost