Prepare gradient index for Quantile DMatrix. (#8103)

* Prepare gradient index for Quantile DMatrix.

- Implement push batch with adapter batch.
- Implement `GetFvalue` for prediction.
This commit is contained in:
Jiaming Yuan
2022-07-22 17:26:33 +08:00
committed by GitHub
parent 1be09848a7
commit 4a4e5c7c18
7 changed files with 254 additions and 70 deletions

View File

@@ -4,6 +4,7 @@
#include <gtest/gtest.h>
#include <xgboost/data.h>
#include "../../../src/common/column_matrix.h"
#include "../../../src/data/gradient_index.h"
#include "../helpers.h"
@@ -65,5 +66,46 @@ TEST(GradientIndex, FromCategoricalBasic) {
ASSERT_EQ(common::AsCat(x[i]), common::AsCat(bin_value));
}
}
// Checks that a gradient index populated through `PushAdapterBatch` reports the same feature
// values as the gradient index pages served by the DMatrix itself, for several sparsity levels.
TEST(GradientIndex, PushBatch) {
  size_t constexpr kRows = 64, kCols = 4;
  bst_bin_t max_bins = 64;
  // Forwarded to both `PushAdapterBatch` and `BatchParam`; presumably the sparse threshold.
  float st = 0.5;

  auto run_case = [&](float sparsity) {
    auto p_fmat = RandomDataGenerator{kRows, kCols, sparsity}.GenerateDMatrix(true);
    auto cuts = common::SketchOnDMatrix(p_fmat.get(), max_bins, common::OmpGetNumThreads(0),
                                        false, {});
    // A separate copy of the cuts is moved into the index under test.
    common::HistogramCuts cuts_copy = cuts;
    ASSERT_EQ(p_fmat->Info().num_row_, kRows);
    ASSERT_EQ(p_fmat->Info().num_col_, kCols);

    // Index under test: constructed from the meta info and cuts, then filled page by page.
    GHistIndexMatrix gmat{p_fmat->Info(), std::move(cuts_copy), max_bins};
    float const missing = std::numeric_limits<float>::quiet_NaN();
    for (auto const &page : p_fmat->GetBatches<SparsePage>()) {
      SparsePageAdapterBatch batch{page.GetView()};
      gmat.PushAdapterBatch(p_fmat->Ctx(), 0, 0, batch, missing, {}, st,
                            p_fmat->Info().num_row_);
      gmat.PushAdapterBatchColumns(p_fmat->Ctx(), batch, missing, 0);
    }

    // With zero sparsity every looked-up value must be present (non-NaN).
    bool const is_dense = (sparsity == 0.0);
    for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(BatchParam{max_bins, st})) {
      for (size_t ridx = 0; ridx < kRows; ++ridx) {
        for (size_t fidx = 0; fidx < kCols; ++fidx) {
          auto pushed = gmat.GetFvalue(ridx, fidx, false);
          auto expected = page.GetFvalue(ridx, fidx, false);
          if (is_dense) {
            ASSERT_FALSE(std::isnan(pushed));
          }
          // Only entries that the pushed index reports as present are compared.
          if (std::isnan(pushed)) {
            continue;
          }
          ASSERT_EQ(pushed, expected);
        }
      }
    }
  };

  run_case(0.0f);
  run_case(0.5f);
  run_case(0.9f);
}
} // namespace data
} // namespace xgboost

View File

@@ -66,6 +66,14 @@ void TestTrainingPrediction(size_t rows, size_t bins,
learner->UpdateOneIter(i, p_hist);
}
Json model{Object{}};
learner->SaveModel(&model);
learner.reset(Learner::Create({}));
learner->LoadModel(model);
learner->SetParam("predictor", predictor);
learner->Configure();
HostDeviceVector<float> from_full;
learner->Predict(p_full, false, &from_full, 0, 0);

View File

@@ -419,9 +419,8 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx) {
auto cut = common::SketchOnDMatrix(m.get(), batch_param.max_bin, common::OmpGetNumThreads(0),
false, hess);
GHistIndexMatrix gmat;
gmat.Init(concat, {}, cut, batch_param.max_bin, false, std::numeric_limits<double>::quiet_NaN(),
common::OmpGetNumThreads(0));
GHistIndexMatrix gmat(concat, {}, cut, batch_param.max_bin, false,
std::numeric_limits<double>::quiet_NaN(), common::OmpGetNumThreads(0));
single_build.BuildHist(0, gmat, &tree, row_set_collection, nodes, {}, h_gpair);
single_page = single_build.Histogram()[0];
}

View File

@@ -34,8 +34,7 @@ TEST(QuantileHist, Partitioner) {
auto cuts = common::SketchOnDMatrix(Xy.get(), 64, ctx.Threads());
for (auto const& page : Xy->GetBatches<SparsePage>()) {
GHistIndexMatrix gmat;
gmat.Init(page, {}, cuts, 64, true, 0.5, ctx.Threads());
GHistIndexMatrix gmat(page, {}, cuts, 64, true, 0.5, ctx.Threads());
bst_feature_t const split_ind = 0;
common::ColumnMatrix column_indices;
column_indices.Init(page, gmat, 0.5, ctx.Threads());