Prepare gradient index for Quantile DMatrix. (#8103)
* Prepare gradient index for Quantile DMatrix.
  - Implement push batch with adapter batch.
  - Implement `GetFvalue` for prediction.
This commit is contained in:
@@ -4,6 +4,7 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h>
|
||||
|
||||
#include "../../../src/common/column_matrix.h"
|
||||
#include "../../../src/data/gradient_index.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
@@ -65,5 +66,46 @@ TEST(GradientIndex, FromCategoricalBasic) {
|
||||
ASSERT_EQ(common::AsCat(x[i]), common::AsCat(bin_value));
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a GHistIndexMatrix by pushing adapter batches made from the DMatrix's sparse pages,
// then compares its per-cell feature values against the gradient index page that the
// DMatrix produces itself.  Runs for several generator sparsity settings.
TEST(GradientIndex, PushBatch) {
  size_t constexpr kRows = 64, kCols = 4;
  bst_bin_t max_bins = 64;
  // Forwarded to both PushAdapterBatch and BatchParam below.
  // NOTE(review): presumably the sparse threshold -- confirm against the callee signatures.
  float st = 0.5;

  auto test = [&](float sparsity) {
    auto m = RandomDataGenerator{kRows, kCols, sparsity}.GenerateDMatrix(true);
    auto cuts = common::SketchOnDMatrix(m.get(), max_bins, common::OmpGetNumThreads(0), false, {});

    ASSERT_EQ(m->Info().num_row_, kRows);
    ASSERT_EQ(m->Info().num_col_, kCols);

    // The index takes its cuts by move, so hand it a separate copy.
    common::HistogramCuts cuts_copy = cuts;
    GHistIndexMatrix pushed{m->Info(), std::move(cuts_copy), max_bins};

    float const missing = std::numeric_limits<float>::quiet_NaN();
    for (auto const &sparse_page : m->GetBatches<SparsePage>()) {
      SparsePageAdapterBatch adapter{sparse_page.GetView()};
      pushed.PushAdapterBatch(m->Ctx(), 0, 0, adapter, missing, {}, st, m->Info().num_row_);
      pushed.PushAdapterBatchColumns(m->Ctx(), adapter, missing, 0);
    }

    for (auto const &index_page : m->GetBatches<GHistIndexMatrix>(BatchParam{max_bins, st})) {
      for (size_t ridx = 0; ridx < kRows; ++ridx) {
        for (size_t fidx = 0; fidx < kCols; ++fidx) {
          auto v0 = pushed.GetFvalue(ridx, fidx, false);
          auto v1 = index_page.GetFvalue(ridx, fidx, false);
          // With zero sparsity requested, no cell may come back as NaN.
          if (sparsity == 0.0) {
            ASSERT_FALSE(std::isnan(v0));
          }
          // Cells reported as NaN by the pushed index are not compared.
          if (std::isnan(v0)) {
            continue;
          }
          ASSERT_EQ(v0, v1);
        }
      }
    }
  };

  for (float sparsity : {0.0f, 0.5f, 0.9f}) {
    test(sparsity);
  }
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -66,6 +66,14 @@ void TestTrainingPrediction(size_t rows, size_t bins,
|
||||
learner->UpdateOneIter(i, p_hist);
|
||||
}
|
||||
|
||||
Json model{Object{}};
|
||||
learner->SaveModel(&model);
|
||||
|
||||
learner.reset(Learner::Create({}));
|
||||
learner->LoadModel(model);
|
||||
learner->SetParam("predictor", predictor);
|
||||
learner->Configure();
|
||||
|
||||
HostDeviceVector<float> from_full;
|
||||
learner->Predict(p_full, false, &from_full, 0, 0);
|
||||
|
||||
|
||||
@@ -419,9 +419,8 @@ void TestHistogramExternalMemory(BatchParam batch_param, bool is_approx) {
|
||||
|
||||
auto cut = common::SketchOnDMatrix(m.get(), batch_param.max_bin, common::OmpGetNumThreads(0),
|
||||
false, hess);
|
||||
GHistIndexMatrix gmat;
|
||||
gmat.Init(concat, {}, cut, batch_param.max_bin, false, std::numeric_limits<double>::quiet_NaN(),
|
||||
common::OmpGetNumThreads(0));
|
||||
GHistIndexMatrix gmat(concat, {}, cut, batch_param.max_bin, false,
|
||||
std::numeric_limits<double>::quiet_NaN(), common::OmpGetNumThreads(0));
|
||||
single_build.BuildHist(0, gmat, &tree, row_set_collection, nodes, {}, h_gpair);
|
||||
single_page = single_build.Histogram()[0];
|
||||
}
|
||||
|
||||
@@ -34,8 +34,7 @@ TEST(QuantileHist, Partitioner) {
|
||||
auto cuts = common::SketchOnDMatrix(Xy.get(), 64, ctx.Threads());
|
||||
|
||||
for (auto const& page : Xy->GetBatches<SparsePage>()) {
|
||||
GHistIndexMatrix gmat;
|
||||
gmat.Init(page, {}, cuts, 64, true, 0.5, ctx.Threads());
|
||||
GHistIndexMatrix gmat(page, {}, cuts, 64, true, 0.5, ctx.Threads());
|
||||
bst_feature_t const split_ind = 0;
|
||||
common::ColumnMatrix column_indices;
|
||||
column_indices.Init(page, gmat, 0.5, ctx.Threads());
|
||||
|
||||
Reference in New Issue
Block a user