Implement iterative DMatrix for CPU. (#8116)
This commit is contained in:
36
tests/cpp/data/test_iterative_dmatrix.cc
Normal file
36
tests/cpp/data/test_iterative_dmatrix.cc
Normal file
@@ -0,0 +1,36 @@
|
||||
/*!
|
||||
* Copyright 2022 XGBoost contributors
|
||||
*/
|
||||
#include "test_iterative_dmatrix.h"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "../../../src/data/gradient_index.h"
|
||||
#include "../../../src/data/iterative_dmatrix.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
TEST(IterativeDMatrix, Ref) {
|
||||
TestRefDMatrix<GHistIndexMatrix, NumpyArrayIterForTest>(
|
||||
[&](GHistIndexMatrix const& page) { return page.cut; });
|
||||
}
|
||||
|
||||
TEST(IterativeDMatrix, IsDense) {
|
||||
int n_bins = 16;
|
||||
auto test = [n_bins](float sparsity) {
|
||||
NumpyArrayIterForTest iter(sparsity);
|
||||
IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, n_bins);
|
||||
if (sparsity == 0.0) {
|
||||
ASSERT_TRUE(m.IsDense());
|
||||
} else {
|
||||
ASSERT_FALSE(m.IsDense());
|
||||
}
|
||||
};
|
||||
test(0.0);
|
||||
test(0.1);
|
||||
test(1.0);
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
@@ -3,19 +3,19 @@
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "../helpers.h"
|
||||
#include "../../../src/data/iterative_dmatrix.h"
|
||||
#include "../../../src/data/ellpack_page.cuh"
|
||||
#include "../../../src/data/device_adapter.cuh"
|
||||
#include "../../../src/data/ellpack_page.cuh"
|
||||
#include "../../../src/data/iterative_dmatrix.h"
|
||||
#include "../helpers.h"
|
||||
#include "test_iterative_dmatrix.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
|
||||
void TestEquivalent(float sparsity) {
|
||||
CudaArrayIterForTest iter{sparsity};
|
||||
IterativeDMatrix m(
|
||||
&iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(),
|
||||
0, 256);
|
||||
IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, 256);
|
||||
size_t offset = 0;
|
||||
auto first = (*m.GetEllpackBatches({}).begin()).Impl();
|
||||
std::unique_ptr<EllpackPageImpl> page_concatenated {
|
||||
@@ -88,9 +88,8 @@ TEST(IterativeDeviceDMatrix, Basic) {
|
||||
|
||||
TEST(IterativeDeviceDMatrix, RowMajor) {
|
||||
CudaArrayIterForTest iter(0.0f);
|
||||
IterativeDMatrix m(
|
||||
&iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(),
|
||||
0, 256);
|
||||
IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, 256);
|
||||
size_t n_batches = 0;
|
||||
std::string interface_str = iter.AsArray();
|
||||
for (auto& ellpack : m.GetBatches<EllpackPage>({})) {
|
||||
@@ -139,9 +138,8 @@ TEST(IterativeDeviceDMatrix, RowMajorMissing) {
|
||||
reinterpret_cast<float *>(get<Integer>(j_interface["data"][0])));
|
||||
thrust::copy(h_data.cbegin(), h_data.cend(), ptr);
|
||||
|
||||
IterativeDMatrix m(
|
||||
&iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(),
|
||||
0, 256);
|
||||
IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, 256);
|
||||
auto &ellpack = *m.GetBatches<EllpackPage>({0, 256}).begin();
|
||||
auto impl = ellpack.Impl();
|
||||
common::CompressedIterator<uint32_t> iterator(
|
||||
@@ -157,11 +155,10 @@ TEST(IterativeDeviceDMatrix, RowMajorMissing) {
|
||||
|
||||
TEST(IterativeDeviceDMatrix, IsDense) {
|
||||
int num_bins = 16;
|
||||
auto test = [num_bins] (float sparsity) {
|
||||
auto test = [num_bins](float sparsity) {
|
||||
CudaArrayIterForTest iter(sparsity);
|
||||
IterativeDMatrix m(
|
||||
&iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(),
|
||||
0, 256);
|
||||
IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, num_bins);
|
||||
if (sparsity == 0.0) {
|
||||
ASSERT_TRUE(m.IsDense());
|
||||
} else {
|
||||
@@ -170,6 +167,12 @@ TEST(IterativeDeviceDMatrix, IsDense) {
|
||||
};
|
||||
test(0.0);
|
||||
test(0.1);
|
||||
test(1.0);
|
||||
}
|
||||
|
||||
TEST(IterativeDeviceDMatrix, Ref) {
|
||||
TestRefDMatrix<EllpackPage, CudaArrayIterForTest>(
|
||||
[](EllpackPage const& page) { return page.Impl()->Cuts(); });
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
59
tests/cpp/data/test_iterative_dmatrix.h
Normal file
59
tests/cpp/data/test_iterative_dmatrix.h
Normal file
@@ -0,0 +1,59 @@
|
||||
/*!
|
||||
* Copyright 2022 XGBoost contributors
|
||||
*/
|
||||
#pragma once
|
||||
#include <memory> // std::make_shared
|
||||
|
||||
#include "../../../src/data/iterative_dmatrix.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
template <typename Page, typename Iter, typename Cuts>
|
||||
void TestRefDMatrix(Cuts&& get_cuts) {
|
||||
int n_bins = 256;
|
||||
Iter iter(0.3, 2048);
|
||||
auto m = std::make_shared<IterativeDMatrix>(&iter, iter.Proxy(), nullptr, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, n_bins);
|
||||
|
||||
Iter iter_1(0.8, 32, Iter::Cols(), 13);
|
||||
auto m_1 = std::make_shared<IterativeDMatrix>(&iter_1, iter_1.Proxy(), m, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, n_bins);
|
||||
|
||||
for (auto const& page_0 : m->template GetBatches<Page>({})) {
|
||||
for (auto const& page_1 : m_1->template GetBatches<Page>({})) {
|
||||
auto const& cuts_0 = get_cuts(page_0);
|
||||
auto const& cuts_1 = get_cuts(page_1);
|
||||
ASSERT_EQ(cuts_0.Values(), cuts_1.Values());
|
||||
ASSERT_EQ(cuts_0.Ptrs(), cuts_1.Ptrs());
|
||||
ASSERT_EQ(cuts_0.MinValues(), cuts_1.MinValues());
|
||||
}
|
||||
}
|
||||
|
||||
m_1 = std::make_shared<IterativeDMatrix>(&iter_1, iter_1.Proxy(), nullptr, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, n_bins);
|
||||
for (auto const& page_0 : m->template GetBatches<Page>({})) {
|
||||
for (auto const& page_1 : m_1->template GetBatches<Page>({})) {
|
||||
auto const& cuts_0 = get_cuts(page_0);
|
||||
auto const& cuts_1 = get_cuts(page_1);
|
||||
ASSERT_NE(cuts_0.Values(), cuts_1.Values());
|
||||
ASSERT_NE(cuts_0.Ptrs(), cuts_1.Ptrs());
|
||||
}
|
||||
}
|
||||
|
||||
// Use DMatrix as ref
|
||||
auto dm = RandomDataGenerator(2048, Iter::Cols(), 0.5).GenerateDMatrix(true);
|
||||
auto dqm = std::make_shared<IterativeDMatrix>(&iter_1, iter_1.Proxy(), dm, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, n_bins);
|
||||
for (auto const& page_0 : dm->template GetBatches<Page>({})) {
|
||||
for (auto const& page_1 : dqm->template GetBatches<Page>({})) {
|
||||
auto const& cuts_0 = get_cuts(page_0);
|
||||
auto const& cuts_1 = get_cuts(page_1);
|
||||
ASSERT_EQ(cuts_0.Values(), cuts_1.Values());
|
||||
ASSERT_EQ(cuts_0.Ptrs(), cuts_1.Ptrs());
|
||||
ASSERT_EQ(cuts_0.MinValues(), cuts_1.MinValues());
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
@@ -384,7 +384,7 @@ RandomDataGenerator::GenerateDMatrix(bool with_label, bool float_label,
|
||||
std::shared_ptr<DMatrix> RandomDataGenerator::GenerateQuantileDMatrix() {
|
||||
NumpyArrayIterForTest iter{this->sparsity_, this->rows_, this->cols_, 1};
|
||||
auto m = std::make_shared<data::IterativeDMatrix>(
|
||||
&iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(), 0, bins_);
|
||||
&iter, iter.Proxy(), nullptr, Reset, Next, std::numeric_limits<float>::quiet_NaN(), 0, bins_);
|
||||
return m;
|
||||
}
|
||||
|
||||
@@ -569,7 +569,7 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(
|
||||
auto& h_gpair = gpair.HostVector();
|
||||
h_gpair.resize(kRows);
|
||||
for (size_t i = 0; i < kRows; ++i) {
|
||||
h_gpair[i] = {static_cast<float>(i), 1};
|
||||
h_gpair[i] = GradientPair{static_cast<float>(i), 1};
|
||||
}
|
||||
|
||||
PredictionCacheEntry predts;
|
||||
|
||||
@@ -27,7 +27,7 @@ int CudaArrayIterForTest::Next() {
|
||||
std::shared_ptr<DMatrix> RandomDataGenerator::GenerateDeviceDMatrix() {
|
||||
CudaArrayIterForTest iter{this->sparsity_, this->rows_, this->cols_, 1};
|
||||
auto m = std::make_shared<data::IterativeDMatrix>(
|
||||
&iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(), 0, bins_);
|
||||
&iter, iter.Proxy(), nullptr, Reset, Next, std::numeric_limits<float>::quiet_NaN(), 0, bins_);
|
||||
return m;
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -245,6 +245,17 @@ void TestUpdatePredictionCache(bool use_subsampling) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(CPUPredictor, GHistIndex) {
|
||||
size_t constexpr kRows{128}, kCols{16}, kBins{64};
|
||||
auto p_hist = RandomDataGenerator{kRows, kCols, 0.0}.Bins(kBins).GenerateQuantileDMatrix();
|
||||
HostDeviceVector<float> storage(kRows * kCols);
|
||||
auto columnar = RandomDataGenerator{kRows, kCols, 0.0}.GenerateArrayInterface(&storage);
|
||||
auto adapter = data::ArrayAdapter(columnar.c_str());
|
||||
std::shared_ptr<DMatrix> p_full{
|
||||
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};
|
||||
TestTrainingPrediction(kRows, kBins, "hist", p_full, p_hist);
|
||||
}
|
||||
|
||||
TEST(CPUPredictor, CategoricalPrediction) {
|
||||
TestCategoricalPrediction("cpu_predictor");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user