Implement iterative DMatrix for CPU. (#8116)
This commit is contained in:
36
tests/cpp/data/test_iterative_dmatrix.cc
Normal file
36
tests/cpp/data/test_iterative_dmatrix.cc
Normal file
@@ -0,0 +1,36 @@
|
||||
/*!
|
||||
* Copyright 2022 XGBoost contributors
|
||||
*/
|
||||
#include "test_iterative_dmatrix.h"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "../../../src/data/gradient_index.h"
|
||||
#include "../../../src/data/iterative_dmatrix.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
/*!
 * \brief Run the shared reference-DMatrix checks with the CPU page type
 *        (GHistIndexMatrix) and the numpy-style test iterator.
 */
TEST(IterativeDMatrix, Ref) {
  // Return the cuts by const reference.  With a deduced return type the lambda
  // would return `page.cut` by value and copy it on every call; the double
  // parentheses in decltype yield the reference type of the member access.
  // Nothing from the enclosing scope is used, so no capture is needed.
  TestRefDMatrix<GHistIndexMatrix, NumpyArrayIterForTest>(
      [](GHistIndexMatrix const& page) -> decltype((page.cut)) { return page.cut; });
}
|
||||
|
||||
/*!
 * \brief IsDense() must be true when the test iterator is created with zero
 *        sparsity and false for any other sparsity value.
 */
TEST(IterativeDMatrix, IsDense) {
  int max_bin = 16;
  auto check = [max_bin](float sparsity) {
    float const missing = std::numeric_limits<float>::quiet_NaN();
    NumpyArrayIterForTest batches(sparsity);
    IterativeDMatrix qdm(&batches, batches.Proxy(), nullptr, Reset, Next, missing, 0, max_bin);
    // Handle the non-dense expectation first; only an exact zero falls through.
    if (sparsity != 0.0) {
      ASSERT_FALSE(qdm.IsDense());
      return;
    }
    ASSERT_TRUE(qdm.IsDense());
  };
  check(0.0);
  check(0.1);
  check(1.0);
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
@@ -3,19 +3,19 @@
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "../helpers.h"
|
||||
#include "../../../src/data/iterative_dmatrix.h"
|
||||
#include "../../../src/data/ellpack_page.cuh"
|
||||
#include "../../../src/data/device_adapter.cuh"
|
||||
#include "../../../src/data/ellpack_page.cuh"
|
||||
#include "../../../src/data/iterative_dmatrix.h"
|
||||
#include "../helpers.h"
|
||||
#include "test_iterative_dmatrix.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
|
||||
void TestEquivalent(float sparsity) {
|
||||
CudaArrayIterForTest iter{sparsity};
|
||||
IterativeDMatrix m(
|
||||
&iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(),
|
||||
0, 256);
|
||||
IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, 256);
|
||||
size_t offset = 0;
|
||||
auto first = (*m.GetEllpackBatches({}).begin()).Impl();
|
||||
std::unique_ptr<EllpackPageImpl> page_concatenated {
|
||||
@@ -88,9 +88,8 @@ TEST(IterativeDeviceDMatrix, Basic) {
|
||||
|
||||
TEST(IterativeDeviceDMatrix, RowMajor) {
|
||||
CudaArrayIterForTest iter(0.0f);
|
||||
IterativeDMatrix m(
|
||||
&iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(),
|
||||
0, 256);
|
||||
IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, 256);
|
||||
size_t n_batches = 0;
|
||||
std::string interface_str = iter.AsArray();
|
||||
for (auto& ellpack : m.GetBatches<EllpackPage>({})) {
|
||||
@@ -139,9 +138,8 @@ TEST(IterativeDeviceDMatrix, RowMajorMissing) {
|
||||
reinterpret_cast<float *>(get<Integer>(j_interface["data"][0])));
|
||||
thrust::copy(h_data.cbegin(), h_data.cend(), ptr);
|
||||
|
||||
IterativeDMatrix m(
|
||||
&iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(),
|
||||
0, 256);
|
||||
IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, 256);
|
||||
auto &ellpack = *m.GetBatches<EllpackPage>({0, 256}).begin();
|
||||
auto impl = ellpack.Impl();
|
||||
common::CompressedIterator<uint32_t> iterator(
|
||||
@@ -157,11 +155,10 @@ TEST(IterativeDeviceDMatrix, RowMajorMissing) {
|
||||
|
||||
TEST(IterativeDeviceDMatrix, IsDense) {
|
||||
int num_bins = 16;
|
||||
auto test = [num_bins] (float sparsity) {
|
||||
auto test = [num_bins](float sparsity) {
|
||||
CudaArrayIterForTest iter(sparsity);
|
||||
IterativeDMatrix m(
|
||||
&iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(),
|
||||
0, 256);
|
||||
IterativeDMatrix m(&iter, iter.Proxy(), nullptr, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, num_bins);
|
||||
if (sparsity == 0.0) {
|
||||
ASSERT_TRUE(m.IsDense());
|
||||
} else {
|
||||
@@ -170,6 +167,12 @@ TEST(IterativeDeviceDMatrix, IsDense) {
|
||||
};
|
||||
test(0.0);
|
||||
test(0.1);
|
||||
test(1.0);
|
||||
}
|
||||
|
||||
/*!
 * \brief Run the shared reference-DMatrix checks with the Ellpack page type and
 *        the CUDA array test iterator.
 */
TEST(IterativeDeviceDMatrix, Ref) {
  // Accessor handed to the shared test: pulls the cuts out of an Ellpack page.
  auto ellpack_cuts = [](EllpackPage const& page) { return page.Impl()->Cuts(); };
  TestRefDMatrix<EllpackPage, CudaArrayIterForTest>(ellpack_cuts);
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
59
tests/cpp/data/test_iterative_dmatrix.h
Normal file
59
tests/cpp/data/test_iterative_dmatrix.h
Normal file
@@ -0,0 +1,59 @@
|
||||
/*!
|
||||
* Copyright 2022 XGBoost contributors
|
||||
*/
|
||||
#pragma once
|
||||
#include <memory> // std::make_shared
|
||||
|
||||
#include "../../../src/data/iterative_dmatrix.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace data {
|
||||
template <typename Page, typename Iter, typename Cuts>
|
||||
void TestRefDMatrix(Cuts&& get_cuts) {
|
||||
int n_bins = 256;
|
||||
Iter iter(0.3, 2048);
|
||||
auto m = std::make_shared<IterativeDMatrix>(&iter, iter.Proxy(), nullptr, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, n_bins);
|
||||
|
||||
Iter iter_1(0.8, 32, Iter::Cols(), 13);
|
||||
auto m_1 = std::make_shared<IterativeDMatrix>(&iter_1, iter_1.Proxy(), m, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, n_bins);
|
||||
|
||||
for (auto const& page_0 : m->template GetBatches<Page>({})) {
|
||||
for (auto const& page_1 : m_1->template GetBatches<Page>({})) {
|
||||
auto const& cuts_0 = get_cuts(page_0);
|
||||
auto const& cuts_1 = get_cuts(page_1);
|
||||
ASSERT_EQ(cuts_0.Values(), cuts_1.Values());
|
||||
ASSERT_EQ(cuts_0.Ptrs(), cuts_1.Ptrs());
|
||||
ASSERT_EQ(cuts_0.MinValues(), cuts_1.MinValues());
|
||||
}
|
||||
}
|
||||
|
||||
m_1 = std::make_shared<IterativeDMatrix>(&iter_1, iter_1.Proxy(), nullptr, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, n_bins);
|
||||
for (auto const& page_0 : m->template GetBatches<Page>({})) {
|
||||
for (auto const& page_1 : m_1->template GetBatches<Page>({})) {
|
||||
auto const& cuts_0 = get_cuts(page_0);
|
||||
auto const& cuts_1 = get_cuts(page_1);
|
||||
ASSERT_NE(cuts_0.Values(), cuts_1.Values());
|
||||
ASSERT_NE(cuts_0.Ptrs(), cuts_1.Ptrs());
|
||||
}
|
||||
}
|
||||
|
||||
// Use DMatrix as ref
|
||||
auto dm = RandomDataGenerator(2048, Iter::Cols(), 0.5).GenerateDMatrix(true);
|
||||
auto dqm = std::make_shared<IterativeDMatrix>(&iter_1, iter_1.Proxy(), dm, Reset, Next,
|
||||
std::numeric_limits<float>::quiet_NaN(), 0, n_bins);
|
||||
for (auto const& page_0 : dm->template GetBatches<Page>({})) {
|
||||
for (auto const& page_1 : dqm->template GetBatches<Page>({})) {
|
||||
auto const& cuts_0 = get_cuts(page_0);
|
||||
auto const& cuts_1 = get_cuts(page_1);
|
||||
ASSERT_EQ(cuts_0.Values(), cuts_1.Values());
|
||||
ASSERT_EQ(cuts_0.Ptrs(), cuts_1.Ptrs());
|
||||
ASSERT_EQ(cuts_0.MinValues(), cuts_1.MinValues());
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
Reference in New Issue
Block a user