Move GHistIndex into DMatrix. (#7064)

This commit is contained in:
Jiaming Yuan
2021-07-01 00:44:49 +08:00
committed by GitHub
parent 1c8fdf2218
commit 1cd20efe68
17 changed files with 386 additions and 320 deletions

View File

@@ -14,8 +14,7 @@ TEST(DenseColumn, Test) {
static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 2};
for (size_t max_num_bin : max_num_bins) {
auto dmat = RandomDataGenerator(100, 10, 0.0).GenerateDMatrix();
GHistIndexMatrix gmat;
gmat.Init(dmat.get(), max_num_bin);
GHistIndexMatrix gmat(dmat.get(), max_num_bin);
ColumnMatrix column_matrix;
column_matrix.Init(gmat, 0.2);
@@ -62,8 +61,7 @@ TEST(SparseColumn, Test) {
static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 2};
for (size_t max_num_bin : max_num_bins) {
auto dmat = RandomDataGenerator(100, 1, 0.85).GenerateDMatrix();
GHistIndexMatrix gmat;
gmat.Init(dmat.get(), max_num_bin);
GHistIndexMatrix gmat(dmat.get(), max_num_bin);
ColumnMatrix column_matrix;
column_matrix.Init(gmat, 0.5);
switch (column_matrix.GetTypeSize()) {
@@ -103,8 +101,7 @@ TEST(DenseColumnWithMissing, Test) {
static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 2 };
for (size_t max_num_bin : max_num_bins) {
auto dmat = RandomDataGenerator(100, 1, 0.5).GenerateDMatrix();
GHistIndexMatrix gmat;
gmat.Init(dmat.get(), max_num_bin);
GHistIndexMatrix gmat(dmat.get(), max_num_bin);
ColumnMatrix column_matrix;
column_matrix.Init(gmat, 0.2);
switch (column_matrix.GetTypeSize()) {
@@ -135,8 +132,7 @@ void TestGHistIndexMatrixCreation(size_t nthreads) {
/* This should create multiple sparse pages */
std::unique_ptr<DMatrix> dmat{ CreateSparsePageDMatrix(kEntries, kPageSize, filename) };
omp_set_num_threads(nthreads);
GHistIndexMatrix gmat;
gmat.Init(dmat.get(), 256);
GHistIndexMatrix gmat(dmat.get(), 256);
}
TEST(HistIndexCreationWithExternalMemory, Test) {

View File

@@ -4,6 +4,7 @@
#include <utility>
#include "../../../src/common/hist_util.h"
#include "../../../src/data/gradient_index.h"
#include "../helpers.h"
#include "test_hist_util.h"
@@ -255,8 +256,7 @@ TEST(HistUtil, IndexBinBound) {
for (auto max_bin : bin_sizes) {
auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
common::GHistIndexMatrix hmat;
hmat.Init(p_fmat.get(), max_bin);
GHistIndexMatrix hmat(p_fmat.get(), max_bin);
EXPECT_EQ(hmat.index.Size(), kRows*kCols);
EXPECT_EQ(expected_bin_type_sizes[bin_id++], hmat.index.GetBinTypeSize());
}
@@ -264,7 +264,7 @@ TEST(HistUtil, IndexBinBound) {
template <typename T>
void CheckIndexData(T* data_ptr, uint32_t* offsets,
const common::GHistIndexMatrix& hmat, size_t n_cols) {
const GHistIndexMatrix& hmat, size_t n_cols) {
for (size_t i = 0; i < hmat.index.Size(); ++i) {
EXPECT_EQ(data_ptr[i] + offsets[i % n_cols], hmat.index[i]);
}
@@ -279,8 +279,7 @@ TEST(HistUtil, IndexBinData) {
for (auto max_bin : kBinSizes) {
auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
common::GHistIndexMatrix hmat;
hmat.Init(p_fmat.get(), max_bin);
GHistIndexMatrix hmat(p_fmat.get(), max_bin);
uint32_t* offsets = hmat.index.Offset();
EXPECT_EQ(hmat.index.Size(), kRows*kCols);
switch (max_bin) {

View File

@@ -344,8 +344,7 @@ class QuantileHistMock : public QuantileHistMaker {
auto dmat = RandomDataGenerator(kNRows, kNCols, 0).Seed(3).GenerateDMatrix();
// dense, no missing values
common::GHistIndexMatrix gmat;
gmat.Init(dmat.get(), kMaxBins);
GHistIndexMatrix gmat(dmat.get(), kMaxBins);
RealImpl::InitData(gmat, *dmat, tree, &row_gpairs);
this->hist_.AddHistRow(0);
@@ -434,8 +433,7 @@ class QuantileHistMock : public QuantileHistMaker {
// kNRows samples with kNCols features
auto dmat = RandomDataGenerator(kNRows, kNCols, sparsity).Seed(3).GenerateDMatrix();
common::GHistIndexMatrix gmat;
gmat.Init(dmat.get(), kMaxBins);
GHistIndexMatrix gmat(dmat.get(), kMaxBins);
ColumnMatrix cm;
// treat everything as dense, as this is what we intend to test here
@@ -546,8 +544,7 @@ class QuantileHistMock : public QuantileHistMaker {
void TestInitData() {
size_t constexpr kMaxBins = 4;
common::GHistIndexMatrix gmat;
gmat.Init(dmat_.get(), kMaxBins);
GHistIndexMatrix gmat(dmat_.get(), kMaxBins);
RegTree tree = RegTree();
tree.param.UpdateAllowUnknown(cfg_);
@@ -564,8 +561,7 @@ class QuantileHistMock : public QuantileHistMaker {
void TestInitDataSampling() {
size_t constexpr kMaxBins = 4;
common::GHistIndexMatrix gmat;
gmat.Init(dmat_.get(), kMaxBins);
GHistIndexMatrix gmat(dmat_.get(), kMaxBins);
RegTree tree = RegTree();
tree.param.UpdateAllowUnknown(cfg_);
@@ -582,8 +578,7 @@ class QuantileHistMock : public QuantileHistMaker {
void TestAddHistRows() {
size_t constexpr kMaxBins = 4;
common::GHistIndexMatrix gmat;
gmat.Init(dmat_.get(), kMaxBins);
GHistIndexMatrix gmat(dmat_.get(), kMaxBins);
RegTree tree = RegTree();
tree.param.UpdateAllowUnknown(cfg_);
@@ -599,8 +594,7 @@ class QuantileHistMock : public QuantileHistMaker {
void TestSyncHistograms() {
size_t constexpr kMaxBins = 4;
common::GHistIndexMatrix gmat;
gmat.Init(dmat_.get(), kMaxBins);
GHistIndexMatrix gmat(dmat_.get(), kMaxBins);
RegTree tree = RegTree();
tree.param.UpdateAllowUnknown(cfg_);
@@ -620,8 +614,7 @@ class QuantileHistMock : public QuantileHistMaker {
tree.param.UpdateAllowUnknown(cfg_);
size_t constexpr kMaxBins = 4;
common::GHistIndexMatrix gmat;
gmat.Init(dmat_.get(), kMaxBins);
GHistIndexMatrix gmat(dmat_.get(), kMaxBins);
if (double_builder_) {
double_builder_->TestBuildHist(0, gmat, *dmat_, tree);
} else {