Refactor parts of fast histogram utilities (#3564)
* Refactor parts of fast histogram utilities
* Remove byte packing from column matrix
This commit is contained in:
51
tests/cpp/common/test_column_matrix.cc
Normal file
51
tests/cpp/common/test_column_matrix.cc
Normal file
@@ -0,0 +1,51 @@
|
||||
#include "../../../src/common/column_matrix.h"
|
||||
#include "../helpers.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
// Builds a histogram index over a 100 x 10 matrix generated with a third
// CreateDMatrix argument of 0.0 (presumably "no missing values" -- confirm
// against helpers.h), converts it to a ColumnMatrix, and checks that every
// (row, column) cell reports the same global bin index through the column
// view as through the row-major `gmat.index` array.
TEST(DenseColumn, Test) {
  auto dmat = CreateDMatrix(100, 10, 0.0);

  GHistIndexMatrix gmat;
  gmat.Init(dmat.get(), 256);

  ColumnMatrix column_matrix;
  // NOTE(review): 0.2 is the second argument of ColumnMatrix::Init; its exact
  // meaning is not visible from this file.
  column_matrix.Init(gmat, 0.2);

  const auto n_rows = dmat->Info().num_row_;
  const auto n_cols = dmat->Info().num_col_;

  for (auto ridx = 0ull; ridx < n_rows; ++ridx) {
    for (auto fidx = 0ull; fidx < n_cols; ++fidx) {
      auto column = column_matrix.GetColumn(fidx);
      // The index array is addressed as if every row held exactly n_cols
      // entries, i.e. as a flat row-major table.
      const auto flat_pos = ridx * n_cols + fidx;
      EXPECT_EQ(gmat.index[flat_pos], column.GetGlobalBinIdx(ridx));
    }
  }
}
|
||||
|
||||
// Builds a histogram index over a 100 x 1 matrix generated with a third
// CreateDMatrix argument of 0.85, converts it to a ColumnMatrix, and checks
// that the single column holds one entry per element of `gmat.index`, each
// carrying the bin index found at the start of its source row.
TEST(SparseColumn, Test) {
  auto dmat = CreateDMatrix(100, 1, 0.85);

  GHistIndexMatrix gmat;
  gmat.Init(dmat.get(), 256);

  ColumnMatrix column_matrix;
  // NOTE(review): 0.5 is the second argument of ColumnMatrix::Init; its exact
  // meaning is not visible from this file.
  column_matrix.Init(gmat, 0.5);

  auto column = column_matrix.GetColumn(0);
  const auto n_entries = column.Size();
  // The per-entry comparison below is only meaningful if the sizes agree.
  ASSERT_EQ(n_entries, gmat.index.size());

  for (auto pos = 0ull; pos < n_entries; ++pos) {
    // With a single feature, the first slot of a row is that feature's bin.
    const auto row_begin = gmat.row_ptr[column.GetRowIdx(pos)];
    EXPECT_EQ(gmat.index[row_begin], column.GetGlobalBinIdx(pos));
  }
}
|
||||
|
||||
// Builds a histogram index over a 100 x 1 matrix generated with a third
// CreateDMatrix argument of 0.5, converts it to a ColumnMatrix, and checks
// that every entry the column does not flag as missing carries the bin index
// found at the start of its source row in `gmat.index`.
TEST(DenseColumnWithMissing, Test) {
  auto dmat = CreateDMatrix(100, 1, 0.5);

  GHistIndexMatrix gmat;
  gmat.Init(dmat.get(), 256);

  ColumnMatrix column_matrix;
  // NOTE(review): 0.2 is the second argument of ColumnMatrix::Init; its exact
  // meaning is not visible from this file.
  column_matrix.Init(gmat, 0.2);

  auto column = column_matrix.GetColumn(0);
  const auto n_entries = column.Size();

  for (auto pos = 0ull; pos < n_entries; ++pos) {
    // Entries flagged as missing are skipped; only the rest are compared.
    if (!column.IsMissing(pos)) {
      const auto row_begin = gmat.row_ptr[column.GetRowIdx(pos)];
      EXPECT_EQ(gmat.index[row_begin], column.GetGlobalBinIdx(pos));
    }
  }
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
@@ -67,59 +67,4 @@ TEST(MetaInfo, SaveLoadBinary) {
|
||||
}
|
||||
|
||||
// Writes a 12-row, 5-feature LibSVM text file whose rows carry `qid:`
// annotations, loads it as a DMatrix, and checks the parsed query ids, the
// group boundaries, the MetaInfo version constants and the contents of the
// single row batch exposed by the row iterator.
TEST(MetaInfo, LoadQid) {
  const std::string libsvm_path = TempFileName();
  {
    // Scoped so the stream object is destroyed before the file is loaded.
    std::unique_ptr<dmlc::Stream> file_stream(
        dmlc::Stream::Create(libsvm_path.c_str(), "w"));
    dmlc::ostream text_out(file_stream.get());
    text_out << R"qid(3 qid:1 1:1 2:1 3:0 4:0.2 5:0
2 qid:1 1:0 2:0 3:1 4:0.1 5:1
1 qid:1 1:0 2:1 3:0 4:0.4 5:0
1 qid:1 1:0 2:0 3:1 4:0.3 5:0
1 qid:2 1:0 2:0 3:1 4:0.2 5:0
2 qid:2 1:1 2:0 3:1 4:0.4 5:0
1 qid:2 1:0 2:0 3:1 4:0.1 5:0
1 qid:2 1:0 2:0 3:1 4:0.2 5:0
2 qid:3 1:0 2:0 3:1 4:0.1 5:1
3 qid:3 1:1 2:1 3:0 4:0.3 5:0
4 qid:3 1:1 2:0 3:0 4:0.4 5:1
1 qid:3 1:0 2:1 3:1 4:0.5 5:0)qid";
    // NOTE(review): detaches the ostream from the stream; presumably this
    // also flushes buffered text -- confirm against dmlc::ostream.
    text_out.set_stream(nullptr);
  }

  std::unique_ptr<xgboost::DMatrix> loaded(
      xgboost::DMatrix::Load(libsvm_path, true, false, "libsvm"));
  // The temporary file is deleted right after the load call returns.
  std::remove(libsvm_path.c_str());

  // --- Meta information: one qid per row, three groups of four rows each. ---
  const xgboost::MetaInfo& meta = loaded->Info();
  const std::vector<uint64_t> want_qids{1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3};
  const std::vector<xgboost::bst_uint> want_group_ptr{0, 4, 8, 12};
  CHECK(meta.qids_ == want_qids);
  CHECK(meta.group_ptr_ == want_group_ptr);
  CHECK_GE(meta.kVersion, meta.kVersionQidAdded);

  // --- Row data: every row stores five entries, so offsets step by five. ---
  const std::vector<size_t> want_offset{
    0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60
  };
  const std::vector<xgboost::Entry> want_data{
    {1, 1}, {2, 1}, {3, 0}, {4, 0.2}, {5, 0},
    {1, 0}, {2, 0}, {3, 1}, {4, 0.1}, {5, 1},
    {1, 0}, {2, 1}, {3, 0}, {4, 0.4}, {5, 0},
    {1, 0}, {2, 0}, {3, 1}, {4, 0.3}, {5, 0},
    {1, 0}, {2, 0}, {3, 1}, {4, 0.2}, {5, 0},
    {1, 1}, {2, 0}, {3, 1}, {4, 0.4}, {5, 0},
    {1, 0}, {2, 0}, {3, 1}, {4, 0.1}, {5, 0},
    {1, 0}, {2, 0}, {3, 1}, {4, 0.2}, {5, 0},
    {1, 0}, {2, 0}, {3, 1}, {4, 0.1}, {5, 1},
    {1, 1}, {2, 1}, {3, 0}, {4, 0.3}, {5, 0},
    {1, 1}, {2, 0}, {3, 0}, {4, 0.4}, {5, 1},
    {1, 0}, {2, 1}, {3, 1}, {4, 0.5}, {5, 0}
  };

  dmlc::DataIter<xgboost::SparsePage>* row_iter = loaded->RowIterator();
  row_iter->BeforeFirst();
  CHECK(row_iter->Next());
  const xgboost::SparsePage& page = row_iter->Value();
  CHECK_EQ(page.base_rowid, 0);
  CHECK(page.offset == want_offset);
  CHECK(page.data == want_data);
  // Exactly one batch is expected.
  CHECK(!row_iter->Next());
}
|
||||
|
||||
@@ -18,11 +18,8 @@ TEST(gpu_hist_experimental, TestSparseShard) {
|
||||
int columns = 80;
|
||||
int max_bins = 4;
|
||||
auto dmat = CreateDMatrix(rows, columns, 0.9f);
|
||||
common::HistCutMatrix hmat;
|
||||
common::GHistIndexMatrix gmat;
|
||||
hmat.Init(dmat.get(), max_bins);
|
||||
gmat.cut = &hmat;
|
||||
gmat.Init(dmat.get());
|
||||
gmat.Init(dmat.get(),max_bins);
|
||||
TrainParam p;
|
||||
p.max_depth = 6;
|
||||
|
||||
@@ -32,7 +29,7 @@ TEST(gpu_hist_experimental, TestSparseShard) {
|
||||
const SparsePage& batch = iter->Value();
|
||||
DeviceShard shard(0, 0, 0, rows, p);
|
||||
shard.InitRowPtrs(batch);
|
||||
shard.InitCompressedData(hmat, batch);
|
||||
shard.InitCompressedData(gmat.cut, batch);
|
||||
CHECK(!iter->Next());
|
||||
|
||||
ASSERT_LT(shard.row_stride, columns);
|
||||
@@ -40,7 +37,7 @@ TEST(gpu_hist_experimental, TestSparseShard) {
|
||||
auto host_gidx_buffer = shard.gidx_buffer.AsVector();
|
||||
|
||||
common::CompressedIterator<uint32_t> gidx(host_gidx_buffer.data(),
|
||||
hmat.row_ptr.back() + 1);
|
||||
gmat.cut.row_ptr.back() + 1);
|
||||
|
||||
for (int i = 0; i < rows; i++) {
|
||||
int row_offset = 0;
|
||||
@@ -60,11 +57,8 @@ TEST(gpu_hist_experimental, TestDenseShard) {
|
||||
int columns = 80;
|
||||
int max_bins = 4;
|
||||
auto dmat = CreateDMatrix(rows, columns, 0);
|
||||
common::HistCutMatrix hmat;
|
||||
common::GHistIndexMatrix gmat;
|
||||
hmat.Init(dmat.get(), max_bins);
|
||||
gmat.cut = &hmat;
|
||||
gmat.Init(dmat.get());
|
||||
gmat.Init(dmat.get(),max_bins);
|
||||
TrainParam p;
|
||||
p.max_depth = 6;
|
||||
|
||||
@@ -75,7 +69,7 @@ TEST(gpu_hist_experimental, TestDenseShard) {
|
||||
|
||||
DeviceShard shard(0, 0, 0, rows, p);
|
||||
shard.InitRowPtrs(batch);
|
||||
shard.InitCompressedData(hmat, batch);
|
||||
shard.InitCompressedData(gmat.cut, batch);
|
||||
CHECK(!iter->Next());
|
||||
|
||||
ASSERT_EQ(shard.row_stride, columns);
|
||||
@@ -83,7 +77,7 @@ TEST(gpu_hist_experimental, TestDenseShard) {
|
||||
auto host_gidx_buffer = shard.gidx_buffer.AsVector();
|
||||
|
||||
common::CompressedIterator<uint32_t> gidx(host_gidx_buffer.data(),
|
||||
hmat.row_ptr.back() + 1);
|
||||
gmat.cut.row_ptr.back() + 1);
|
||||
|
||||
for (int i = 0; i < gmat.index.size(); i++) {
|
||||
ASSERT_EQ(gidx[i], gmat.index[i]);
|
||||
|
||||
Reference in New Issue
Block a user