Copy data from Ellpack to GHist. (#8215)
This commit is contained in:
@@ -23,7 +23,7 @@ TEST(DenseColumn, Test) {
|
||||
common::OmpGetNumThreads(0)};
|
||||
ColumnMatrix column_matrix;
|
||||
for (auto const& page : dmat->GetBatches<SparsePage>()) {
|
||||
column_matrix.Init(page, gmat, sparse_thresh, common::OmpGetNumThreads(0));
|
||||
column_matrix.InitFromSparse(page, gmat, sparse_thresh, common::OmpGetNumThreads(0));
|
||||
}
|
||||
ASSERT_GE(column_matrix.GetTypeSize(), last);
|
||||
ASSERT_LE(column_matrix.GetTypeSize(), kUint32BinsTypeSize);
|
||||
@@ -69,7 +69,7 @@ TEST(SparseColumn, Test) {
|
||||
GHistIndexMatrix gmat{dmat.get(), max_num_bin, 0.5f, false, common::OmpGetNumThreads(0)};
|
||||
ColumnMatrix column_matrix;
|
||||
for (auto const& page : dmat->GetBatches<SparsePage>()) {
|
||||
column_matrix.Init(page, gmat, 1.0, common::OmpGetNumThreads(0));
|
||||
column_matrix.InitFromSparse(page, gmat, 1.0, common::OmpGetNumThreads(0));
|
||||
}
|
||||
common::DispatchBinType(column_matrix.GetTypeSize(), [&](auto dtype) {
|
||||
using T = decltype(dtype);
|
||||
@@ -97,7 +97,7 @@ TEST(DenseColumnWithMissing, Test) {
|
||||
GHistIndexMatrix gmat(dmat.get(), max_num_bin, 0.2, false, common::OmpGetNumThreads(0));
|
||||
ColumnMatrix column_matrix;
|
||||
for (auto const& page : dmat->GetBatches<SparsePage>()) {
|
||||
column_matrix.Init(page, gmat, 0.2, common::OmpGetNumThreads(0));
|
||||
column_matrix.InitFromSparse(page, gmat, 0.2, common::OmpGetNumThreads(0));
|
||||
}
|
||||
ASSERT_TRUE(column_matrix.AnyMissing());
|
||||
DispatchBinType(column_matrix.GetTypeSize(), [&](auto dtype) {
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#include <xgboost/data.h>
|
||||
|
||||
#include "../../../src/common/column_matrix.h"
|
||||
#include "../../../src/common/io.h" // MemoryBufferStream
|
||||
#include "../../../src/data/gradient_index.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
@@ -107,5 +108,81 @@ TEST(GradientIndex, PushBatch) {
|
||||
test(0.5f);
|
||||
test(0.9f);
|
||||
}
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
|
||||
namespace {
|
||||
/**
 * \brief Fixture checking that a GHistIndexMatrix built from an EllpackPage is equivalent
 *        to the one the DMatrix produces via GetBatches<GHistIndexMatrix>.
 *
 * The test parameter is a (density, threshold) pair; see Run() for how each is used.
 */
class GHistIndexMatrixTest : public testing::TestWithParam<std::tuple<float, float>> {
 protected:
  /**
   * \brief Build the gradient index both ways and compare them.
   *
   * \param density   Passed to RandomDataGenerator as `1 - density` (presumably the
   *                  sparsity of the generated data -- confirm against the helper).
   * \param threshold Second argument of the BatchParam used for both construction paths.
   */
  void Run(float density, double threshold) {
    // Only testing with small sample size as the cuts might be different between host and
    // device.
    size_t n_samples{128}, n_features{13};
    Context ctx;
    ctx.gpu_id = 0;
    auto Xy = RandomDataGenerator{n_samples, n_features, 1 - density}.GenerateDMatrix(true);
    std::unique_ptr<GHistIndexMatrix> from_ellpack;
    ASSERT_TRUE(Xy->SingleColBlock());
    bst_bin_t constexpr kBins{17};
    auto p = BatchParam{kBins, threshold};
    for (auto const &page : Xy->GetBatches<EllpackPage>(BatchParam{0, kBins})) {
      from_ellpack.reset(new GHistIndexMatrix{&ctx, Xy->Info(), page, p});
    }
    // Fail the test cleanly instead of dereferencing a null pointer below if no Ellpack
    // page was produced.
    ASSERT_TRUE(from_ellpack != nullptr);

    for (auto const &from_sparse_page : Xy->GetBatches<GHistIndexMatrix>(p)) {
      ASSERT_EQ(from_sparse_page.IsDense(), from_ellpack->IsDense());
      ASSERT_EQ(from_sparse_page.base_rowid, 0);
      ASSERT_EQ(from_sparse_page.base_rowid, from_ellpack->base_rowid);
      ASSERT_EQ(from_sparse_page.Size(), from_ellpack->Size());
      ASSERT_EQ(from_sparse_page.index.Size(), from_ellpack->index.Size());

      // Element-wise comparison of the bin indices.
      auto const &gidx_from_sparse = from_sparse_page.index;
      auto const &gidx_from_ellpack = from_ellpack->index;

      for (size_t i = 0; i < gidx_from_sparse.Size(); ++i) {
        ASSERT_EQ(gidx_from_sparse[i], gidx_from_ellpack[i]);
      }

      // Compare the column-wise views obtained from Transpose().
      auto const &columns_from_sparse = from_sparse_page.Transpose();
      auto const &columns_from_ellpack = from_ellpack->Transpose();
      ASSERT_EQ(columns_from_sparse.AnyMissing(), columns_from_ellpack.AnyMissing());
      ASSERT_EQ(columns_from_sparse.GetTypeSize(), columns_from_ellpack.GetTypeSize());
      ASSERT_EQ(columns_from_sparse.GetNumFeature(), columns_from_ellpack.GetNumFeature());
      for (size_t i = 0; i < n_features; ++i) {
        ASSERT_EQ(columns_from_sparse.GetColumnType(i), columns_from_ellpack.GetColumnType(i));
      }

      // Serialize both column matrices and compare the raw bytes. The inner scopes destroy
      // each stream before its buffer is read.
      std::string from_sparse_buf;
      {
        common::MemoryBufferStream fo{&from_sparse_buf};
        columns_from_sparse.Write(&fo);
      }
      std::string from_ellpack_buf;
      {
        common::MemoryBufferStream fo{&from_ellpack_buf};
        // Must serialize the Ellpack-derived columns; writing the sparse ones here would
        // make the comparison below trivially true.
        columns_from_ellpack.Write(&fo);
      }
      ASSERT_EQ(from_sparse_buf, from_ellpack_buf);
    }
  }
};
|
||||
} // anonymous namespace
|
||||
|
||||
// Unpacks the parameter tuple and forwards its two elements, in order, to Run().
TEST_P(GHistIndexMatrixTest, FromEllpack) {
  auto const &param = GetParam();
  float const density = std::get<0>(param);
  double const threshold = std::get<1>(param);
  this->Run(density, threshold);
}
|
||||
|
||||
// Each tuple is forwarded by the FromEllpack test as Run(density, threshold).
INSTANTIATE_TEST_SUITE_P(
    GHistIndexMatrix, GHistIndexMatrixTest,
    testing::Values(
        std::make_tuple(1.0f, 0.0),    // no missing
        std::make_tuple(0.2f, 0.8),    // sparse columns
        std::make_tuple(0.8f, 0.2),    // dense columns
        std::make_tuple(1.0f, 0.2),    // no missing
        std::make_tuple(0.5f, 0.6),    // sparse columns
        std::make_tuple(0.6f, 0.4)));  // dense columns
|
||||
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
} // namespace data
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -37,7 +37,7 @@ TEST(QuantileHist, Partitioner) {
|
||||
GHistIndexMatrix gmat(page, {}, cuts, 64, true, 0.5, ctx.Threads());
|
||||
bst_feature_t const split_ind = 0;
|
||||
common::ColumnMatrix column_indices;
|
||||
column_indices.Init(page, gmat, 0.5, ctx.Threads());
|
||||
column_indices.InitFromSparse(page, gmat, 0.5, ctx.Threads());
|
||||
{
|
||||
auto min_value = gmat.cut.MinValues()[split_ind];
|
||||
RegTree tree;
|
||||
|
||||
Reference in New Issue
Block a user