Copy data from Ellpack to GHist. (#8215)

This commit is contained in:
Jiaming Yuan
2022-09-06 23:05:49 +08:00
committed by GitHub
parent 7ee10e3dbd
commit 441ffc017a
16 changed files with 466 additions and 112 deletions

View File

@@ -5,6 +5,7 @@
#include <xgboost/data.h>
#include "../../../src/common/column_matrix.h"
#include "../../../src/common/io.h" // MemoryBufferStream
#include "../../../src/data/gradient_index.h"
#include "../helpers.h"
@@ -107,5 +108,81 @@ TEST(GradientIndex, PushBatch) {
test(0.5f);
test(0.9f);
}
#if defined(XGBOOST_USE_CUDA)
namespace {
class GHistIndexMatrixTest : public testing::TestWithParam<std::tuple<float, float>> {
protected:
void Run(float density, double threshold) {
// Only testing with small sample size as the cuts might be different between host and
// device.
size_t n_samples{128}, n_features{13};
Context ctx;
ctx.gpu_id = 0;
auto Xy = RandomDataGenerator{n_samples, n_features, 1 - density}.GenerateDMatrix(true);
std::unique_ptr<GHistIndexMatrix> from_ellpack;
ASSERT_TRUE(Xy->SingleColBlock());
bst_bin_t constexpr kBins{17};
auto p = BatchParam{kBins, threshold};
for (auto const &page : Xy->GetBatches<EllpackPage>(BatchParam{0, kBins})) {
from_ellpack.reset(new GHistIndexMatrix{&ctx, Xy->Info(), page, p});
}
for (auto const &from_sparse_page : Xy->GetBatches<GHistIndexMatrix>(p)) {
ASSERT_EQ(from_sparse_page.IsDense(), from_ellpack->IsDense());
ASSERT_EQ(from_sparse_page.base_rowid, 0);
ASSERT_EQ(from_sparse_page.base_rowid, from_ellpack->base_rowid);
ASSERT_EQ(from_sparse_page.Size(), from_ellpack->Size());
ASSERT_EQ(from_sparse_page.index.Size(), from_ellpack->index.Size());
auto const &gidx_from_sparse = from_sparse_page.index;
auto const &gidx_from_ellpack = from_ellpack->index;
for (size_t i = 0; i < gidx_from_sparse.Size(); ++i) {
ASSERT_EQ(gidx_from_sparse[i], gidx_from_ellpack[i]);
}
auto const &columns_from_sparse = from_sparse_page.Transpose();
auto const &columns_from_ellpack = from_ellpack->Transpose();
ASSERT_EQ(columns_from_sparse.AnyMissing(), columns_from_ellpack.AnyMissing());
ASSERT_EQ(columns_from_sparse.GetTypeSize(), columns_from_ellpack.GetTypeSize());
ASSERT_EQ(columns_from_sparse.GetNumFeature(), columns_from_ellpack.GetNumFeature());
for (size_t i = 0; i < n_features; ++i) {
ASSERT_EQ(columns_from_sparse.GetColumnType(i), columns_from_ellpack.GetColumnType(i));
}
std::string from_sparse_buf;
{
common::MemoryBufferStream fo{&from_sparse_buf};
columns_from_sparse.Write(&fo);
}
std::string from_ellpack_buf;
{
common::MemoryBufferStream fo{&from_ellpack_buf};
columns_from_sparse.Write(&fo);
}
ASSERT_EQ(from_sparse_buf, from_ellpack_buf);
}
}
};
} // anonymous namespace
TEST_P(GHistIndexMatrixTest, FromEllpack) {
float sparsity;
double thresh;
std::tie(sparsity, thresh) = GetParam();
this->Run(sparsity, thresh);
}
INSTANTIATE_TEST_SUITE_P(GHistIndexMatrix, GHistIndexMatrixTest,
testing::Values(std::make_tuple(1.f, .0), // no missing
std::make_tuple(.2f, .8), // sparse columns
std::make_tuple(.8f, .2), // dense columns
std::make_tuple(1.f, .2), // no missing
std::make_tuple(.5f, .6), // sparse columns
std::make_tuple(.6f, .4))); // dense columns
#endif // defined(XGBOOST_USE_CUDA)
} // namespace data
} // namespace xgboost