[EM] Allow staging ellpack on host for GPU external memory. (#10488)

- New parameter `on_host`.
- Abstract format creation and stream creation into policy classes.
This commit is contained in:
Jiaming Yuan
2024-06-28 04:42:18 +08:00
committed by GitHub
parent 824fba783e
commit e8a962575a
36 changed files with 842 additions and 317 deletions

View File

@@ -200,7 +200,7 @@ TEST(EllpackPage, Compact) {
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
// Create an empty result page.
EllpackPageImpl result(FstCU(), page->CutsShared(), page->is_dense, page->row_stride,
EllpackPageImpl result(ctx.Device(), page->CutsShared(), page->is_dense, page->row_stride,
kCompactedRows);
// Compact batch pages into the result page.
@@ -210,7 +210,7 @@ TEST(EllpackPage, Compact) {
thrust::device_vector<size_t> row_indexes_d = row_indexes_h;
common::Span<size_t> row_indexes_span(row_indexes_d.data().get(), kRows);
for (auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
result.Compact(FstCU(), batch.Impl(), row_indexes_span);
result.Compact(&ctx, batch.Impl(), row_indexes_span);
}
size_t current_row = 0;

View File

@@ -4,15 +4,19 @@
#include <gtest/gtest.h>
#include <xgboost/data.h>
#include "../../../src/common/io.h" // for PrivateMmapConstStream, AlignedResourceReadStream...
#include "../../../src/data/ellpack_page.cuh"
#include "../../../src/data/ellpack_page.cuh" // for EllpackPage
#include "../../../src/data/ellpack_page_raw_format.h" // for EllpackPageRawFormat
#include "../../../src/tree/param.h" // TrainParam
#include "../../../src/data/ellpack_page_source.h" // for EllpackFormatStreamPolicy
#include "../../../src/tree/param.h" // for TrainParam
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
namespace xgboost::data {
TEST(EllpackPageRawFormat, IO) {
namespace {
template <typename FormatStreamPolicy>
void TestEllpackPageRawFormat() {
FormatStreamPolicy policy;
Context ctx{MakeCUDACtx(0)};
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
@@ -21,24 +25,26 @@ TEST(EllpackPageRawFormat, IO) {
std::string path = tmpdir.path + "/ellpack.page";
std::shared_ptr<common::HistogramCuts const> cuts;
for (auto const& page : m->GetBatches<EllpackPage>(&ctx, param)) {
for (auto const &page : m->GetBatches<EllpackPage>(&ctx, param)) {
cuts = page.Impl()->CutsShared();
}
cuts->SetDevice(ctx.Device());
auto format = std::make_unique<EllpackPageRawFormat>(cuts);
ASSERT_EQ(cuts->cut_values_.Device(), ctx.Device());
ASSERT_TRUE(cuts->cut_values_.DeviceCanRead());
policy.SetCuts(cuts, ctx.Device());
std::unique_ptr<EllpackPageRawFormat> format{policy.CreatePageFormat()};
std::size_t n_bytes{0};
{
auto fo = std::make_unique<common::AlignedFileWriteStream>(StringView{path}, "wb");
auto fo = policy.CreateWriter(StringView{path}, 0);
for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) {
n_bytes += format->Write(ellpack, fo.get());
}
}
EllpackPage page;
std::unique_ptr<common::AlignedResourceReadStream> fi{
std::make_unique<common::PrivateMmapConstStream>(path.c_str(), 0, n_bytes)};
auto fi = policy.CreateReader(StringView{path}, static_cast<bst_idx_t>(0), n_bytes);
ASSERT_TRUE(format->Read(&page, fi.get()));
for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) {
@@ -52,4 +58,13 @@ TEST(EllpackPageRawFormat, IO) {
ASSERT_EQ(loaded->gidx_buffer.HostVector(), orig->gidx_buffer.HostVector());
}
}
} // anonymous namespace
// Run the shared raw-format write/read check with the default format/stream policy.
TEST(EllpackPageRawFormat, DiskIO) {
  using DiskPolicy = DefaultFormatStreamPolicy<EllpackPage, EllpackFormatPolicy>;
  TestEllpackPageRawFormat<DiskPolicy>();
}
// Run the shared raw-format write/read check with the ellpack-specific stream policy.
TEST(EllpackPageRawFormat, HostIO) {
  using HostPolicy = EllpackFormatStreamPolicy<EllpackPage, EllpackFormatPolicy>;
  TestEllpackPageRawFormat<HostPolicy>();
}
} // namespace xgboost::data

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019-2023 by XGBoost Contributors
* Copyright 2019-2024, XGBoost Contributors
*/
#include <xgboost/data.h> // for DMatrix
@@ -29,14 +29,10 @@ TEST(SparsePageDMatrix, EllpackPage) {
EXPECT_EQ(n, dmat->Info().num_row_);
auto path =
data::MakeId(tmp_file + ".cache",
dynamic_cast<data::SparsePageDMatrix *>(dmat)) +
".row.page";
data::MakeId(tmp_file + ".cache", dynamic_cast<data::SparsePageDMatrix*>(dmat)) + ".row.page";
EXPECT_TRUE(FileExists(path));
path =
data::MakeId(tmp_file + ".cache",
dynamic_cast<data::SparsePageDMatrix *>(dmat)) +
".ellpack.page";
path = data::MakeId(tmp_file + ".cache", dynamic_cast<data::SparsePageDMatrix*>(dmat)) +
".ellpack.page";
EXPECT_TRUE(FileExists(path));
delete dmat;
@@ -82,8 +78,8 @@ TEST(SparsePageDMatrix, MultipleEllpackPages) {
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(kEntries, filename);
// Loop over the batches and count the records
int64_t batch_count = 0;
int64_t row_count = 0;
std::int64_t batch_count = 0;
bst_idx_t row_count = 0;
for (const auto& batch : dmat->GetBatches<EllpackPage>(&ctx, param)) {
EXPECT_LT(batch.Size(), dmat->Info().num_row_);
batch_count++;
@@ -138,50 +134,85 @@ TEST(SparsePageDMatrix, RetainEllpackPage) {
}
}
TEST(SparsePageDMatrix, EllpackPageContent) {
auto ctx = MakeCUDACtx(0);
constexpr size_t kRows = 6;
constexpr size_t kCols = 2;
constexpr size_t kPageSize = 1;
namespace {
// Test comparing external DMatrix with in-core DMatrix
class TestEllpackPageExt : public ::testing::TestWithParam<std::tuple<bool, bool>> {
protected:
void Run(bool on_host, bool is_dense) {
float sparsity = is_dense ? 0.0 : 0.2;
// Create an in-memory DMatrix.
std::unique_ptr<DMatrix> dmat(CreateSparsePageDMatrixWithRC(kRows, kCols, 0, true));
auto ctx = MakeCUDACtx(0);
constexpr bst_idx_t kRows = 64;
constexpr size_t kCols = 2;
// Create a DMatrix with multiple batches.
dmlc::TemporaryDirectory tmpdir;
std::unique_ptr<DMatrix>
dmat_ext(CreateSparsePageDMatrixWithRC(kRows, kCols, kPageSize, true, tmpdir));
// Create an in-memory DMatrix.
auto p_fmat = RandomDataGenerator{kRows, kCols, sparsity}.GenerateDMatrix(true);
auto param = BatchParam{2, tree::TrainParam::DftSparseThreshold()};
auto impl = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
EXPECT_EQ(impl->base_rowid, 0);
EXPECT_EQ(impl->n_rows, kRows);
EXPECT_FALSE(impl->is_dense);
EXPECT_EQ(impl->row_stride, 2);
EXPECT_EQ(impl->Cuts().TotalBins(), 4);
// Create a DMatrix with multiple batches.
dmlc::TemporaryDirectory tmpdir;
auto prefix = tmpdir.path + "/cache";
std::unique_ptr<EllpackPageImpl> impl_ext;
size_t offset = 0;
for (auto& batch : dmat_ext->GetBatches<EllpackPage>(&ctx, param)) {
if (!impl_ext) {
impl_ext = std::make_unique<EllpackPageImpl>(
batch.Impl()->gidx_buffer.Device(), batch.Impl()->CutsShared(), batch.Impl()->is_dense,
batch.Impl()->row_stride, kRows);
auto p_ext_fmat = RandomDataGenerator{kRows, kCols, sparsity}
.Batches(4)
.OnHost(on_host)
.GenerateSparsePageDMatrix(prefix, true);
auto param = BatchParam{2, tree::TrainParam::DftSparseThreshold()};
auto impl = (*p_fmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
ASSERT_EQ(impl->base_rowid, 0);
ASSERT_EQ(impl->n_rows, kRows);
ASSERT_EQ(impl->is_dense, is_dense);
ASSERT_EQ(impl->row_stride, 2);
ASSERT_EQ(impl->Cuts().TotalBins(), 4);
std::unique_ptr<EllpackPageImpl> impl_ext;
size_t offset = 0;
for (auto& batch : p_ext_fmat->GetBatches<EllpackPage>(&ctx, param)) {
if (!impl_ext) {
impl_ext = std::make_unique<EllpackPageImpl>(
batch.Impl()->gidx_buffer.Device(), batch.Impl()->CutsShared(), batch.Impl()->is_dense,
batch.Impl()->row_stride, kRows);
}
auto n_elems = impl_ext->Copy(ctx.Device(), batch.Impl(), offset);
offset += n_elems;
}
auto n_elems = impl_ext->Copy(ctx.Device(), batch.Impl(), offset);
offset += n_elems;
}
EXPECT_EQ(impl_ext->base_rowid, 0);
EXPECT_EQ(impl_ext->n_rows, kRows);
EXPECT_FALSE(impl_ext->is_dense);
EXPECT_EQ(impl_ext->row_stride, 2);
EXPECT_EQ(impl_ext->Cuts().TotalBins(), 4);
ASSERT_EQ(impl_ext->base_rowid, 0);
ASSERT_EQ(impl_ext->n_rows, kRows);
ASSERT_EQ(impl_ext->is_dense, is_dense);
ASSERT_EQ(impl_ext->row_stride, 2);
ASSERT_EQ(impl_ext->Cuts().TotalBins(), 4);
std::vector<common::CompressedByteT> buffer(impl->gidx_buffer.HostVector());
std::vector<common::CompressedByteT> buffer_ext(impl_ext->gidx_buffer.HostVector());
EXPECT_EQ(buffer, buffer_ext);
std::vector<common::CompressedByteT> buffer(impl->gidx_buffer.HostVector());
std::vector<common::CompressedByteT> buffer_ext(impl_ext->gidx_buffer.HostVector());
ASSERT_EQ(buffer, buffer_ext);
}
};
} // anonymous namespace
// Forward the (on_host, is_dense) parameter tuple to the fixture's Run method.
TEST_P(TestEllpackPageExt, Data) {
  auto const& args = this->GetParam();
  bool const on_host = std::get<0>(args);
  bool const is_dense = std::get<1>(args);
  this->Run(on_host, is_dense);
}
// Instantiate the suite for all four (on_host, is_dense) combinations, in the order
// (true, true), (true, false), (false, true), (false, false).
// Generated test names have the form "<host|ext>_<dense|sparse>".
INSTANTIATE_TEST_SUITE_P(
    EllpackPageExt, TestEllpackPageExt,
    ::testing::ValuesIn(std::vector<std::tuple<bool, bool>>{
        {true, true}, {true, false}, {false, true}, {false, false}}),
    [](::testing::TestParamInfo<TestEllpackPageExt::ParamType> const& info) {
      bool const on_host = std::get<0>(info.param);
      bool const is_dense = std::get<1>(info.param);
      std::string name{on_host ? "host" : "ext"};
      name += "_";
      name += (is_dense ? "dense" : "sparse");
      return name;
    });
struct ReadRowFunction {
EllpackDeviceAccessor matrix;
int row;

View File

@@ -437,9 +437,9 @@ void RandomDataGenerator::GenerateCSR(
#endif // defined(XGBOOST_USE_CUDA)
}
std::unique_ptr<DMatrix> dmat{
DMatrix::Create(static_cast<DataIterHandle>(iter.get()), iter->Proxy(), Reset, Next,
std::numeric_limits<float>::quiet_NaN(), Context{}.Threads(), prefix)};
std::unique_ptr<DMatrix> dmat{DMatrix::Create(
static_cast<DataIterHandle>(iter.get()), iter->Proxy(), Reset, Next,
std::numeric_limits<float>::quiet_NaN(), Context{}.Threads(), prefix, on_host_)};
auto row_page_path =
data::MakeId(prefix, dynamic_cast<data::SparsePageDMatrix*>(dmat.get())) + ".row.page";
@@ -520,9 +520,9 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrix(bst_idx_t n_samples, bst_featur
CHECK_GE(n_samples, n_batches);
NumpyArrayIterForTest iter(0, n_samples, n_features, n_batches);
std::unique_ptr<DMatrix> dmat{
DMatrix::Create(static_cast<DataIterHandle>(&iter), iter.Proxy(), Reset, Next,
std::numeric_limits<float>::quiet_NaN(), omp_get_max_threads(), prefix)};
std::unique_ptr<DMatrix> dmat{DMatrix::Create(
static_cast<DataIterHandle>(&iter), iter.Proxy(), Reset, Next,
std::numeric_limits<float>::quiet_NaN(), omp_get_max_threads(), prefix, false)};
auto row_page_path =
data::MakeId(prefix, dynamic_cast<data::SparsePageDMatrix*>(dmat.get())) + ".row.page";
@@ -549,7 +549,7 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrix(size_t n_entries,
std::unique_ptr<DMatrix> dmat{
DMatrix::Create(static_cast<DataIterHandle>(&iter), iter.Proxy(), Reset, Next,
std::numeric_limits<float>::quiet_NaN(), 0, prefix)};
std::numeric_limits<float>::quiet_NaN(), 0, prefix, false)};
auto row_page_path =
data::MakeId(prefix,
dynamic_cast<data::SparsePageDMatrix *>(dmat.get())) +
@@ -568,9 +568,9 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrix(size_t n_entries,
return dmat;
}
std::unique_ptr<DMatrix> CreateSparsePageDMatrixWithRC(
size_t n_rows, size_t n_cols, size_t page_size, bool deterministic,
const dmlc::TemporaryDirectory& tempdir) {
std::unique_ptr<DMatrix> CreateSparsePageDMatrixWithRC(size_t n_rows, size_t n_cols,
size_t page_size, bool deterministic,
const dmlc::TemporaryDirectory& tempdir) {
if (!n_rows || !n_cols) {
return nullptr;
}

View File

@@ -241,6 +241,7 @@ class RandomDataGenerator {
bst_bin_t bins_{0};
std::vector<FeatureType> ft_;
bst_cat_t max_cat_{32};
bool on_host_{false};
Json ArrayInterfaceImpl(HostDeviceVector<float>* storage, size_t rows, size_t cols) const;
@@ -266,6 +267,10 @@ class RandomDataGenerator {
n_batches_ = n_batches;
return *this;
}
// Builder-style setter for the `on_host_` flag; returns *this so calls can be chained.
RandomDataGenerator& OnHost(bool on_host) {
  this->on_host_ = on_host;
  return *this;
}
RandomDataGenerator& Seed(uint64_t s) {
seed_ = s;
lcg_.Seed(seed_);

View File

@@ -67,4 +67,30 @@ TEST(RandomDataGenerator, GenerateArrayInterfaceBatch) {
CHECK_EQ(get<Integer>(j_array["shape"][0]), kRows);
CHECK_EQ(get<Integer>(j_array["shape"][1]), kCols);
}
// Check that the batched DMatrix from GenerateSparsePageDMatrix carries the same data as the
// in-core DMatrix from GenerateDMatrix when both generators use identical settings.
TEST(RandomDataGenerator, SparseDMatrix) {
  bst_idx_t constexpr kCols{100}, kBatches{13};
  bst_idx_t n_samples{kBatches * 128};
  dmlc::TemporaryDirectory tmpdir;
  auto prefix = tmpdir.path + "/cache";
  auto p_ext_fmat = RandomDataGenerator{n_samples, kCols, 0.0}
                        .Batches(kBatches)
                        .GenerateSparsePageDMatrix(prefix, true);

  auto p_fmat = RandomDataGenerator{n_samples, kCols, 0.0}.GenerateDMatrix(true);

  // Concatenate every page of the batched matrix while counting the pages.
  SparsePage concat;
  // Same type as kBatches so the ASSERT_EQ below does not mix signed and unsigned operands.
  bst_idx_t n_batches{0};
  for (auto const& page : p_ext_fmat->GetBatches<SparsePage>()) {
    concat.Push(page);
    ++n_batches;
  }
  ASSERT_EQ(n_batches, kBatches);
  ASSERT_EQ(concat.Size(), n_samples);

  // The concatenated pages must match the in-core page in both values and row offsets.
  for (auto const& page : p_fmat->GetBatches<SparsePage>()) {
    ASSERT_EQ(page.data.ConstHostVector(), concat.data.ConstHostVector());
    ASSERT_EQ(page.offset.ConstHostVector(), concat.offset.ConstHostVector());
  }
}
} // namespace xgboost