[EM] Pass batch parameter into extmem format. (#10736)
- Allow customization for format reading.
- Customize the number of pre-fetch batches.
This commit is contained in:
@@ -13,77 +13,84 @@
|
||||
|
||||
namespace xgboost::data {
|
||||
namespace {
|
||||
template <typename FormatStreamPolicy>
|
||||
void TestEllpackPageRawFormat(FormatStreamPolicy *p_policy) {
|
||||
auto &policy = *p_policy;
|
||||
Context ctx{MakeCUDACtx(0)};
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
class TestEllpackPageRawFormat : public ::testing::TestWithParam<bool> {
|
||||
public:
|
||||
template <typename FormatStreamPolicy>
|
||||
void Run(FormatStreamPolicy *p_policy, bool prefetch_copy) {
|
||||
auto &policy = *p_policy;
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
|
||||
param.prefetch_copy = prefetch_copy;
|
||||
|
||||
auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::string path = tmpdir.path + "/ellpack.page";
|
||||
auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::string path = tmpdir.path + "/ellpack.page";
|
||||
|
||||
std::shared_ptr<common::HistogramCuts const> cuts;
|
||||
for (auto const &page : m->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
cuts = page.Impl()->CutsShared();
|
||||
}
|
||||
std::shared_ptr<common::HistogramCuts const> cuts;
|
||||
for (auto const &page : m->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
cuts = page.Impl()->CutsShared();
|
||||
}
|
||||
|
||||
ASSERT_EQ(cuts->cut_values_.Device(), ctx.Device());
|
||||
ASSERT_TRUE(cuts->cut_values_.DeviceCanRead());
|
||||
policy.SetCuts(cuts, ctx.Device());
|
||||
ASSERT_EQ(cuts->cut_values_.Device(), ctx.Device());
|
||||
ASSERT_TRUE(cuts->cut_values_.DeviceCanRead());
|
||||
policy.SetCuts(cuts, ctx.Device());
|
||||
|
||||
std::unique_ptr<EllpackPageRawFormat> format{policy.CreatePageFormat()};
|
||||
std::unique_ptr<EllpackPageRawFormat> format{policy.CreatePageFormat(param)};
|
||||
|
||||
std::size_t n_bytes{0};
|
||||
{
|
||||
auto fo = policy.CreateWriter(StringView{path}, 0);
|
||||
for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
n_bytes += format->Write(ellpack, fo.get());
|
||||
}
|
||||
}
|
||||
|
||||
EllpackPage page;
|
||||
auto fi = policy.CreateReader(StringView{path}, static_cast<bst_idx_t>(0), n_bytes);
|
||||
ASSERT_TRUE(format->Read(&page, fi.get()));
|
||||
|
||||
std::size_t n_bytes{0};
|
||||
{
|
||||
auto fo = policy.CreateWriter(StringView{path}, 0);
|
||||
for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
n_bytes += format->Write(ellpack, fo.get());
|
||||
auto loaded = page.Impl();
|
||||
auto orig = ellpack.Impl();
|
||||
ASSERT_EQ(loaded->Cuts().Ptrs(), orig->Cuts().Ptrs());
|
||||
ASSERT_EQ(loaded->Cuts().MinValues(), orig->Cuts().MinValues());
|
||||
ASSERT_EQ(loaded->Cuts().Values(), orig->Cuts().Values());
|
||||
ASSERT_EQ(loaded->base_rowid, orig->base_rowid);
|
||||
ASSERT_EQ(loaded->row_stride, orig->row_stride);
|
||||
std::vector<common::CompressedByteT> h_loaded, h_orig;
|
||||
[[maybe_unused]] auto h_loaded_acc = loaded->GetHostAccessor(&ctx, &h_loaded);
|
||||
[[maybe_unused]] auto h_orig_acc = orig->GetHostAccessor(&ctx, &h_orig);
|
||||
ASSERT_EQ(h_loaded, h_orig);
|
||||
}
|
||||
}
|
||||
|
||||
EllpackPage page;
|
||||
auto fi = policy.CreateReader(StringView{path}, static_cast<bst_idx_t>(0), n_bytes);
|
||||
ASSERT_TRUE(format->Read(&page, fi.get()));
|
||||
|
||||
for (auto const &ellpack : m->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
auto loaded = page.Impl();
|
||||
auto orig = ellpack.Impl();
|
||||
ASSERT_EQ(loaded->Cuts().Ptrs(), orig->Cuts().Ptrs());
|
||||
ASSERT_EQ(loaded->Cuts().MinValues(), orig->Cuts().MinValues());
|
||||
ASSERT_EQ(loaded->Cuts().Values(), orig->Cuts().Values());
|
||||
ASSERT_EQ(loaded->base_rowid, orig->base_rowid);
|
||||
ASSERT_EQ(loaded->row_stride, orig->row_stride);
|
||||
std::vector<common::CompressedByteT> h_loaded, h_orig;
|
||||
[[maybe_unused]] auto h_loaded_acc = loaded->GetHostAccessor(&ctx, &h_loaded);
|
||||
[[maybe_unused]] auto h_orig_acc = orig->GetHostAccessor(&ctx, &h_orig);
|
||||
ASSERT_EQ(h_loaded, h_orig);
|
||||
}
|
||||
}
|
||||
};
|
||||
} // anonymous namespace
|
||||
|
||||
TEST(EllpackPageRawFormat, DiskIO) {
|
||||
TEST_P(TestEllpackPageRawFormat, DiskIO) {
|
||||
EllpackMmapStreamPolicy<EllpackPage, EllpackFormatPolicy> policy{false};
|
||||
TestEllpackPageRawFormat(&policy);
|
||||
this->Run(&policy, this->GetParam());
|
||||
}
|
||||
|
||||
TEST(EllpackPageRawFormat, DiskIOHmm) {
|
||||
TEST_P(TestEllpackPageRawFormat, DiskIOHmm) {
|
||||
if (common::SupportsPageableMem()) {
|
||||
EllpackMmapStreamPolicy<EllpackPage, EllpackFormatPolicy> policy{true};
|
||||
TestEllpackPageRawFormat(&policy);
|
||||
this->Run(&policy, this->GetParam());
|
||||
} else {
|
||||
GTEST_SKIP_("HMM is not supported.");
|
||||
}
|
||||
}
|
||||
|
||||
TEST(EllpackPageRawFormat, HostIO) {
|
||||
TEST_P(TestEllpackPageRawFormat, HostIO) {
|
||||
{
|
||||
EllpackCacheStreamPolicy<EllpackPage, EllpackFormatPolicy> policy;
|
||||
TestEllpackPageRawFormat(&policy);
|
||||
this->Run(&policy, this->GetParam());
|
||||
}
|
||||
{
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
auto param = BatchParam{32, tree::TrainParam::DftSparseThreshold()};
|
||||
param.n_prefetch_batches = 1;
|
||||
param.prefetch_copy = this->GetParam();
|
||||
|
||||
EllpackCacheStreamPolicy<EllpackPage, EllpackFormatPolicy> policy;
|
||||
std::unique_ptr<EllpackPageRawFormat> format{};
|
||||
Cache cache{false, "name", "ellpack", true};
|
||||
@@ -92,7 +99,7 @@ TEST(EllpackPageRawFormat, HostIO) {
|
||||
for (auto const &page : p_fmat->GetBatches<EllpackPage>(&ctx, param)) {
|
||||
if (!format) {
|
||||
policy.SetCuts(page.Impl()->CutsShared(), ctx.Device());
|
||||
format = policy.CreatePageFormat();
|
||||
format = policy.CreatePageFormat(param);
|
||||
}
|
||||
auto writer = policy.CreateWriter({}, i);
|
||||
auto n_bytes = format->Write(page, writer.get());
|
||||
@@ -123,4 +130,6 @@ TEST(EllpackPageRawFormat, HostIO) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(EllpackPageRawFormat, TestEllpackPageRawFormat, ::testing::Bool());
|
||||
} // namespace xgboost::data
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2021-2023, XGBoost contributors
|
||||
* Copyright 2021-2024, XGBoost contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h> // for CSCPage, SortedCSCPage, SparsePage
|
||||
@@ -11,8 +11,6 @@
|
||||
#include "../../../src/data/sparse_page_writer.h" // for CreatePageFormat
|
||||
#include "../helpers.h" // for RandomDataGenerator
|
||||
#include "dmlc/filesystem.h" // for TemporaryDirectory
|
||||
#include "dmlc/io.h" // for Stream
|
||||
#include "gtest/gtest_pred_impl.h" // for Test, AssertionResult, ASSERT_EQ, TEST
|
||||
#include "xgboost/context.h" // for Context
|
||||
|
||||
namespace xgboost::data {
|
||||
|
||||
Reference in New Issue
Block a user