Use mmap for external memory. (#9282)

- Have basic infrastructure for mmap.
- Release file write handle.
This commit is contained in:
Jiaming Yuan
2023-06-19 18:52:55 +08:00
committed by GitHub
parent d8beb517ed
commit ee6809e642
16 changed files with 599 additions and 275 deletions

View File

@@ -1,5 +1,5 @@
/*!
* Copyright (c) by XGBoost Contributors 2019
/**
* Copyright 2019-2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
@@ -9,8 +9,7 @@
#include "../helpers.h"
#include "../filesystem.h" // dmlc::TemporaryDirectory
namespace xgboost {
namespace common {
namespace xgboost::common {
TEST(MemoryFixSizeBuffer, Seek) {
size_t constexpr kSize { 64 };
std::vector<int32_t> memory( kSize );
@@ -89,5 +88,54 @@ TEST(IO, LoadSequentialFile) {
ASSERT_THROW(LoadSequentialFile("non-exist", true), dmlc::Error);
}
} // namespace common
} // namespace xgboost
/**
 * Round-trip test for PrivateMmapConstStream.
 *
 * Writes 64 length-prefixed batches of int32 values to a temporary file, then re-opens
 * each batch through its own PrivateMmapConstStream constructed from the batch's byte
 * offset and length, and checks that the values read back match what was written.
 */
TEST(IO, PrivateMmapStream) {
  dmlc::TemporaryDirectory tempdir;
  auto path = tempdir.path + "/testfile";

  // The page size on Linux is usually set to 4096, while the allocation granularity on
  // the Windows machine where this test was written is 65536. We span the test to cover
  // all of them.
  std::size_t n_batches{64};
  std::size_t multiplier{2048};

  using T = std::int32_t;

  std::vector<std::vector<T>> batches;
  // Starts as {0, bytes of batch 0, bytes of batch 1, ...}; converted into running byte
  // offsets once all batches have been written.
  std::vector<std::size_t> offset{0ul};

  {
    // Scope the writer so the stream object is destroyed before the file is read back.
    std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), "w")};
    for (std::size_t i = 0; i < n_batches; ++i) {
      // Batch i holds (i + 1) * multiplier consecutive integers starting at i * i.
      std::size_t size = (i + 1) * multiplier;
      std::vector<T> data(size, 0);
      std::iota(data.begin(), data.end(), i * i);

      // On-disk layout of one batch: a uint64 element count followed by the raw values.
      fo->Write(static_cast<std::uint64_t>(data.size()));
      fo->Write(data.data(), data.size() * sizeof(T));

      std::size_t bytes = sizeof(std::uint64_t) + data.size() * sizeof(T);
      offset.push_back(bytes);

      batches.emplace_back(std::move(data));
    }
  }

  // Turn sizes into offsets.
  std::partial_sum(offset.begin(), offset.end(), offset.begin());

  for (std::size_t i = 0; i < n_batches; ++i) {
    std::size_t off = offset[i];
    std::size_t n = offset.at(i + 1) - offset[i];
    std::unique_ptr<dmlc::Stream> fi{std::make_unique<PrivateMmapConstStream>(path, off, n)};

    std::vector<T> data;
    std::uint64_t size{0};
    // Check every read: a failed or short read would otherwise only surface later as a
    // hard-to-diagnose content mismatch.
    ASSERT_TRUE(fi->Read(&size));
    // Validate the header before using it to size the buffer.
    ASSERT_EQ(size, batches[i].size());

    data.resize(size);
    ASSERT_EQ(fi->Read(data.data(), size * sizeof(T)), size * sizeof(T));
    ASSERT_EQ(data, batches[i]);
  }
}
} // namespace xgboost::common

View File

@@ -2,6 +2,10 @@
#include "../../src/data/ellpack_page.cuh"
#endif
#include <xgboost/data.h> // for SparsePage
#include "./helpers.h" // for RandomDataGenerator
namespace xgboost {
#if defined(__CUDACC__)
namespace {

View File

@@ -39,7 +39,8 @@ void VerifySampling(size_t page_size,
EXPECT_NE(page->n_rows, kRows);
}
GradientBasedSampler sampler(&ctx, page, kRows, param, subsample, sampling_method);
GradientBasedSampler sampler(&ctx, kRows, param, subsample, sampling_method,
!fixed_size_sampling);
auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get());
if (fixed_size_sampling) {
@@ -93,7 +94,7 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
EXPECT_NE(page->n_rows, kRows);
GradientBasedSampler sampler(&ctx, page, kRows, param, kSubsample, TrainParam::kUniform);
GradientBasedSampler sampler(&ctx, kRows, param, kSubsample, TrainParam::kUniform, true);
auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get());
auto sampled_page = sample.page;
EXPECT_EQ(sample.sample_rows, kRows);
@@ -141,7 +142,8 @@ TEST(GradientBasedSampler, GradientBasedSampling) {
constexpr size_t kPageSize = 0;
constexpr float kSubsample = 0.8;
constexpr int kSamplingMethod = TrainParam::kGradientBased;
VerifySampling(kPageSize, kSubsample, kSamplingMethod);
constexpr bool kFixedSizeSampling = true;
VerifySampling(kPageSize, kSubsample, kSamplingMethod, kFixedSizeSampling);
}
TEST(GradientBasedSampler, GradientBasedSamplingExternalMemory) {

View File

@@ -92,8 +92,8 @@ void TestBuildHist(bool use_shared_memory_histograms) {
auto page = BuildEllpackPage(kNRows, kNCols);
BatchParam batch_param{};
Context ctx{MakeCUDACtx(0)};
GPUHistMakerDevice<GradientSumT> maker(&ctx, page.get(), {}, kNRows, param, kNCols, kNCols,
batch_param);
GPUHistMakerDevice<GradientSumT> maker(&ctx, /*is_external_memory=*/false, {}, kNRows, param,
kNCols, kNCols, batch_param);
xgboost::SimpleLCG gen;
xgboost::SimpleRealUniformDistribution<bst_float> dist(0.0f, 1.0f);
HostDeviceVector<GradientPair> gpair(kNRows);
@@ -106,9 +106,15 @@ void TestBuildHist(bool use_shared_memory_histograms) {
thrust::host_vector<common::CompressedByteT> h_gidx_buffer (page->gidx_buffer.HostVector());
maker.row_partitioner.reset(new RowPartitioner(0, kNRows));
maker.hist.Init(0, page->Cuts().TotalBins());
maker.hist.AllocateHistograms({0});
maker.gpair = gpair.DeviceSpan();
maker.quantiser.reset(new GradientQuantiser(maker.gpair));
maker.page = page.get();
maker.InitFeatureGroupsOnce();
BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
maker.feature_groups->DeviceAccessor(0), gpair.DeviceSpan(),
@@ -126,8 +132,8 @@ void TestBuildHist(bool use_shared_memory_histograms) {
std::vector<GradientPairPrecise> solution = GetHostHistGpair();
for (size_t i = 0; i < h_result.size(); ++i) {
auto result = maker.quantiser->ToFloatingPoint(h_result[i]);
EXPECT_NEAR(result.GetGrad(), solution[i].GetGrad(), 0.01f);
EXPECT_NEAR(result.GetHess(), solution[i].GetHess(), 0.01f);
ASSERT_NEAR(result.GetGrad(), solution[i].GetGrad(), 0.01f);
ASSERT_NEAR(result.GetHess(), solution[i].GetHess(), 0.01f);
}
}