Use mmap for external memory. (#9282)

- Have basic infrastructure for mmap.
- Release file write handle.
2023-06-19 18:52:55 +08:00
parent d8beb517ed
commit ee6809e642
16 changed files with 599 additions and 275 deletions
--- a/tests/cpp/tree/gpu_hist/test_gradient_based_sampler.cu
+++ b/tests/cpp/tree/gpu_hist/test_gradient_based_sampler.cu
@@ -39,7 +39,8 @@ void VerifySampling(size_t page_size,
    EXPECT_NE(page->n_rows, kRows);
  }

-  GradientBasedSampler sampler(&ctx, page, kRows, param, subsample, sampling_method);
+  GradientBasedSampler sampler(&ctx, kRows, param, subsample, sampling_method,
+                               !fixed_size_sampling);
  auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get());

  if (fixed_size_sampling) {
@@ -93,7 +94,7 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
  auto page = (*dmat->GetBatches<EllpackPage>(&ctx, param).begin()).Impl();
  EXPECT_NE(page->n_rows, kRows);

-  GradientBasedSampler sampler(&ctx, page, kRows, param, kSubsample, TrainParam::kUniform);
+  GradientBasedSampler sampler(&ctx, kRows, param, kSubsample, TrainParam::kUniform, true);
  auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get());
  auto sampled_page = sample.page;
  EXPECT_EQ(sample.sample_rows, kRows);
@@ -141,7 +142,8 @@ TEST(GradientBasedSampler, GradientBasedSampling) {
  constexpr size_t kPageSize = 0;
  constexpr float kSubsample = 0.8;
  constexpr int kSamplingMethod = TrainParam::kGradientBased;
-  VerifySampling(kPageSize, kSubsample, kSamplingMethod);
+  constexpr bool kFixedSizeSampling = true;
+  VerifySampling(kPageSize, kSubsample, kSamplingMethod, kFixedSizeSampling);
 }

 TEST(GradientBasedSampler, GradientBasedSamplingExternalMemory) {
--- a/tests/cpp/tree/test_gpu_hist.cu
+++ b/tests/cpp/tree/test_gpu_hist.cu
@@ -92,8 +92,8 @@ void TestBuildHist(bool use_shared_memory_histograms) {
  auto page = BuildEllpackPage(kNRows, kNCols);
  BatchParam batch_param{};
  Context ctx{MakeCUDACtx(0)};
-  GPUHistMakerDevice<GradientSumT> maker(&ctx, page.get(), {}, kNRows, param, kNCols, kNCols,
-                                         batch_param);
+  GPUHistMakerDevice<GradientSumT> maker(&ctx, /*is_external_memory=*/false, {}, kNRows, param,
+                                         kNCols, kNCols, batch_param);
  xgboost::SimpleLCG gen;
  xgboost::SimpleRealUniformDistribution<bst_float> dist(0.0f, 1.0f);
  HostDeviceVector<GradientPair> gpair(kNRows);
@@ -106,9 +106,15 @@ void TestBuildHist(bool use_shared_memory_histograms) {

  thrust::host_vector<common::CompressedByteT> h_gidx_buffer (page->gidx_buffer.HostVector());
  maker.row_partitioner.reset(new RowPartitioner(0, kNRows));
+
+  maker.hist.Init(0, page->Cuts().TotalBins());
  maker.hist.AllocateHistograms({0});
+
  maker.gpair = gpair.DeviceSpan();
  maker.quantiser.reset(new GradientQuantiser(maker.gpair));
+  maker.page = page.get();
+
+  maker.InitFeatureGroupsOnce();

  BuildGradientHistogram(ctx.CUDACtx(), page->GetDeviceAccessor(0),
                         maker.feature_groups->DeviceAccessor(0), gpair.DeviceSpan(),
@@ -126,8 +132,8 @@ void TestBuildHist(bool use_shared_memory_histograms) {
  std::vector<GradientPairPrecise> solution = GetHostHistGpair();
  for (size_t i = 0; i < h_result.size(); ++i) {
    auto result = maker.quantiser->ToFloatingPoint(h_result[i]);
-    EXPECT_NEAR(result.GetGrad(), solution[i].GetGrad(), 0.01f);
-    EXPECT_NEAR(result.GetHess(), solution[i].GetHess(), 0.01f);
+    ASSERT_NEAR(result.GetGrad(), solution[i].GetGrad(), 0.01f);
+    ASSERT_NEAR(result.GetHess(), solution[i].GetHess(), 0.01f);
  }
 }