[EM] Make page concatenation optional. (#10826)

This PR introduces a new parameter `extmem_concat_pages` to make the page concatenation optional for GPU hist. In addition, the document is updated for the new GPU-based external memory.
This commit is contained in:
Jiaming Yuan
2024-09-24 06:19:28 +08:00
committed by GitHub
parent 215da76263
commit e228c1a121
31 changed files with 690 additions and 388 deletions

View File

@@ -81,10 +81,11 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
GradientBasedSampler sampler(&ctx, kRows, param, kSubsample, TrainParam::kUniform, true);
auto sample = sampler.Sample(&ctx, gpair.DeviceSpan(), dmat.get());
auto p_fmat = sample.p_fmat;
ASSERT_EQ(p_fmat, dmat.get());
ASSERT_THAT(
[&] {
GradientBasedSampler sampler(&ctx, kRows, param, kSubsample, TrainParam::kUniform, true);
},
GMockThrow("extmem_concat_pages"));
}
TEST(GradientBasedSampler, UniformSampling) {
@@ -120,4 +121,4 @@ TEST(GradientBasedSampler, GradientBasedSamplingExternalMemory) {
constexpr bool kFixedSizeSampling = false;
VerifySampling(kPageSize, kSubsample, kSamplingMethod, kFixedSizeSampling);
}
}; // namespace xgboost::tree
} // namespace xgboost::tree

View File

@@ -23,7 +23,7 @@ namespace xgboost::tree {
namespace {
void UpdateTree(Context const* ctx, linalg::Matrix<GradientPair>* gpair, DMatrix* dmat,
RegTree* tree, HostDeviceVector<bst_float>* preds, float subsample,
const std::string& sampling_method, bst_bin_t max_bin) {
const std::string& sampling_method, bst_bin_t max_bin, bool concat_pages) {
Args args{
{"max_depth", "2"},
{"max_bin", std::to_string(max_bin)},
@@ -38,13 +38,17 @@ void UpdateTree(Context const* ctx, linalg::Matrix<GradientPair>* gpair, DMatrix
ObjInfo task{ObjInfo::kRegression};
std::unique_ptr<TreeUpdater> hist_maker{TreeUpdater::Create("grow_gpu_hist", ctx, &task)};
hist_maker->Configure(Args{});
if (subsample < 1.0) {
hist_maker->Configure(Args{{"extmem_concat_pages", std::to_string(concat_pages)}});
} else {
hist_maker->Configure(Args{});
}
std::vector<HostDeviceVector<bst_node_t>> position(1);
hist_maker->Update(&param, gpair, dmat, common::Span<HostDeviceVector<bst_node_t>>{position},
{tree});
auto cache = linalg::MakeTensorView(ctx, preds->DeviceSpan(), preds->Size(), 1);
if (subsample < 1.0 && !dmat->SingleColBlock()) {
if (subsample < 1.0 && !dmat->SingleColBlock() && concat_pages) {
ASSERT_FALSE(hist_maker->UpdatePredictionCache(dmat, cache));
} else {
ASSERT_TRUE(hist_maker->UpdatePredictionCache(dmat, cache));
@@ -69,12 +73,12 @@ TEST(GpuHist, UniformSampling) {
// Build a tree using the in-memory DMatrix.
RegTree tree;
HostDeviceVector<bst_float> preds(kRows, 0.0, ctx.Device());
UpdateTree(&ctx, &gpair, p_fmat.get(), &tree, &preds, 1.0, "uniform", kRows);
UpdateTree(&ctx, &gpair, p_fmat.get(), &tree, &preds, 1.0, "uniform", kRows, false);
// Build another tree using sampling.
RegTree tree_sampling;
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, ctx.Device());
UpdateTree(&ctx, &gpair, p_fmat.get(), &tree_sampling, &preds_sampling, kSubsample, "uniform",
kRows);
kRows, false);
// Make sure the predictions are the same.
auto preds_h = preds.ConstHostVector();
@@ -100,13 +104,13 @@ TEST(GpuHist, GradientBasedSampling) {
// Build a tree using the in-memory DMatrix.
RegTree tree;
HostDeviceVector<bst_float> preds(kRows, 0.0, ctx.Device());
UpdateTree(&ctx, &gpair, p_fmat.get(), &tree, &preds, 1.0, "uniform", kRows);
UpdateTree(&ctx, &gpair, p_fmat.get(), &tree, &preds, 1.0, "uniform", kRows, false);
// Build another tree using sampling.
RegTree tree_sampling;
HostDeviceVector<bst_float> preds_sampling(kRows, 0.0, ctx.Device());
UpdateTree(&ctx, &gpair, p_fmat.get(), &tree_sampling, &preds_sampling, kSubsample,
"gradient_based", kRows);
"gradient_based", kRows, false);
// Make sure the predictions are the same.
auto preds_h = preds.ConstHostVector();
@@ -137,11 +141,11 @@ TEST(GpuHist, ExternalMemory) {
// Build a tree using the in-memory DMatrix.
RegTree tree;
HostDeviceVector<bst_float> preds(kRows, 0.0, ctx.Device());
UpdateTree(&ctx, &gpair, p_fmat.get(), &tree, &preds, 1.0, "uniform", kRows);
UpdateTree(&ctx, &gpair, p_fmat.get(), &tree, &preds, 1.0, "uniform", kRows, true);
// Build another tree using multiple ELLPACK pages.
RegTree tree_ext;
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, ctx.Device());
UpdateTree(&ctx, &gpair, p_fmat_ext.get(), &tree_ext, &preds_ext, 1.0, "uniform", kRows);
UpdateTree(&ctx, &gpair, p_fmat_ext.get(), &tree_ext, &preds_ext, 1.0, "uniform", kRows, true);
// Make sure the predictions are the same.
auto preds_h = preds.ConstHostVector();
@@ -181,14 +185,14 @@ TEST(GpuHist, ExternalMemoryWithSampling) {
RegTree tree;
HostDeviceVector<bst_float> preds(kRows, 0.0, ctx.Device());
UpdateTree(&ctx, &gpair, p_fmat.get(), &tree, &preds, kSubsample, kSamplingMethod, kRows);
UpdateTree(&ctx, &gpair, p_fmat.get(), &tree, &preds, kSubsample, kSamplingMethod, kRows, true);
// Build another tree using multiple ELLPACK pages.
common::GlobalRandom() = rng;
RegTree tree_ext;
HostDeviceVector<bst_float> preds_ext(kRows, 0.0, ctx.Device());
UpdateTree(&ctx, &gpair, p_fmat_ext.get(), &tree_ext, &preds_ext, kSubsample, kSamplingMethod,
kRows);
kRows, true);
Json jtree{Object{}};
Json jtree_ext{Object{}};
@@ -228,6 +232,42 @@ TEST(GpuHist, MaxDepth) {
ASSERT_THROW({learner->UpdateOneIter(0, p_mat);}, dmlc::Error);
}
TEST(GpuHist, PageConcatConfig) {
auto ctx = MakeCUDACtx(0);
bst_idx_t n_samples = 64, n_features = 32;
auto p_fmat = RandomDataGenerator{n_samples, n_features, 0}.Batches(2).GenerateSparsePageDMatrix(
"temp", true);
auto learner = std::unique_ptr<Learner>(Learner::Create({p_fmat}));
learner->SetParam("device", ctx.DeviceName());
learner->SetParam("extmem_concat_pages", "true");
learner->SetParam("subsample", "0.8");
learner->Configure();
learner->UpdateOneIter(0, p_fmat);
learner->SetParam("extmem_concat_pages", "false");
learner->Configure();
// GPU Hist rebuilds the updater after configuration. Training continues
learner->UpdateOneIter(1, p_fmat);
learner->SetParam("extmem_concat_pages", "true");
learner->SetParam("subsample", "1.0");
ASSERT_THAT([&] { learner->UpdateOneIter(2, p_fmat); }, GMockThrow("extmem_concat_pages"));
// Throws error on CPU.
{
auto learner = std::unique_ptr<Learner>(Learner::Create({p_fmat}));
learner->SetParam("extmem_concat_pages", "true");
ASSERT_THAT([&] { learner->UpdateOneIter(0, p_fmat); }, GMockThrow("extmem_concat_pages"));
}
{
auto learner = std::unique_ptr<Learner>(Learner::Create({p_fmat}));
learner->SetParam("extmem_concat_pages", "true");
learner->SetParam("tree_method", "approx");
ASSERT_THAT([&] { learner->UpdateOneIter(0, p_fmat); }, GMockThrow("extmem_concat_pages"));
}
}
namespace {
RegTree GetHistTree(Context const* ctx, DMatrix* dmat) {
ObjInfo task{ObjInfo::kRegression};