/** * Copyright 2017-2024, XGBoost contributors */ #include #include // for Args #include // for Context #include // for HostDeviceVector #include // for Json #include // for ObjInfo #include // for RegTree #include // for TreeUpdater #include // for unique_ptr #include // for string #include // for vector #include "../../../src/common/random.h" // for GlobalRandom #include "../../../src/tree/param.h" // for TrainParam #include "../collective/test_worker.h" // for BaseMGPUTest #include "../helpers.h" namespace xgboost::tree { namespace { void UpdateTree(Context const* ctx, linalg::Matrix* gpair, DMatrix* dmat, RegTree* tree, HostDeviceVector* preds, float subsample, const std::string& sampling_method, bst_bin_t max_bin) { Args args{ {"max_depth", "2"}, {"max_bin", std::to_string(max_bin)}, {"min_child_weight", "0.0"}, {"reg_alpha", "0"}, {"reg_lambda", "0"}, {"subsample", std::to_string(subsample)}, {"sampling_method", sampling_method}, }; TrainParam param; param.UpdateAllowUnknown(args); ObjInfo task{ObjInfo::kRegression}; std::unique_ptr hist_maker{TreeUpdater::Create("grow_gpu_hist", ctx, &task)}; hist_maker->Configure(Args{}); std::vector> position(1); hist_maker->Update(¶m, gpair, dmat, common::Span>{position}, {tree}); auto cache = linalg::MakeTensorView(ctx, preds->DeviceSpan(), preds->Size(), 1); if (subsample < 1.0 && !dmat->SingleColBlock()) { ASSERT_FALSE(hist_maker->UpdatePredictionCache(dmat, cache)); } else { ASSERT_TRUE(hist_maker->UpdatePredictionCache(dmat, cache)); } } } // anonymous namespace TEST(GpuHist, UniformSampling) { constexpr size_t kRows = 4096; constexpr size_t kCols = 2; constexpr float kSubsample = 0.9999; common::GlobalRandom().seed(1994); auto ctx = MakeCUDACtx(0); // Create an in-memory DMatrix. auto p_fmat = RandomDataGenerator{kRows, kCols, 0.0f}.GenerateDMatrix(true); ASSERT_TRUE(p_fmat->SingleColBlock()); linalg::Matrix gpair({kRows}, ctx.Device()); gpair.Data()->Copy(GenerateRandomGradients(kRows)); // Build a tree using the in-memory DMatrix. RegTree tree; HostDeviceVector preds(kRows, 0.0, ctx.Device()); UpdateTree(&ctx, &gpair, p_fmat.get(), &tree, &preds, 1.0, "uniform", kRows); // Build another tree using sampling. RegTree tree_sampling; HostDeviceVector preds_sampling(kRows, 0.0, ctx.Device()); UpdateTree(&ctx, &gpair, p_fmat.get(), &tree_sampling, &preds_sampling, kSubsample, "uniform", kRows); // Make sure the predictions are the same. auto preds_h = preds.ConstHostVector(); auto preds_sampling_h = preds_sampling.ConstHostVector(); for (size_t i = 0; i < kRows; i++) { EXPECT_NEAR(preds_h[i], preds_sampling_h[i], 1e-8); } } TEST(GpuHist, GradientBasedSampling) { constexpr size_t kRows = 4096; constexpr size_t kCols = 2; constexpr float kSubsample = 0.9999; common::GlobalRandom().seed(1994); auto ctx = MakeCUDACtx(0); // Create an in-memory DMatrix. auto p_fmat = RandomDataGenerator{kRows, kCols, 0.0f}.GenerateDMatrix(true); linalg::Matrix gpair({kRows}, ctx.Device()); gpair.Data()->Copy(GenerateRandomGradients(kRows)); // Build a tree using the in-memory DMatrix. RegTree tree; HostDeviceVector preds(kRows, 0.0, ctx.Device()); UpdateTree(&ctx, &gpair, p_fmat.get(), &tree, &preds, 1.0, "uniform", kRows); // Build another tree using sampling. RegTree tree_sampling; HostDeviceVector preds_sampling(kRows, 0.0, ctx.Device()); UpdateTree(&ctx, &gpair, p_fmat.get(), &tree_sampling, &preds_sampling, kSubsample, "gradient_based", kRows); // Make sure the predictions are the same. auto preds_h = preds.ConstHostVector(); auto preds_sampling_h = preds_sampling.ConstHostVector(); for (size_t i = 0; i < kRows; i++) { EXPECT_NEAR(preds_h[i], preds_sampling_h[i], 1e-3); } } TEST(GpuHist, ExternalMemory) { constexpr size_t kRows = 4096; constexpr size_t kCols = 2; // Create a DMatrix with multiple batches. auto p_fmat_ext = RandomDataGenerator{kRows, kCols, 0.0f}.Batches(4).GenerateSparsePageDMatrix("temp", true); ASSERT_FALSE(p_fmat_ext->SingleColBlock()); // Create a single batch DMatrix. auto p_fmat = RandomDataGenerator{kRows, kCols, 0.0f}.Batches(1).GenerateSparsePageDMatrix("temp", true); ASSERT_TRUE(p_fmat->SingleColBlock()); auto ctx = MakeCUDACtx(0); linalg::Matrix gpair({kRows}, ctx.Device()); gpair.Data()->Copy(GenerateRandomGradients(kRows)); // Build a tree using the in-memory DMatrix. RegTree tree; HostDeviceVector preds(kRows, 0.0, ctx.Device()); UpdateTree(&ctx, &gpair, p_fmat.get(), &tree, &preds, 1.0, "uniform", kRows); // Build another tree using multiple ELLPACK pages. RegTree tree_ext; HostDeviceVector preds_ext(kRows, 0.0, ctx.Device()); UpdateTree(&ctx, &gpair, p_fmat_ext.get(), &tree_ext, &preds_ext, 1.0, "uniform", kRows); // Make sure the predictions are the same. auto preds_h = preds.ConstHostVector(); auto preds_ext_h = preds_ext.ConstHostVector(); for (size_t i = 0; i < kRows; i++) { EXPECT_NEAR(preds_h[i], preds_ext_h[i], 1e-6); } } TEST(GpuHist, ExternalMemoryWithSampling) { constexpr size_t kRows = 4096, kCols = 2; constexpr float kSubsample = 0.5; const std::string kSamplingMethod = "gradient_based"; common::GlobalRandom().seed(0); auto ctx = MakeCUDACtx(0); // Create a single batch DMatrix. auto p_fmat = RandomDataGenerator{kRows, kCols, 0.0f} .Device(ctx.Device()) .Batches(1) .GenerateSparsePageDMatrix("temp", true); ASSERT_TRUE(p_fmat->SingleColBlock()); // Create a DMatrix with multiple batches. auto p_fmat_ext = RandomDataGenerator{kRows, kCols, 0.0f} .Device(ctx.Device()) .Batches(4) .GenerateSparsePageDMatrix("temp", true); ASSERT_FALSE(p_fmat_ext->SingleColBlock()); linalg::Matrix gpair({kRows}, ctx.Device()); gpair.Data()->Copy(GenerateRandomGradients(kRows)); // Build a tree using the in-memory DMatrix. auto rng = common::GlobalRandom(); RegTree tree; HostDeviceVector preds(kRows, 0.0, ctx.Device()); UpdateTree(&ctx, &gpair, p_fmat.get(), &tree, &preds, kSubsample, kSamplingMethod, kRows); // Build another tree using multiple ELLPACK pages. common::GlobalRandom() = rng; RegTree tree_ext; HostDeviceVector preds_ext(kRows, 0.0, ctx.Device()); UpdateTree(&ctx, &gpair, p_fmat_ext.get(), &tree_ext, &preds_ext, kSubsample, kSamplingMethod, kRows); Json jtree{Object{}}; Json jtree_ext{Object{}}; tree.SaveModel(&jtree); tree_ext.SaveModel(&jtree_ext); ASSERT_EQ(jtree, jtree_ext); } TEST(GpuHist, ConfigIO) { auto ctx = MakeCUDACtx(0); ObjInfo task{ObjInfo::kRegression}; std::unique_ptr updater{TreeUpdater::Create("grow_gpu_hist", &ctx, &task)}; updater->Configure(Args{}); Json j_updater{Object{}}; updater->SaveConfig(&j_updater); ASSERT_TRUE(IsA(j_updater["hist_train_param"])); updater->LoadConfig(j_updater); Json j_updater_roundtrip{Object{}}; updater->SaveConfig(&j_updater_roundtrip); ASSERT_TRUE(IsA(j_updater_roundtrip["hist_train_param"])); ASSERT_EQ(j_updater, j_updater_roundtrip); } TEST(GpuHist, MaxDepth) { auto ctx = MakeCUDACtx(0); size_t constexpr kRows = 16; size_t constexpr kCols = 4; auto p_mat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(); auto learner = std::unique_ptr(Learner::Create({p_mat})); learner->SetParam("max_depth", "32"); learner->Configure(); ASSERT_THROW({learner->UpdateOneIter(0, p_mat);}, dmlc::Error); } namespace { RegTree GetHistTree(Context const* ctx, DMatrix* dmat) { ObjInfo task{ObjInfo::kRegression}; std::unique_ptr hist_maker {TreeUpdater::Create("grow_gpu_hist", ctx, &task)}; hist_maker->Configure(Args{}); TrainParam param; param.UpdateAllowUnknown(Args{}); linalg::Matrix gpair({dmat->Info().num_row_}, ctx->Device()); gpair.Data()->Copy(GenerateRandomGradients(dmat->Info().num_row_)); std::vector> position(1); RegTree tree; hist_maker->Update(¶m, &gpair, dmat, common::Span>{position}, {&tree}); return tree; } void VerifyHistColumnSplit(bst_idx_t rows, bst_feature_t cols, RegTree const& expected_tree) { Context ctx(MakeCUDACtx(GPUIDX)); auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true); auto const world_size = collective::GetWorldSize(); auto const rank = collective::GetRank(); std::unique_ptr sliced{Xy->SliceCol(world_size, rank)}; RegTree tree = GetHistTree(&ctx, sliced.get()); Json json{Object{}}; tree.SaveModel(&json); Json expected_json{Object{}}; expected_tree.SaveModel(&expected_json); ASSERT_EQ(json, expected_json); } } // anonymous namespace class MGPUHistTest : public collective::BaseMGPUTest {}; TEST_F(MGPUHistTest, HistColumnSplit) { auto constexpr kRows = 32; auto constexpr kCols = 16; Context ctx(MakeCUDACtx(0)); auto dmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true); RegTree expected_tree = GetHistTree(&ctx, dmat.get()); this->DoTest([&] { VerifyHistColumnSplit(kRows, kCols, expected_tree); }, true); this->DoTest([&] { VerifyHistColumnSplit(kRows, kCols, expected_tree); }, false); } namespace { RegTree GetApproxTree(Context const* ctx, DMatrix* dmat) { ObjInfo task{ObjInfo::kRegression}; std::unique_ptr approx_maker{TreeUpdater::Create("grow_gpu_approx", ctx, &task)}; approx_maker->Configure(Args{}); TrainParam param; param.UpdateAllowUnknown(Args{}); linalg::Matrix gpair({dmat->Info().num_row_}, ctx->Device()); gpair.Data()->Copy(GenerateRandomGradients(dmat->Info().num_row_)); std::vector> position(1); RegTree tree; approx_maker->Update(¶m, &gpair, dmat, common::Span>{position}, {&tree}); return tree; } void VerifyApproxColumnSplit(bst_idx_t rows, bst_feature_t cols, RegTree const& expected_tree) { auto ctx = MakeCUDACtx(DistGpuIdx()); auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true); auto const world_size = collective::GetWorldSize(); auto const rank = collective::GetRank(); std::unique_ptr sliced{Xy->SliceCol(world_size, rank)}; RegTree tree = GetApproxTree(&ctx, sliced.get()); Json json{Object{}}; tree.SaveModel(&json); Json expected_json{Object{}}; expected_tree.SaveModel(&expected_json); ASSERT_EQ(json, expected_json); } } // anonymous namespace class MGPUApproxTest : public collective::BaseMGPUTest {}; TEST_F(MGPUApproxTest, GPUApproxColumnSplit) { auto constexpr kRows = 32; auto constexpr kCols = 16; Context ctx(MakeCUDACtx(0)); auto dmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true); RegTree expected_tree = GetApproxTree(&ctx, dmat.get()); this->DoTest([&] { VerifyApproxColumnSplit(kRows, kCols, expected_tree); }, true); this->DoTest([&] { VerifyApproxColumnSplit(kRows, kCols, expected_tree); }, false); } } // namespace xgboost::tree