Initial GPU support for the approx tree method. (#9414)

This commit is contained in:
Jiaming Yuan
2023-07-31 15:50:28 +08:00
committed by GitHub
parent 8f0efb4ab3
commit 912e341d57
23 changed files with 639 additions and 360 deletions

View File

@@ -13,10 +13,7 @@
#include "../../../src/common/common.h"
#include "../../../src/data/ellpack_page.cuh" // for EllpackPageImpl
#include "../../../src/data/ellpack_page.h" // for EllpackPage
#include "../../../src/data/sparse_page_source.h"
#include "../../../src/tree/constraints.cuh"
#include "../../../src/tree/param.h" // for TrainParam
#include "../../../src/tree/updater_gpu_common.cuh"
#include "../../../src/tree/updater_gpu_hist.cu"
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
@@ -94,8 +91,9 @@ void TestBuildHist(bool use_shared_memory_histograms) {
auto page = BuildEllpackPage(kNRows, kNCols);
BatchParam batch_param{};
Context ctx{MakeCUDACtx(0)};
GPUHistMakerDevice<GradientSumT> maker(&ctx, /*is_external_memory=*/false, {}, kNRows, param,
kNCols, kNCols, batch_param);
auto cs = std::make_shared<common::ColumnSampler>(0);
GPUHistMakerDevice maker(&ctx, /*is_external_memory=*/false, {}, kNRows, param, cs, kNCols,
batch_param);
xgboost::SimpleLCG gen;
xgboost::SimpleRealUniformDistribution<bst_float> dist(0.0f, 1.0f);
HostDeviceVector<GradientPair> gpair(kNRows);

View File

@@ -24,15 +24,11 @@ class TestPredictionCache : public ::testing::Test {
Xy_ = RandomDataGenerator{n_samples_, n_features, 0}.Targets(n_targets).GenerateDMatrix(true);
}
void RunLearnerTest(std::string updater_name, float subsample, std::string const& grow_policy,
std::string const& strategy) {
void RunLearnerTest(Context const* ctx, std::string updater_name, float subsample,
std::string const& grow_policy, std::string const& strategy) {
std::unique_ptr<Learner> learner{Learner::Create({Xy_})};
if (updater_name == "grow_gpu_hist") {
// gpu_id setup
learner->SetParam("tree_method", "gpu_hist");
} else {
learner->SetParam("updater", updater_name);
}
learner->SetParam("device", ctx->DeviceName());
learner->SetParam("updater", updater_name);
learner->SetParam("multi_strategy", strategy);
learner->SetParam("grow_policy", grow_policy);
learner->SetParam("subsample", std::to_string(subsample));
@@ -65,20 +61,14 @@ class TestPredictionCache : public ::testing::Test {
}
}
void RunTest(std::string const& updater_name, std::string const& strategy) {
void RunTest(Context* ctx, std::string const& updater_name, std::string const& strategy) {
{
Context ctx;
ctx.InitAllowUnknown(Args{{"nthread", "8"}});
if (updater_name == "grow_gpu_hist") {
ctx = ctx.MakeCUDA(0);
} else {
ctx = ctx.MakeCPU();
}
ctx->InitAllowUnknown(Args{{"nthread", "8"}});
ObjInfo task{ObjInfo::kRegression};
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(updater_name, &ctx, &task)};
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(updater_name, ctx, &task)};
RegTree tree;
std::vector<RegTree *> trees{&tree};
std::vector<RegTree*> trees{&tree};
auto gpair = GenerateRandomGradients(n_samples_);
tree::TrainParam param;
param.UpdateAllowUnknown(Args{{"max_bin", "64"}});
@@ -86,33 +76,46 @@ class TestPredictionCache : public ::testing::Test {
std::vector<HostDeviceVector<bst_node_t>> position(1);
updater->Update(&param, &gpair, Xy_.get(), position, trees);
HostDeviceVector<float> out_prediction_cached;
out_prediction_cached.SetDevice(ctx.gpu_id);
out_prediction_cached.SetDevice(ctx->Device());
out_prediction_cached.Resize(n_samples_);
auto cache =
linalg::MakeTensorView(&ctx, &out_prediction_cached, out_prediction_cached.Size(), 1);
linalg::MakeTensorView(ctx, &out_prediction_cached, out_prediction_cached.Size(), 1);
ASSERT_TRUE(updater->UpdatePredictionCache(Xy_.get(), cache));
}
for (auto policy : {"depthwise", "lossguide"}) {
for (auto subsample : {1.0f, 0.4f}) {
this->RunLearnerTest(updater_name, subsample, policy, strategy);
this->RunLearnerTest(updater_name, subsample, policy, strategy);
this->RunLearnerTest(ctx, updater_name, subsample, policy, strategy);
this->RunLearnerTest(ctx, updater_name, subsample, policy, strategy);
}
}
}
};
TEST_F(TestPredictionCache, Approx) { this->RunTest("grow_histmaker", "one_output_per_tree"); }
// Drives the fixture's RunTest with the `grow_histmaker` updater and the
// `one_output_per_tree` strategy, using a default-constructed Context.
TEST_F(TestPredictionCache, Approx) {
  Context default_ctx;
  auto* p_ctx = &default_ctx;  // RunTest takes the context by pointer.
  this->RunTest(p_ctx, "grow_histmaker", "one_output_per_tree");
}
TEST_F(TestPredictionCache, Hist) {
this->RunTest("grow_quantile_histmaker", "one_output_per_tree");
Context ctx;
this->RunTest(&ctx, "grow_quantile_histmaker", "one_output_per_tree");
}
TEST_F(TestPredictionCache, HistMulti) {
this->RunTest("grow_quantile_histmaker", "multi_output_tree");
Context ctx;
this->RunTest(&ctx, "grow_quantile_histmaker", "multi_output_tree");
}
#if defined(XGBOOST_USE_CUDA)
TEST_F(TestPredictionCache, GpuHist) { this->RunTest("grow_gpu_hist", "one_output_per_tree"); }
// Drives the fixture's RunTest with the `grow_gpu_hist` updater and the
// `one_output_per_tree` strategy, using the context returned by MakeCUDACtx(0).
TEST_F(TestPredictionCache, GpuHist) {
  Context cuda_ctx{MakeCUDACtx(0)};
  this->RunTest(&cuda_ctx, "grow_gpu_hist", "one_output_per_tree");
}
// Drives the fixture's RunTest with the `grow_gpu_approx` updater and the
// `one_output_per_tree` strategy, using the context returned by MakeCUDACtx(0).
TEST_F(TestPredictionCache, GpuApprox) {
  Context cuda_ctx{MakeCUDACtx(0)};
  this->RunTest(&cuda_ctx, "grow_gpu_approx", "one_output_per_tree");
}
#endif // defined(XGBOOST_USE_CUDA)
} // namespace xgboost

View File

@@ -62,8 +62,10 @@ class RegenTest : public ::testing::Test {
auto constexpr Iter() const { return 4; }
template <typename Page>
size_t TestTreeMethod(std::string tree_method, std::string obj, bool reset = true) const {
size_t TestTreeMethod(Context const* ctx, std::string tree_method, std::string obj,
bool reset = true) const {
auto learner = std::unique_ptr<Learner>{Learner::Create({p_fmat_})};
learner->SetParam("device", ctx->DeviceName());
learner->SetParam("tree_method", tree_method);
learner->SetParam("objective", obj);
learner->Configure();
@@ -87,40 +89,71 @@ class RegenTest : public ::testing::Test {
} // anonymous namespace
TEST_F(RegenTest, Approx) {
auto n = this->TestTreeMethod<GHistIndexMatrix>("approx", "reg:squarederror");
Context ctx;
auto n = this->TestTreeMethod<GHistIndexMatrix>(&ctx, "approx", "reg:squarederror");
ASSERT_EQ(n, 1);
n = this->TestTreeMethod<GHistIndexMatrix>("approx", "reg:logistic");
n = this->TestTreeMethod<GHistIndexMatrix>(&ctx, "approx", "reg:logistic");
ASSERT_EQ(n, this->Iter());
}
TEST_F(RegenTest, Hist) {
auto n = this->TestTreeMethod<GHistIndexMatrix>("hist", "reg:squarederror");
Context ctx;
auto n = this->TestTreeMethod<GHistIndexMatrix>(&ctx, "hist", "reg:squarederror");
ASSERT_EQ(n, 1);
n = this->TestTreeMethod<GHistIndexMatrix>("hist", "reg:logistic");
n = this->TestTreeMethod<GHistIndexMatrix>(&ctx, "hist", "reg:logistic");
ASSERT_EQ(n, 1);
}
TEST_F(RegenTest, Mixed) {
auto n = this->TestTreeMethod<GHistIndexMatrix>("hist", "reg:squarederror", false);
Context ctx;
auto n = this->TestTreeMethod<GHistIndexMatrix>(&ctx, "hist", "reg:squarederror", false);
ASSERT_EQ(n, 1);
n = this->TestTreeMethod<GHistIndexMatrix>("approx", "reg:logistic", true);
n = this->TestTreeMethod<GHistIndexMatrix>(&ctx, "approx", "reg:logistic", true);
ASSERT_EQ(n, this->Iter() + 1);
n = this->TestTreeMethod<GHistIndexMatrix>("approx", "reg:logistic", false);
n = this->TestTreeMethod<GHistIndexMatrix>(&ctx, "approx", "reg:logistic", false);
ASSERT_EQ(n, this->Iter());
n = this->TestTreeMethod<GHistIndexMatrix>("hist", "reg:squarederror", true);
n = this->TestTreeMethod<GHistIndexMatrix>(&ctx, "hist", "reg:squarederror", true);
ASSERT_EQ(n, this->Iter() + 1);
}
#if defined(XGBOOST_USE_CUDA)
TEST_F(RegenTest, GpuHist) {
auto n = this->TestTreeMethod<EllpackPage>("gpu_hist", "reg:squarederror");
TEST_F(RegenTest, GpuApprox) {
auto ctx = MakeCUDACtx(0);
auto n = this->TestTreeMethod<EllpackPage>(&ctx, "approx", "reg:squarederror", true);
ASSERT_EQ(n, 1);
n = this->TestTreeMethod<EllpackPage>("gpu_hist", "reg:logistic", false);
n = this->TestTreeMethod<EllpackPage>(&ctx, "approx", "reg:logistic", false);
ASSERT_EQ(n, this->Iter());
n = this->TestTreeMethod<EllpackPage>(&ctx, "approx", "reg:logistic", true);
ASSERT_EQ(n, this->Iter() * 2);
}
TEST_F(RegenTest, GpuHist) {
auto ctx = MakeCUDACtx(0);
auto n = this->TestTreeMethod<EllpackPage>(&ctx, "hist", "reg:squarederror", true);
ASSERT_EQ(n, 1);
n = this->TestTreeMethod<EllpackPage>(&ctx, "hist", "reg:logistic", false);
ASSERT_EQ(n, 1);
n = this->TestTreeMethod<EllpackPage>("hist", "reg:logistic");
ASSERT_EQ(n, 2);
{
Context ctx;
n = this->TestTreeMethod<EllpackPage>(&ctx, "hist", "reg:logistic");
ASSERT_EQ(n, 2);
}
}
// Alternates between the "hist" and "approx" tree methods on the context
// returned by MakeCUDACtx(0) and checks the value reported by
// TestTreeMethod<EllpackPage> after each switch.
// NOTE(review): the returned count is presumably the number of EllpackPage
// generations observed -- the helper body is not visible here, confirm.
TEST_F(RegenTest, GpuMixed) {
  Context cuda_ctx{MakeCUDACtx(0)};

  // hist + squared error, reset flag off.
  auto count = this->TestTreeMethod<EllpackPage>(&cuda_ctx, "hist", "reg:squarederror", false);
  ASSERT_EQ(count, 1);

  // approx + logistic, reset flag on.
  count = this->TestTreeMethod<EllpackPage>(&cuda_ctx, "approx", "reg:logistic", true);
  ASSERT_EQ(count, this->Iter() + 1);

  // approx + logistic, reset flag off.
  count = this->TestTreeMethod<EllpackPage>(&cuda_ctx, "approx", "reg:logistic", false);
  ASSERT_EQ(count, this->Iter());

  // Back to hist + squared error, reset flag on.
  count = this->TestTreeMethod<EllpackPage>(&cuda_ctx, "hist", "reg:squarederror", true);
  ASSERT_EQ(count, this->Iter() + 1);
}
#endif // defined(XGBOOST_USE_CUDA)
} // namespace xgboost

View File

@@ -20,10 +20,11 @@ class TestGrowPolicy : public ::testing::Test {
true);
}
std::unique_ptr<Learner> TrainOneIter(std::string tree_method, std::string policy,
int32_t max_leaves, int32_t max_depth) {
std::unique_ptr<Learner> TrainOneIter(Context const* ctx, std::string tree_method,
std::string policy, int32_t max_leaves, int32_t max_depth) {
std::unique_ptr<Learner> learner{Learner::Create({this->Xy_})};
learner->SetParam("tree_method", tree_method);
learner->SetParam("device", ctx->DeviceName());
if (max_leaves >= 0) {
learner->SetParam("max_leaves", std::to_string(max_leaves));
}
@@ -63,7 +64,7 @@ class TestGrowPolicy : public ::testing::Test {
if (max_leaves == 0 && max_depth == 0) {
// unconstrained
if (tree_method != "gpu_hist") {
if (ctx->IsCPU()) {
// GPU pre-allocates for all nodes.
learner->UpdateOneIter(0, Xy_);
}
@@ -86,23 +87,23 @@ class TestGrowPolicy : public ::testing::Test {
return learner;
}
void TestCombination(std::string tree_method) {
void TestCombination(Context const* ctx, std::string tree_method) {
for (auto policy : {"depthwise", "lossguide"}) {
// -1 means default
for (auto leaves : {-1, 0, 3}) {
for (auto depth : {-1, 0, 3}) {
this->TrainOneIter(tree_method, policy, leaves, depth);
this->TrainOneIter(ctx, tree_method, policy, leaves, depth);
}
}
}
}
void TestTreeGrowPolicy(std::string tree_method, std::string policy) {
void TestTreeGrowPolicy(Context const* ctx, std::string tree_method, std::string policy) {
{
/**
* max_leaves
*/
auto learner = this->TrainOneIter(tree_method, policy, 16, -1);
auto learner = this->TrainOneIter(ctx, tree_method, policy, 16, -1);
Json model{Object{}};
learner->SaveModel(&model);
@@ -115,7 +116,7 @@ class TestGrowPolicy : public ::testing::Test {
/**
* max_depth
*/
auto learner = this->TrainOneIter(tree_method, policy, -1, 3);
auto learner = this->TrainOneIter(ctx, tree_method, policy, -1, 3);
Json model{Object{}};
learner->SaveModel(&model);
@@ -133,25 +134,36 @@ class TestGrowPolicy : public ::testing::Test {
};
TEST_F(TestGrowPolicy, Approx) {
this->TestTreeGrowPolicy("approx", "depthwise");
this->TestTreeGrowPolicy("approx", "lossguide");
Context ctx;
this->TestTreeGrowPolicy(&ctx, "approx", "depthwise");
this->TestTreeGrowPolicy(&ctx, "approx", "lossguide");
this->TestCombination("approx");
this->TestCombination(&ctx, "approx");
}
TEST_F(TestGrowPolicy, Hist) {
this->TestTreeGrowPolicy("hist", "depthwise");
this->TestTreeGrowPolicy("hist", "lossguide");
Context ctx;
this->TestTreeGrowPolicy(&ctx, "hist", "depthwise");
this->TestTreeGrowPolicy(&ctx, "hist", "lossguide");
this->TestCombination("hist");
this->TestCombination(&ctx, "hist");
}
#if defined(XGBOOST_USE_CUDA)
TEST_F(TestGrowPolicy, GpuHist) {
this->TestTreeGrowPolicy("gpu_hist", "depthwise");
this->TestTreeGrowPolicy("gpu_hist", "lossguide");
auto ctx = MakeCUDACtx(0);
this->TestTreeGrowPolicy(&ctx, "hist", "depthwise");
this->TestTreeGrowPolicy(&ctx, "hist", "lossguide");
this->TestCombination("gpu_hist");
this->TestCombination(&ctx, "hist");
}
// Exercises the "approx" tree method on the context returned by MakeCUDACtx(0):
// first each grow policy on its own, then the fixture's combination sweep.
TEST_F(TestGrowPolicy, GpuApprox) {
  Context cuda_ctx{MakeCUDACtx(0)};
  // Same order as before: depthwise first, then lossguide.
  for (auto policy : {"depthwise", "lossguide"}) {
    this->TestTreeGrowPolicy(&cuda_ctx, "approx", policy);
  }

  this->TestCombination(&cuda_ctx, "approx");
}
#endif // defined(XGBOOST_USE_CUDA)
} // namespace xgboost

View File

@@ -135,7 +135,7 @@ class TestMinSplitLoss : public ::testing::Test {
gpair_ = GenerateRandomGradients(kRows);
}
std::int32_t Update(std::string updater, float gamma) {
std::int32_t Update(Context const* ctx, std::string updater, float gamma) {
Args args{{"max_depth", "1"},
{"max_leaves", "0"},
@@ -154,8 +154,7 @@ class TestMinSplitLoss : public ::testing::Test {
param.UpdateAllowUnknown(args);
ObjInfo task{ObjInfo::kRegression};
Context ctx{MakeCUDACtx(updater == "grow_gpu_hist" ? 0 : Context::kCpuId)};
auto up = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, &ctx, &task)};
auto up = std::unique_ptr<TreeUpdater>{TreeUpdater::Create(updater, ctx, &task)};
up->Configure({});
RegTree tree;
@@ -167,16 +166,16 @@ class TestMinSplitLoss : public ::testing::Test {
}
public:
void RunTest(std::string updater) {
void RunTest(Context const* ctx, std::string updater) {
{
int32_t n_nodes = Update(updater, 0.01);
int32_t n_nodes = Update(ctx, updater, 0.01);
// This is not strictly verified: the number `2` is whatever GPU_Hist returned
// when this test was written, and it is only used to check that a larger gamma (below)
// does prevent the tree from being built.
ASSERT_EQ(n_nodes, 2);
}
{
int32_t n_nodes = Update(updater, 100.0);
int32_t n_nodes = Update(ctx, updater, 100.0);
// No new nodes with gamma == 100.
ASSERT_EQ(n_nodes, static_cast<decltype(n_nodes)>(0));
}
@@ -185,10 +184,25 @@ class TestMinSplitLoss : public ::testing::Test {
/* Exact tree method requires a pruner as an additional updater, so not tested here. */
TEST_F(TestMinSplitLoss, Approx) { this->RunTest("grow_histmaker"); }
// Runs the fixture's gamma check against the `grow_histmaker` updater with a
// default-constructed Context.
TEST_F(TestMinSplitLoss, Approx) {
  Context default_ctx;
  Context const* p_ctx = &default_ctx;  // RunTest only needs read access.
  this->RunTest(p_ctx, "grow_histmaker");
}
// Runs the fixture's gamma check against the `grow_quantile_histmaker` updater
// with a default-constructed Context.
TEST_F(TestMinSplitLoss, Hist) {
  Context default_ctx;
  Context const* p_ctx = &default_ctx;  // RunTest only needs read access.
  this->RunTest(p_ctx, "grow_quantile_histmaker");
}
TEST_F(TestMinSplitLoss, Hist) { this->RunTest("grow_quantile_histmaker"); }
#if defined(XGBOOST_USE_CUDA)
TEST_F(TestMinSplitLoss, GpuHist) { this->RunTest("grow_gpu_hist"); }
// Runs the fixture's gamma check against the `grow_gpu_hist` updater with the
// context returned by MakeCUDACtx(0).
TEST_F(TestMinSplitLoss, GpuHist) {
  Context cuda_ctx{MakeCUDACtx(0)};
  this->RunTest(&cuda_ctx, "grow_gpu_hist");
}
// Runs the fixture's gamma check against the `grow_gpu_approx` updater with the
// context returned by MakeCUDACtx(0).
TEST_F(TestMinSplitLoss, GpuApprox) {
  Context cuda_ctx{MakeCUDACtx(0)};
  this->RunTest(&cuda_ctx, "grow_gpu_approx");
}
#endif // defined(XGBOOST_USE_CUDA)
} // namespace xgboost