Fuse gpu_hist all-reduce calls where possible (#7867)

This commit is contained in:
Rory Mitchell
2022-05-17 13:27:50 +02:00
committed by GitHub
parent b41cf92dc2
commit 71d3b2e036
9 changed files with 234 additions and 185 deletions

View File

@@ -6,41 +6,58 @@ namespace xgboost {
namespace tree {
TEST(GpuHist, DriverDepthWise) {
Driver<GPUExpandEntry> driver(TrainParam::kDepthWise);
TrainParam p;
p.InitAllowUnknown(Args{});
p.grow_policy = TrainParam::kDepthWise;
Driver<GPUExpandEntry> driver(p, 2);
EXPECT_TRUE(driver.Pop().empty());
DeviceSplitCandidate split;
split.loss_chg = 1.0f;
GPUExpandEntry root(0, 0, split, .0f, .0f, .0f);
split.left_sum = {0.0f, 1.0f};
split.right_sum = {0.0f, 1.0f};
GPUExpandEntry root(0, 0, split, 2.0f, 1.0f, 1.0f);
driver.Push({root});
EXPECT_EQ(driver.Pop().front().nid, 0);
driver.Push({GPUExpandEntry{1, 1, split, .0f, .0f, .0f}});
driver.Push({GPUExpandEntry{2, 1, split, .0f, .0f, .0f}});
driver.Push({GPUExpandEntry{3, 2, split, .0f, .0f, .0f}});
// Should return entries from level 1
driver.Push({GPUExpandEntry{1, 1, split, 2.0f, 1.0f, 1.0f}});
driver.Push({GPUExpandEntry{2, 1, split, 2.0f, 1.0f, 1.0f}});
driver.Push({GPUExpandEntry{3, 1, split, 2.0f, 1.0f, 1.0f}});
driver.Push({GPUExpandEntry{4, 2, split, 2.0f, 1.0f, 1.0f}});
// Should return 2 entries from level 1,
// as the driver is limited to popping a maximum of 2 nodes at a time
auto res = driver.Pop();
EXPECT_EQ(res.size(), 2);
for (auto &e : res) {
EXPECT_EQ(e.depth, 1);
}
// Should now return 1 entry from level 1
res = driver.Pop();
EXPECT_EQ(res[0].depth, 2);
EXPECT_EQ(res.size(), 1);
EXPECT_EQ(res.at(0).depth, 1);
res = driver.Pop();
EXPECT_EQ(res.at(0).depth, 2);
EXPECT_TRUE(driver.Pop().empty());
}
TEST(GpuHist, DriverLossGuided) {
DeviceSplitCandidate high_gain;
high_gain.left_sum = {0.0f, 1.0f};
high_gain.right_sum = {0.0f, 1.0f};
high_gain.loss_chg = 5.0f;
DeviceSplitCandidate low_gain;
DeviceSplitCandidate low_gain = high_gain;
low_gain.loss_chg = 1.0f;
Driver<GPUExpandEntry> driver(TrainParam::kLossGuide);
TrainParam p;
p.grow_policy=TrainParam::kLossGuide;
Driver<GPUExpandEntry> driver(p);
EXPECT_TRUE(driver.Pop().empty());
GPUExpandEntry root(0, 0, high_gain, .0f, .0f, .0f);
GPUExpandEntry root(0, 0, high_gain, 2.0f, 1.0f, 1.0f );
driver.Push({root});
EXPECT_EQ(driver.Pop().front().nid, 0);
// Select high gain first
driver.Push({GPUExpandEntry{1, 1, low_gain, .0f, .0f, .0f}});
driver.Push({GPUExpandEntry{2, 2, high_gain, .0f, .0f, .0f}});
driver.Push({GPUExpandEntry{1, 1, low_gain, 2.0f, 1.0f, 1.0f}});
driver.Push({GPUExpandEntry{2, 2, high_gain, 2.0f, 1.0f, 1.0f}});
auto res = driver.Pop();
EXPECT_EQ(res.size(), 1);
EXPECT_EQ(res[0].nid, 2);
@@ -49,8 +66,8 @@ TEST(GpuHist, DriverLossGuided) {
EXPECT_EQ(res[0].nid, 1);
// If equal gain, use nid
driver.Push({GPUExpandEntry{2, 1, low_gain, .0f, .0f, .0f}});
driver.Push({GPUExpandEntry{1, 1, low_gain, .0f, .0f, .0f}});
driver.Push({GPUExpandEntry{2, 1, low_gain, 2.0f, 1.0f, 1.0f}});
driver.Push({GPUExpandEntry{1, 1, low_gain, 2.0f, 1.0f, 1.0f}});
res = driver.Pop();
EXPECT_EQ(res[0].nid, 1);
res = driver.Pop();

View File

@@ -95,7 +95,6 @@ TEST(Histogram, GPUDeterministic) {
std::vector<int> shm_sizes{48 * 1024, 64 * 1024, 160 * 1024};
for (bool is_dense : is_dense_array) {
for (int shm_size : shm_sizes) {
TestDeterministicHistogram<GradientPair>(is_dense, shm_size);
TestDeterministicHistogram<GradientPairPrecise>(is_dense, shm_size);
}
}