Revamp the rabit implementation. (#10112)

This PR replaces the original RABIT implementation with a new one, which has already been partially merged into XGBoost. The new one features:
- Federated learning for both CPU and GPU.
- NCCL support for GPU-based collective communication.
- More data types.
- A unified interface for all the underlying implementations.
- Improved timeout handling for both tracker and workers.
- Exhaustive tests with metrics (fixed a couple of bugs along the way).
- A reusable tracker for Python and JVM packages.
This commit is contained in:
Jiaming Yuan
2024-05-20 11:56:23 +08:00
committed by GitHub
parent ba9b4cb1ee
commit a5a58102e5
195 changed files with 2768 additions and 9234 deletions

View File

@@ -1,12 +1,12 @@
/**
* Copyright 2020-2023, XGBoost contributors
* Copyright 2020-2024, XGBoost contributors
*/
#include <gtest/gtest.h>
#include <thrust/host_vector.h>
#include "../../../../src/tree/gpu_hist/evaluate_splits.cuh"
#include "../../collective/test_worker.h" // for BaseMGPUTest
#include "../../helpers.h"
#include "../../histogram_helpers.h"
#include "../test_evaluate_splits.h" // TestPartitionBasedSplit
namespace xgboost::tree {
@@ -17,13 +17,13 @@ auto ZeroParam() {
tparam.UpdateAllowUnknown(args);
return tparam;
}
} // anonymous namespace
inline GradientQuantiser DummyRoundingFactor(Context const* ctx) {
GradientQuantiser DummyRoundingFactor(Context const* ctx) {
thrust::device_vector<GradientPair> gpair(1);
gpair[0] = {1000.f, 1000.f}; // Tests should not exceed sum of 1000
return {ctx, dh::ToSpan(gpair), MetaInfo()};
}
} // anonymous namespace
thrust::device_vector<GradientPairInt64> ConvertToInteger(Context const* ctx,
std::vector<GradientPairPrecise> x) {
@@ -546,7 +546,7 @@ TEST_F(TestPartitionBasedSplit, GpuHist) {
ASSERT_NEAR(split.loss_chg, best_score_, 1e-2);
}
class MGPUHistTest : public BaseMGPUTest {};
class MGPUHistTest : public collective::BaseMGPUTest {};
namespace {
void VerifyColumnSplitEvaluateSingleSplit(bool is_categorical) {
@@ -589,21 +589,29 @@ void VerifyColumnSplitEvaluateSingleSplit(bool is_categorical) {
evaluator.Reset(cuts, dh::ToSpan(feature_types), feature_set.size(), tparam, true, ctx.Device());
DeviceSplitCandidate result = evaluator.EvaluateSingleSplit(&ctx, input, shared_inputs).split;
EXPECT_EQ(result.findex, 1) << "rank: " << rank;
EXPECT_EQ(result.findex, 1);
if (is_categorical) {
ASSERT_TRUE(std::isnan(result.fvalue));
} else {
EXPECT_EQ(result.fvalue, 11.0) << "rank: " << rank;
EXPECT_EQ(result.fvalue, 11.0);
}
EXPECT_EQ(result.left_sum + result.right_sum, parent_sum) << "rank: " << rank;
EXPECT_EQ(result.left_sum + result.right_sum, parent_sum);
}
} // anonymous namespace
TEST_F(MGPUHistTest, ColumnSplitEvaluateSingleSplit) {
DoTest(VerifyColumnSplitEvaluateSingleSplit, false);
if (common::AllVisibleGPUs() > 1) {
// We can't emulate multiple GPUs with NCCL.
this->DoTest([] { VerifyColumnSplitEvaluateSingleSplit(false); }, false, true);
}
this->DoTest([] { VerifyColumnSplitEvaluateSingleSplit(false); }, true, true);
}
TEST_F(MGPUHistTest, ColumnSplitEvaluateSingleCategoricalSplit) {
DoTest(VerifyColumnSplitEvaluateSingleSplit, true);
if (common::AllVisibleGPUs() > 1) {
// We can't emulate multiple GPUs with NCCL.
this->DoTest([] { VerifyColumnSplitEvaluateSingleSplit(true); }, false, true);
}
this->DoTest([] { VerifyColumnSplitEvaluateSingleSplit(true); }, true, true);
}
} // namespace xgboost::tree

View File

@@ -33,6 +33,7 @@
#include "../../../../src/tree/hist/histogram.h" // for HistogramBuilder
#include "../../../../src/tree/hist/param.h" // for HistMakerTrainParam
#include "../../categorical_helpers.h" // for OneHotEncodeFeature
#include "../../collective/test_worker.h" // for TestDistributedGlobal
#include "../../helpers.h" // for RandomDataGenerator, GenerateRa...
namespace xgboost::tree {
@@ -300,8 +301,8 @@ TEST(CPUHistogram, BuildHist) {
TEST(CPUHistogram, BuildHistColSplit) {
auto constexpr kWorkers = 4;
RunWithInMemoryCommunicator(kWorkers, TestBuildHistogram, true, true, true);
RunWithInMemoryCommunicator(kWorkers, TestBuildHistogram, true, false, true);
collective::TestDistributedGlobal(kWorkers, [] { TestBuildHistogram(true, true, true); });
collective::TestDistributedGlobal(kWorkers, [] { TestBuildHistogram(true, false, true); });
}
namespace {

View File

@@ -1,15 +1,15 @@
/**
* Copyright 2021-2023 by XGBoost contributors.
* Copyright 2021-2024, XGBoost contributors.
*/
#include <gtest/gtest.h>
#include "../../../src/common/numeric.h"
#include "../../../src/tree/common_row_partitioner.h"
#include "../collective/test_worker.h" // for TestDistributedGlobal
#include "../helpers.h"
#include "test_partitioner.h"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
namespace {
std::vector<float> GenerateHess(size_t n_samples) {
auto grad = GenerateRandomGradients(n_samples);
@@ -145,8 +145,9 @@ TEST(Approx, PartitionerColSplit) {
}
auto constexpr kWorkers = 4;
RunWithInMemoryCommunicator(kWorkers, TestColumnSplitPartitioner, n_samples, base_rowid, Xy,
&hess, min_value, mid_value, mid_partitioner);
collective::TestDistributedGlobal(kWorkers, [&] {
TestColumnSplitPartitioner(n_samples, base_rowid, Xy, &hess, min_value, mid_value,
mid_partitioner);
});
}
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2022-2023 by XGBoost Contributors
* Copyright 2022-2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/base.h> // for GradientPairInternal, GradientPairPrecise
@@ -14,7 +14,6 @@
#include <limits> // for numeric_limits
#include <numeric> // for iota
#include <tuple> // for make_tuple, tie, tuple
#include <utility> // for pair
#include <vector> // for vector
#include "../../../src/common/hist_util.h" // for HistogramCuts, HistCollection, GHistRow
@@ -23,7 +22,6 @@
#include "../../../src/tree/param.h" // for TrainParam, GradStats
#include "../../../src/tree/split_evaluator.h" // for TreeEvaluator
#include "../helpers.h" // for SimpleLCG, SimpleRealUniformDistribution
#include "gtest/gtest_pred_impl.h" // for AssertionResult, ASSERT_EQ, ASSERT_TRUE
namespace xgboost::tree {
/**
@@ -96,13 +94,11 @@ class TestPartitionBasedSplit : public ::testing::Test {
// enumerate all possible partitions to find the optimal split
do {
int32_t thresh;
float score;
std::vector<GradientPairPrecise> sorted_hist(node_hist.size());
for (size_t i = 0; i < sorted_hist.size(); ++i) {
sorted_hist[i] = node_hist[sorted_idx_[i]];
}
std::tie(thresh, score) = enumerate({sorted_hist}, total_gpair_);
auto [thresh, score] = enumerate({sorted_hist}, total_gpair_);
if (score > best_score_) {
best_score_ = score;
}

View File

@@ -1,11 +1,12 @@
/**
* Copyright 2022-2023, XGBoost Contributors
* Copyright 2022-2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/linalg.h>
#include "../../src/common/linalg_op.h"
#include "../../src/tree/fit_stump.h"
#include "../collective/test_worker.h" // for TestDistributedGlobal
#include "../helpers.h"
namespace xgboost::tree {
@@ -43,7 +44,7 @@ TEST(InitEstimation, FitStump) {
#if defined(XGBOOST_USE_CUDA)
TEST(InitEstimation, GPUFitStump) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
ctx.UpdateAllowUnknown(Args{{"device", "cuda"}});
TestFitStump(&ctx);
}
#endif // defined(XGBOOST_USE_CUDA)
@@ -51,6 +52,6 @@ TEST(InitEstimation, GPUFitStump) {
TEST(InitEstimation, FitStumpColumnSplit) {
Context ctx;
auto constexpr kWorldSize{3};
RunWithInMemoryCommunicator(kWorldSize, &TestFitStump, &ctx, DataSplitMode::kCol);
collective::TestDistributedGlobal(kWorldSize, [&] { TestFitStump(&ctx, DataSplitMode::kCol); });
}
} // namespace xgboost::tree

View File

@@ -13,14 +13,19 @@
#include "../../../src/common/common.h"
#include "../../../src/data/ellpack_page.cuh" // for EllpackPageImpl
#include "../../../src/data/ellpack_page.h" // for EllpackPage
#include "../../../src/tree/param.h" // for TrainParam
#include "../../../src/tree/param.h" // for TrainParam
#include "../../../src/tree/updater_gpu_hist.cu"
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../collective/test_worker.h" // for BaseMGPUTest
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
#include "../histogram_helpers.h"
#include "xgboost/context.h"
#include "xgboost/json.h"
#if defined(XGBOOST_USE_FEDERATED)
#include "../plugin/federated/test_worker.h" // for TestFederatedGlobal
#endif // defined(XGBOOST_USE_FEDERATED)
namespace xgboost::tree {
TEST(GpuHist, DeviceHistogram) {
// Ensures that node allocates correctly after reaching `kStopGrowingSize`.
@@ -458,9 +463,9 @@ void VerifyHistColumnSplit(bst_idx_t rows, bst_feature_t cols, RegTree const& ex
}
} // anonymous namespace
class MGPUHistTest : public BaseMGPUTest {};
class MGPUHistTest : public collective::BaseMGPUTest {};
TEST_F(MGPUHistTest, GPUHistColumnSplit) {
TEST_F(MGPUHistTest, HistColumnSplit) {
auto constexpr kRows = 32;
auto constexpr kCols = 16;
@@ -468,7 +473,8 @@ TEST_F(MGPUHistTest, GPUHistColumnSplit) {
auto dmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);
RegTree expected_tree = GetHistTree(&ctx, dmat.get());
DoTest(VerifyHistColumnSplit, kRows, kCols, expected_tree);
this->DoTest([&] { VerifyHistColumnSplit(kRows, kCols, expected_tree); }, true);
this->DoTest([&] { VerifyHistColumnSplit(kRows, kCols, expected_tree); }, false);
}
namespace {
@@ -508,7 +514,7 @@ void VerifyApproxColumnSplit(bst_idx_t rows, bst_feature_t cols, RegTree const&
}
} // anonymous namespace
class MGPUApproxTest : public BaseMGPUTest {};
class MGPUApproxTest : public collective::BaseMGPUTest {};
TEST_F(MGPUApproxTest, GPUApproxColumnSplit) {
auto constexpr kRows = 32;
@@ -518,6 +524,7 @@ TEST_F(MGPUApproxTest, GPUApproxColumnSplit) {
auto dmat = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);
RegTree expected_tree = GetApproxTree(&ctx, dmat.get());
DoTest(VerifyApproxColumnSplit, kRows, kCols, expected_tree);
this->DoTest([&] { VerifyApproxColumnSplit(kRows, kCols, expected_tree); }, true);
this->DoTest([&] { VerifyApproxColumnSplit(kRows, kCols, expected_tree); }, false);
}
} // namespace xgboost::tree

View File

@@ -5,7 +5,8 @@
#include <xgboost/tree_model.h>
#include <xgboost/tree_updater.h>
#include "../../../src/tree/param.h" // for TrainParam
#include "../../../src/tree/param.h" // for TrainParam
#include "../collective/test_worker.h" // for TestDistributedGlobal
#include "../helpers.h"
namespace xgboost::tree {
@@ -118,8 +119,8 @@ void TestColumnSplit(bool categorical) {
}
auto constexpr kWorldSize = 2;
RunWithInMemoryCommunicator(kWorldSize, VerifyColumnSplit, kRows, kCols, categorical,
std::cref(expected_tree));
collective::TestDistributedGlobal(
kWorldSize, [&] { VerifyColumnSplit(kRows, kCols, categorical, expected_tree); });
}
} // anonymous namespace

View File

@@ -11,26 +11,26 @@ namespace {
auto MakeTreeForTest() {
bst_target_t n_targets{3};
bst_feature_t n_features{4};
RegTree tree{n_targets, n_features};
CHECK(tree.IsMultiTarget());
std::unique_ptr<RegTree> tree{std::make_unique<RegTree>(n_targets, n_features)};
CHECK(tree->IsMultiTarget());
linalg::Vector<float> base_weight{{1.0f, 2.0f, 3.0f}, {3ul}, DeviceOrd::CPU()};
linalg::Vector<float> left_weight{{2.0f, 3.0f, 4.0f}, {3ul}, DeviceOrd::CPU()};
linalg::Vector<float> right_weight{{3.0f, 4.0f, 5.0f}, {3ul}, DeviceOrd::CPU()};
tree.ExpandNode(RegTree::kRoot, /*split_idx=*/1, 0.5f, true, base_weight.HostView(),
left_weight.HostView(), right_weight.HostView());
tree->ExpandNode(RegTree::kRoot, /*split_idx=*/1, 0.5f, true, base_weight.HostView(),
left_weight.HostView(), right_weight.HostView());
return tree;
}
} // namespace
TEST(MultiTargetTree, JsonIO) {
auto tree = MakeTreeForTest();
ASSERT_EQ(tree.NumNodes(), 3);
ASSERT_EQ(tree.NumTargets(), 3);
ASSERT_EQ(tree.GetMultiTargetTree()->Size(), 3);
ASSERT_EQ(tree.Size(), 3);
ASSERT_EQ(tree->NumNodes(), 3);
ASSERT_EQ(tree->NumTargets(), 3);
ASSERT_EQ(tree->GetMultiTargetTree()->Size(), 3);
ASSERT_EQ(tree->Size(), 3);
Json jtree{Object{}};
tree.SaveModel(&jtree);
tree->SaveModel(&jtree);
auto check_jtree = [](Json jtree, RegTree const& tree) {
ASSERT_EQ(get<String const>(jtree["tree_param"]["num_nodes"]), std::to_string(tree.NumNodes()));
@@ -40,7 +40,7 @@ TEST(MultiTargetTree, JsonIO) {
ASSERT_EQ(get<I32Array const>(jtree["left_children"]).size(), tree.NumNodes());
ASSERT_EQ(get<I32Array const>(jtree["right_children"]).size(), tree.NumNodes());
};
check_jtree(jtree, tree);
check_jtree(jtree, *tree);
RegTree loaded;
loaded.LoadModel(jtree);
@@ -49,18 +49,18 @@ TEST(MultiTargetTree, JsonIO) {
Json jtree1{Object{}};
loaded.SaveModel(&jtree1);
check_jtree(jtree1, tree);
check_jtree(jtree1, *tree);
}
TEST(MultiTargetTree, DumpDot) {
auto tree = MakeTreeForTest();
auto n_features = tree.NumFeatures();
auto n_features = tree->NumFeatures();
FeatureMap fmap;
for (bst_feature_t f = 0; f < n_features; ++f) {
auto name = "feat_" + std::to_string(f);
fmap.PushBack(f, name.c_str(), "q");
}
auto str = tree.DumpModel(fmap, true, "dot");
auto str = tree->DumpModel(fmap, true, "dot");
ASSERT_NE(str.find("leaf=[2, 3, 4]"), std::string::npos);
ASSERT_NE(str.find("leaf=[3, 4, 5]"), std::string::npos);

View File

@@ -13,6 +13,7 @@
#include "../../../src/tree/common_row_partitioner.h"
#include "../../../src/tree/hist/expand_entry.h" // for MultiExpandEntry, CPUExpandEntry
#include "../../../src/tree/param.h"
#include "../collective/test_worker.h" // for TestDistributedGlobal
#include "../helpers.h"
#include "test_partitioner.h"
#include "xgboost/data.h"
@@ -190,9 +191,10 @@ void TestColumnSplitPartitioner(bst_target_t n_targets) {
}
auto constexpr kWorkers = 4;
RunWithInMemoryCommunicator(kWorkers, VerifyColumnSplitPartitioner<ExpandEntry>, n_targets,
n_samples, n_features, base_rowid, Xy, min_value, mid_value,
mid_partitioner);
collective::TestDistributedGlobal(kWorkers, [&] {
VerifyColumnSplitPartitioner<ExpandEntry>(n_targets, n_samples, n_features, base_rowid, Xy,
min_value, mid_value, mid_partitioner);
});
}
} // anonymous namespace
@@ -245,8 +247,9 @@ void TestColumnSplit(bst_target_t n_targets) {
}
auto constexpr kWorldSize = 2;
RunWithInMemoryCommunicator(kWorldSize, VerifyColumnSplit, &ctx, kRows, kCols, n_targets,
std::cref(expected_tree));
collective::TestDistributedGlobal(kWorldSize, [&] {
VerifyColumnSplit(&ctx, kRows, kCols, n_targets, std::cref(expected_tree));
});
}
} // anonymous namespace