Revamp the rabit implementation. (#10112)
This PR replaces the original RABIT implementation with a new one, which has already been partially merged into XGBoost. The new implementation features: - Federated learning for both CPU and GPU. - NCCL. - More data types. - A unified interface for all the underlying implementations. - Improved timeout handling for both tracker and workers. - Exhaustive tests with metrics (fixed a couple of bugs along the way). - A reusable tracker for Python and JVM packages.
This commit is contained in:
@@ -12,6 +12,7 @@
|
||||
#include "../../../src/data/proxy_dmatrix.h"
|
||||
#include "../../../src/gbm/gbtree.h"
|
||||
#include "../../../src/gbm/gbtree_model.h"
|
||||
#include "../collective/test_worker.h" // for TestDistributedGlobal
|
||||
#include "../filesystem.h" // dmlc::TemporaryDirectory
|
||||
#include "../helpers.h"
|
||||
#include "test_predictor.h"
|
||||
@@ -43,7 +44,7 @@ void TestColumnSplit() {
|
||||
|
||||
TEST(CpuPredictor, BasicColumnSplit) {
|
||||
auto constexpr kWorldSize = 2;
|
||||
RunWithInMemoryCommunicator(kWorldSize, TestColumnSplit);
|
||||
collective::TestDistributedGlobal(kWorldSize, TestColumnSplit);
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, IterationRange) {
|
||||
@@ -157,7 +158,7 @@ TEST(CPUPredictor, CategoricalPrediction) {
|
||||
|
||||
TEST(CPUPredictor, CategoricalPredictionColumnSplit) {
|
||||
auto constexpr kWorldSize = 2;
|
||||
RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPrediction, false, true);
|
||||
collective::TestDistributedGlobal(kWorldSize, [] { TestCategoricalPrediction(false, true); });
|
||||
}
|
||||
|
||||
TEST(CPUPredictor, CategoricalPredictLeaf) {
|
||||
@@ -168,7 +169,7 @@ TEST(CPUPredictor, CategoricalPredictLeaf) {
|
||||
TEST(CPUPredictor, CategoricalPredictLeafColumnSplit) {
|
||||
auto constexpr kWorldSize = 2;
|
||||
Context ctx;
|
||||
RunWithInMemoryCommunicator(kWorldSize, TestCategoricalPredictLeaf, &ctx, true);
|
||||
collective::TestDistributedGlobal(kWorldSize, [&] { TestCategoricalPredictLeaf(&ctx, true); });
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, UpdatePredictionCache) {
|
||||
@@ -183,7 +184,8 @@ TEST(CpuPredictor, LesserFeatures) {
|
||||
|
||||
TEST(CpuPredictor, LesserFeaturesColumnSplit) {
|
||||
auto constexpr kWorldSize = 2;
|
||||
RunWithInMemoryCommunicator(kWorldSize, TestPredictionWithLesserFeaturesColumnSplit, false);
|
||||
collective::TestDistributedGlobal(kWorldSize,
|
||||
[] { TestPredictionWithLesserFeaturesColumnSplit(false); });
|
||||
}
|
||||
|
||||
TEST(CpuPredictor, Sparse) {
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "../../../src/data/device_adapter.cuh"
|
||||
#include "../../../src/data/proxy_dmatrix.h"
|
||||
#include "../../../src/gbm/gbtree_model.h"
|
||||
#include "../collective/test_worker.h" // for TestDistributedGlobal, BaseMGPUTest
|
||||
#include "../helpers.h"
|
||||
#include "test_predictor.h"
|
||||
|
||||
@@ -85,7 +86,7 @@ void VerifyBasicColumnSplit(std::array<std::vector<float>, 32> const& expected_r
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
class MGPUPredictorTest : public BaseMGPUTest {};
|
||||
class MGPUPredictorTest : public collective::BaseMGPUTest {};
|
||||
|
||||
TEST_F(MGPUPredictorTest, BasicColumnSplit) {
|
||||
auto ctx = MakeCUDACtx(0);
|
||||
@@ -111,7 +112,8 @@ TEST_F(MGPUPredictorTest, BasicColumnSplit) {
|
||||
result[i - 1] = out_predictions_h;
|
||||
}
|
||||
|
||||
DoTest(VerifyBasicColumnSplit, result);
|
||||
this->DoTest([&] { VerifyBasicColumnSplit(result); }, true);
|
||||
this->DoTest([&] { VerifyBasicColumnSplit(result); }, false);
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, EllpackBasic) {
|
||||
@@ -209,7 +211,8 @@ TEST(GpuPredictor, LesserFeatures) {
|
||||
}
|
||||
|
||||
TEST_F(MGPUPredictorTest, LesserFeaturesColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, TestPredictionWithLesserFeaturesColumnSplit, true);
|
||||
this->DoTest([] { TestPredictionWithLesserFeaturesColumnSplit(true); }, true);
|
||||
this->DoTest([] { TestPredictionWithLesserFeaturesColumnSplit(true); }, false);
|
||||
}
|
||||
|
||||
// Very basic test of empty model
|
||||
@@ -277,7 +280,7 @@ TEST(GPUPredictor, IterationRange) {
|
||||
}
|
||||
|
||||
TEST_F(MGPUPredictorTest, IterationRangeColumnSplit) {
|
||||
TestIterationRangeColumnSplit(world_size_, true);
|
||||
TestIterationRangeColumnSplit(common::AllVisibleGPUs(), true);
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, CategoricalPrediction) {
|
||||
@@ -285,7 +288,8 @@ TEST(GPUPredictor, CategoricalPrediction) {
|
||||
}
|
||||
|
||||
TEST_F(MGPUPredictorTest, CategoricalPredictionColumnSplit) {
|
||||
RunWithInMemoryCommunicator(world_size_, TestCategoricalPrediction, true, true);
|
||||
this->DoTest([] { TestCategoricalPrediction(true, true); }, true);
|
||||
this->DoTest([] { TestCategoricalPrediction(true, true); }, false);
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, CategoricalPredictLeaf) {
|
||||
@@ -294,8 +298,18 @@ TEST(GPUPredictor, CategoricalPredictLeaf) {
|
||||
}
|
||||
|
||||
TEST_F(MGPUPredictorTest, CategoricalPredictionLeafColumnSplit) {
|
||||
auto ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank());
|
||||
RunWithInMemoryCommunicator(world_size_, TestCategoricalPredictLeaf, &ctx, true);
|
||||
this->DoTest(
|
||||
[&] {
|
||||
auto ctx = MakeCUDACtx(collective::GetRank());
|
||||
TestCategoricalPredictLeaf(&ctx, true);
|
||||
},
|
||||
true);
|
||||
this->DoTest(
|
||||
[&] {
|
||||
auto ctx = MakeCUDACtx(collective::GetRank());
|
||||
TestCategoricalPredictLeaf(&ctx, true);
|
||||
},
|
||||
false);
|
||||
}
|
||||
|
||||
TEST(GPUPredictor, PredictLeafBasic) {
|
||||
@@ -325,7 +339,7 @@ TEST(GPUPredictor, Sparse) {
|
||||
}
|
||||
|
||||
TEST_F(MGPUPredictorTest, SparseColumnSplit) {
|
||||
TestSparsePredictionColumnSplit(world_size_, true, 0.2);
|
||||
TestSparsePredictionColumnSplit(world_size_, true, 0.8);
|
||||
TestSparsePredictionColumnSplit(common::AllVisibleGPUs(), true, 0.2);
|
||||
TestSparsePredictionColumnSplit(common::AllVisibleGPUs(), true, 0.8);
|
||||
}
|
||||
} // namespace xgboost::predictor
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2020-2023 by XGBoost Contributors
|
||||
* Copyright 2020-2024, XGBoost Contributors
|
||||
*/
|
||||
#include "test_predictor.h"
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
#include <xgboost/predictor.h> // for PredictionCacheEntry, Predictor, Predic...
|
||||
#include <xgboost/string_view.h> // for StringView
|
||||
|
||||
#include <algorithm> // for max
|
||||
#include <limits> // for numeric_limits
|
||||
#include <memory> // for shared_ptr
|
||||
#include <unordered_map> // for unordered_map
|
||||
@@ -18,6 +17,7 @@
|
||||
#include "../../../src/common/bitfield.h" // for LBitField32
|
||||
#include "../../../src/data/iterative_dmatrix.h" // for IterativeDMatrix
|
||||
#include "../../../src/data/proxy_dmatrix.h" // for DMatrixProxy
|
||||
#include "../collective/test_worker.h" // for TestDistributedGlobal
|
||||
#include "../helpers.h" // for GetDMatrixFromData, RandomDataGenerator
|
||||
#include "xgboost/json.h" // for Json, Object, get, String
|
||||
#include "xgboost/linalg.h" // for MakeVec, Tensor, TensorView, Vector
|
||||
@@ -593,9 +593,23 @@ void TestIterationRangeColumnSplit(int world_size, bool use_gpu) {
|
||||
Json sliced_model{Object{}};
|
||||
sliced->SaveModel(&sliced_model);
|
||||
|
||||
RunWithInMemoryCommunicator(world_size, VerifyIterationRangeColumnSplit, use_gpu, ranged_model,
|
||||
sliced_model, kRows, kCols, kClasses, margin_ranged, margin_sliced,
|
||||
leaf_ranged, leaf_sliced);
|
||||
#if !defined(XGBOOST_USE_NCCL)
|
||||
if (use_gpu) {
|
||||
GTEST_SKIP_("Not compiled with NCCL");
|
||||
return;
|
||||
}
|
||||
#endif // !defined(XGBOOST_USE_NCCL)
|
||||
collective::TestDistributedGlobal(world_size, [&] {
|
||||
VerifyIterationRangeColumnSplit(use_gpu, ranged_model, sliced_model, kRows, kCols, kClasses,
|
||||
margin_ranged, margin_sliced, leaf_ranged, leaf_sliced);
|
||||
});
|
||||
|
||||
#if defined(XGBOOST_USE_FEDERATED)
|
||||
collective::TestFederatedGlobal(world_size, [&] {
|
||||
VerifyIterationRangeColumnSplit(use_gpu, ranged_model, sliced_model, kRows, kCols, kClasses,
|
||||
margin_ranged, margin_sliced, leaf_ranged, leaf_sliced);
|
||||
});
|
||||
#endif // defined(XGBOOST_USE_FEDERATED)
|
||||
}
|
||||
|
||||
void TestSparsePrediction(Context const *ctx, float sparsity) {
|
||||
@@ -701,8 +715,23 @@ void TestSparsePredictionColumnSplit(int world_size, bool use_gpu, float sparsit
|
||||
learner->SetParam("device", ctx.DeviceName());
|
||||
learner->Predict(Xy, false, &sparse_predt, 0, 0);
|
||||
|
||||
RunWithInMemoryCommunicator(world_size, VerifySparsePredictionColumnSplit, use_gpu, model,
|
||||
kRows, kCols, sparsity, sparse_predt.HostVector());
|
||||
#if !defined(XGBOOST_USE_NCCL)
|
||||
if (use_gpu) {
|
||||
GTEST_SKIP_("Not compiled with NCCL.");
|
||||
return;
|
||||
}
|
||||
#endif // !defined(XGBOOST_USE_NCCL)
|
||||
collective::TestDistributedGlobal(world_size, [&] {
|
||||
VerifySparsePredictionColumnSplit(use_gpu, model, kRows, kCols, sparsity,
|
||||
sparse_predt.HostVector());
|
||||
});
|
||||
|
||||
#if defined(XGBOOST_USE_FEDERATED)
|
||||
collective::TestFederatedGlobal(world_size, [&] {
|
||||
VerifySparsePredictionColumnSplit(use_gpu, model, kRows, kCols, sparsity,
|
||||
sparse_predt.HostVector());
|
||||
});
|
||||
#endif // defined(XGBOOST_USE_FEDERATED)
|
||||
}
|
||||
|
||||
void TestVectorLeafPrediction(Context const *ctx) {
|
||||
|
||||
Reference in New Issue
Block a user