More tests for column split and vertical federated learning (#8985)
Added more tests for the learner and `fit_stump`, covering both column-wise distributed learning and vertical federated learning. Also moved the `IsRowSplit` and `IsColumnSplit` methods from `DMatrix` to `MetaInfo`, since in some places only the `MetaInfo` is accessible. Added a new convenience method, `IsVerticalFederatedLearning`, and refactored some of the testing fixtures.
This commit is contained in:
@@ -191,15 +191,9 @@ double GetMultiMetricEval(xgboost::Metric* metric,
|
||||
}
|
||||
|
||||
namespace xgboost {
|
||||
bool IsNear(std::vector<xgboost::bst_float>::const_iterator _beg1,
|
||||
std::vector<xgboost::bst_float>::const_iterator _end1,
|
||||
std::vector<xgboost::bst_float>::const_iterator _beg2) {
|
||||
for (auto iter1 = _beg1, iter2 = _beg2; iter1 != _end1; ++iter1, ++iter2) {
|
||||
if (std::abs(*iter1 - *iter2) > xgboost::kRtEps){
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
|
||||
/*!
 * \brief Read the base score stored in a learner configuration.
 *
 * Looks up config["learner"]["learner_model_param"]["base_score"], which is held as a JSON
 * string, and converts it with std::stof.
 *
 * \param config Configuration JSON to read from.
 * \return The parsed base score.
 */
float GetBaseScore(Json const &config) {
  auto const &model_param = config["learner"]["learner_model_param"];
  auto const &score_text = get<String const>(model_param["base_score"]);
  return std::stof(score_text);
}
|
||||
|
||||
SimpleLCG::StateType SimpleLCG::operator()() {
|
||||
|
||||
@@ -101,9 +101,8 @@ double GetMultiMetricEval(xgboost::Metric* metric,
|
||||
std::vector<xgboost::bst_uint> groups = {});
|
||||
|
||||
namespace xgboost {
|
||||
/*!
 * \brief Element-wise closeness check between two float sequences.
 *
 * Walks [_beg1, _end1) and, in lock step, the sequence starting at _beg2.  Returns false as
 * soon as a pair of elements differs by more than xgboost::kRtEps, true otherwise.  The
 * second sequence is advanced without a bounds check, so it has to provide at least as many
 * elements as the first.
 */
bool IsNear(std::vector<xgboost::bst_float>::const_iterator _beg1,
|
||||
std::vector<xgboost::bst_float>::const_iterator _end1,
|
||||
std::vector<xgboost::bst_float>::const_iterator _beg2);
|
||||
|
||||
/*!
 * \brief Parse the "base_score" entry found under config["learner"]["learner_model_param"]
 *        as a float.
 */
float GetBaseScore(Json const &config);
|
||||
|
||||
/*!
|
||||
* \brief Linear congruential generator.
|
||||
|
||||
@@ -52,18 +52,33 @@ class BaseFederatedTest : public ::testing::Test {
|
||||
server_thread_->join();
|
||||
}
|
||||
|
||||
void InitCommunicator(int rank) {
|
||||
Json config{JsonObject()};
|
||||
config["xgboost_communicator"] = String("federated");
|
||||
config["federated_server_address"] = String(server_address_);
|
||||
config["federated_world_size"] = kWorldSize;
|
||||
config["federated_rank"] = rank;
|
||||
xgboost::collective::Init(config);
|
||||
}
|
||||
|
||||
static int const kWorldSize{3};
|
||||
std::string server_address_;
|
||||
std::unique_ptr<std::thread> server_thread_;
|
||||
std::unique_ptr<grpc::Server> server_;
|
||||
};
|
||||
|
||||
template <typename Function, typename... Args>
|
||||
void RunWithFederatedCommunicator(int32_t world_size, std::string const& server_address,
|
||||
Function&& function, Args&&... args) {
|
||||
std::vector<std::thread> threads;
|
||||
for (auto rank = 0; rank < world_size; rank++) {
|
||||
threads.emplace_back([&, rank]() {
|
||||
Json config{JsonObject()};
|
||||
config["xgboost_communicator"] = String("federated");
|
||||
config["federated_server_address"] = String(server_address);
|
||||
config["federated_world_size"] = world_size;
|
||||
config["federated_rank"] = rank;
|
||||
xgboost::collective::Init(config);
|
||||
|
||||
std::forward<Function>(function)(std::forward<Args>(args)...);
|
||||
|
||||
xgboost::collective::Finalize();
|
||||
});
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -1,12 +1,9 @@
|
||||
/*!
|
||||
* Copyright 2023 XGBoost contributors
|
||||
*/
|
||||
#include <dmlc/parameter.h>
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
|
||||
#include "../../../plugin/federated/federated_server.h"
|
||||
@@ -17,49 +14,40 @@
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
class FederatedDataTest : public BaseFederatedTest {
|
||||
public:
|
||||
void VerifyLoadUri(int rank) {
|
||||
InitCommunicator(rank);
|
||||
class FederatedDataTest : public BaseFederatedTest {};
|
||||
|
||||
size_t constexpr kRows{16};
|
||||
size_t const kCols = 8 + rank;
|
||||
void VerifyLoadUri() {
|
||||
auto const rank = collective::GetRank();
|
||||
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::string path = tmpdir.path + "/small" + std::to_string(rank) + ".csv";
|
||||
CreateTestCSV(path, kRows, kCols);
|
||||
size_t constexpr kRows{16};
|
||||
size_t const kCols = 8 + rank;
|
||||
|
||||
std::unique_ptr<DMatrix> dmat;
|
||||
std::string uri = path + "?format=csv";
|
||||
dmat.reset(DMatrix::Load(uri, false, DataSplitMode::kCol));
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::string path = tmpdir.path + "/small" + std::to_string(rank) + ".csv";
|
||||
CreateTestCSV(path, kRows, kCols);
|
||||
|
||||
ASSERT_EQ(dmat->Info().num_col_, 8 * kWorldSize + 3);
|
||||
ASSERT_EQ(dmat->Info().num_row_, kRows);
|
||||
std::unique_ptr<DMatrix> dmat;
|
||||
std::string uri = path + "?format=csv";
|
||||
dmat.reset(DMatrix::Load(uri, false, DataSplitMode::kCol));
|
||||
|
||||
for (auto const& page : dmat->GetBatches<SparsePage>()) {
|
||||
auto entries = page.GetView().data;
|
||||
auto index = 0;
|
||||
int offsets[] = {0, 8, 17};
|
||||
int offset = offsets[rank];
|
||||
for (auto row = 0; row < kRows; row++) {
|
||||
for (auto col = 0; col < kCols; col++) {
|
||||
EXPECT_EQ(entries[index].index, col + offset);
|
||||
index++;
|
||||
}
|
||||
ASSERT_EQ(dmat->Info().num_col_, 8 * collective::GetWorldSize() + 3);
|
||||
ASSERT_EQ(dmat->Info().num_row_, kRows);
|
||||
|
||||
for (auto const& page : dmat->GetBatches<SparsePage>()) {
|
||||
auto entries = page.GetView().data;
|
||||
auto index = 0;
|
||||
int offsets[] = {0, 8, 17};
|
||||
int offset = offsets[rank];
|
||||
for (auto row = 0; row < kRows; row++) {
|
||||
for (auto col = 0; col < kCols; col++) {
|
||||
EXPECT_EQ(entries[index].index, col + offset);
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
xgboost::collective::Finalize();
|
||||
}
|
||||
};
|
||||
|
||||
// Starts one thread per rank, each running the fixture's VerifyLoadUri with that rank, and
// waits for all of them to finish.
TEST_F(FederatedDataTest, LoadUri) {
  std::vector<std::thread> workers;
  for (int worker_rank = 0; worker_rank < kWorldSize; ++worker_rank) {
    workers.emplace_back(&FederatedDataTest_LoadUri_Test::VerifyLoadUri, this, worker_rank);
  }
  for (auto& worker : workers) {
    worker.join();
  }
}
|
||||
|
||||
// Hands VerifyLoadUri to RunWithFederatedCommunicator together with the fixture's world size
// and server address.
TEST_F(FederatedDataTest, LoadUri) {
  auto* const verify = &VerifyLoadUri;
  RunWithFederatedCommunicator(kWorldSize, server_address_, verify);
}
|
||||
} // namespace xgboost
|
||||
|
||||
78
tests/cpp/plugin/test_federated_learner.cc
Normal file
78
tests/cpp/plugin/test_federated_learner.cc
Normal file
@@ -0,0 +1,78 @@
|
||||
/*!
|
||||
* Copyright 2023 XGBoost contributors
|
||||
*/
|
||||
#include <dmlc/parameter.h>
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/data.h>
|
||||
#include <xgboost/objective.h>
|
||||
|
||||
#include "../../../plugin/federated/federated_server.h"
|
||||
#include "../../../src/collective/communicator-inl.h"
|
||||
#include "../helpers.h"
|
||||
#include "helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
/*! \brief Fixture for the federated learner tests; holds the row and column count constants. */
class FederatedLearnerTest : public BaseFederatedTest {
 protected:
  static constexpr int kRows = 16;
  static constexpr int kCols = 16;
};
|
||||
|
||||
void VerifyBaseScore(size_t rows, size_t cols, float expected_base_score) {
|
||||
auto const world_size = collective::GetWorldSize();
|
||||
auto const rank = collective::GetRank();
|
||||
std::shared_ptr<DMatrix> Xy_{RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(rank == 0)};
|
||||
std::shared_ptr<DMatrix> sliced{Xy_->SliceCol(world_size, rank)};
|
||||
std::unique_ptr<Learner> learner{Learner::Create({sliced})};
|
||||
learner->SetParam("tree_method", "approx");
|
||||
learner->SetParam("objective", "binary:logistic");
|
||||
learner->UpdateOneIter(0, sliced);
|
||||
Json config{Object{}};
|
||||
learner->SaveConfig(&config);
|
||||
auto base_score = GetBaseScore(config);
|
||||
ASSERT_EQ(base_score, expected_base_score);
|
||||
}
|
||||
|
||||
void VerifyModel(size_t rows, size_t cols, Json const& expected_model) {
|
||||
auto const world_size = collective::GetWorldSize();
|
||||
auto const rank = collective::GetRank();
|
||||
std::shared_ptr<DMatrix> Xy_{RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(rank == 0)};
|
||||
std::shared_ptr<DMatrix> sliced{Xy_->SliceCol(world_size, rank)};
|
||||
std::unique_ptr<Learner> learner{Learner::Create({sliced})};
|
||||
learner->SetParam("tree_method", "approx");
|
||||
learner->SetParam("objective", "binary:logistic");
|
||||
learner->UpdateOneIter(0, sliced);
|
||||
Json model{Object{}};
|
||||
learner->SaveModel(&model);
|
||||
ASSERT_EQ(model, expected_model);
|
||||
}
|
||||
|
||||
// Trains one iteration on the full generated matrix, requires the resulting base score to
// differ from ObjFunction::DefaultBaseScore(), then has every federated worker compare its
// own base score against it through VerifyBaseScore.
TEST_F(FederatedLearnerTest, BaseScore) {
  std::shared_ptr<DMatrix> full_data{RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true)};

  std::unique_ptr<Learner> reference{Learner::Create({full_data})};
  reference->SetParam("tree_method", "approx");
  reference->SetParam("objective", "binary:logistic");
  reference->UpdateOneIter(0, full_data);

  Json saved_config{Object{}};
  reference->SaveConfig(&saved_config);
  auto base_score = GetBaseScore(saved_config);
  ASSERT_NE(base_score, ObjFunction::DefaultBaseScore());

  RunWithFederatedCommunicator(kWorldSize, server_address_, &VerifyBaseScore, kRows, kCols,
                               base_score);
}
|
||||
|
||||
// Trains one iteration on the full generated matrix, saves the model, then has every
// federated worker compare its own saved model against it through VerifyModel.
TEST_F(FederatedLearnerTest, Model) {
  std::shared_ptr<DMatrix> full_data{RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true)};

  std::unique_ptr<Learner> reference{Learner::Create({full_data})};
  reference->SetParam("tree_method", "approx");
  reference->SetParam("objective", "binary:logistic");
  reference->UpdateOneIter(0, full_data);

  Json model{Object{}};
  reference->SaveModel(&model);

  RunWithFederatedCommunicator(kWorldSize, server_address_, &VerifyModel, kRows, kCols,
                               std::cref(model));
}
|
||||
} // namespace xgboost
|
||||
@@ -460,10 +460,6 @@ class InitBaseScore : public ::testing::Test {
|
||||
|
||||
void SetUp() override { Xy_ = RandomDataGenerator{10, Cols(), 0}.GenerateDMatrix(true); }
|
||||
|
||||
/*!
 * \brief Parse config["learner"]["learner_model_param"]["base_score"] as a float.
 */
static float GetBaseScore(Json const &config) {
  auto const &model_param = config["learner"]["learner_model_param"];
  auto const &score_text = get<String const>(model_param["base_score"]);
  return std::stof(score_text);
}
|
||||
|
||||
public:
|
||||
void TestUpdateConfig() {
|
||||
std::unique_ptr<Learner> learner{Learner::Create({Xy_})};
|
||||
@@ -611,4 +607,32 @@ TEST_F(InitBaseScore, InitAfterLoad) { this->TestInitAfterLoad(); }
|
||||
// Delegates to the fixture's TestInitWithPredt().
TEST_F(InitBaseScore, InitWithPredict) {
  TestInitWithPredt();
}
|
||||
|
||||
// Delegates to the fixture's TestUpdateProcess().
TEST_F(InitBaseScore, UpdateProcess) {
  TestUpdateProcess();
}
|
||||
|
||||
void TestColumnSplitBaseScore(std::shared_ptr<DMatrix> Xy_, float expected_base_score) {
|
||||
auto const world_size = collective::GetWorldSize();
|
||||
auto const rank = collective::GetRank();
|
||||
std::shared_ptr<DMatrix> sliced{Xy_->SliceCol(world_size, rank)};
|
||||
std::unique_ptr<Learner> learner{Learner::Create({sliced})};
|
||||
learner->SetParam("tree_method", "approx");
|
||||
learner->SetParam("objective", "binary:logistic");
|
||||
learner->UpdateOneIter(0, sliced);
|
||||
Json config{Object{}};
|
||||
learner->SaveConfig(&config);
|
||||
auto base_score = GetBaseScore(config);
|
||||
ASSERT_EQ(base_score, expected_base_score);
|
||||
}
|
||||
|
||||
// Trains one iteration on the fixture's full data, requires the base score to differ from
// ObjFunction::DefaultBaseScore(), then runs TestColumnSplitBaseScore on three in-memory
// workers with that score as the expected value.
TEST_F(InitBaseScore, ColumnSplit) {
  constexpr int kWorldSize = 3;

  std::unique_ptr<Learner> reference{Learner::Create({Xy_})};
  reference->SetParam("tree_method", "approx");
  reference->SetParam("objective", "binary:logistic");
  reference->UpdateOneIter(0, Xy_);

  Json saved_config{Object{}};
  reference->SaveConfig(&saved_config);
  auto base_score = GetBaseScore(saved_config);
  ASSERT_NE(base_score, ObjFunction::DefaultBaseScore());

  RunWithInMemoryCommunicator(kWorldSize, &TestColumnSplitBaseScore, Xy_, base_score);
}
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -6,11 +6,12 @@
|
||||
|
||||
#include "../../src/common/linalg_op.h"
|
||||
#include "../../src/tree/fit_stump.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
namespace {
|
||||
void TestFitStump(Context const *ctx) {
|
||||
void TestFitStump(Context const *ctx, DataSplitMode split = DataSplitMode::kRow) {
|
||||
std::size_t constexpr kRows = 16, kTargets = 2;
|
||||
HostDeviceVector<GradientPair> gpair;
|
||||
auto &h_gpair = gpair.HostVector();
|
||||
@@ -22,6 +23,7 @@ void TestFitStump(Context const *ctx) {
|
||||
}
|
||||
linalg::Vector<float> out;
|
||||
MetaInfo info;
|
||||
info.data_split_mode = split;
|
||||
FitStump(ctx, info, gpair, kTargets, &out);
|
||||
auto h_out = out.HostView();
|
||||
for (auto it = linalg::cbegin(h_out); it != linalg::cend(h_out); ++it) {
|
||||
@@ -45,5 +47,12 @@ TEST(InitEstimation, GPUFitStump) {
|
||||
TestFitStump(&ctx);
|
||||
}
|
||||
#endif // defined(XGBOOST_USE_CUDA)
|
||||
|
||||
// Runs TestFitStump with DataSplitMode::kCol on three in-memory communicator workers.
TEST(InitEstimation, FitStumpColumnSplit) {
  constexpr int kWorldSize = 3;
  Context ctx;
  RunWithInMemoryCommunicator(kWorldSize, &TestFitStump, &ctx, DataSplitMode::kCol);
}
|
||||
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
|
||||
Reference in New Issue
Block a user