Reduce thread contention in column split tests. (#10658)
--------- Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
parent
778751a1bb
commit
77c844cef7
@ -372,7 +372,7 @@ RabitComm::~RabitComm() noexcept(false) {
|
|||||||
// Tell the error hanlding thread that we are shutting down.
|
// Tell the error hanlding thread that we are shutting down.
|
||||||
TCPSocket err_client;
|
TCPSocket err_client;
|
||||||
|
|
||||||
return Success() << [&] {
|
auto rc = Success() << [&] {
|
||||||
return ConnectTrackerImpl(tracker_, timeout_, retry_, task_id_, &tracker, Rank(), World());
|
return ConnectTrackerImpl(tracker_, timeout_, retry_, task_id_, &tracker, Rank(), World());
|
||||||
} << [&] {
|
} << [&] {
|
||||||
return this->Block();
|
return this->Block();
|
||||||
@ -403,6 +403,10 @@ RabitComm::~RabitComm() noexcept(false) {
|
|||||||
// the previous more important steps.
|
// the previous more important steps.
|
||||||
return proto::Error{}.SignalShutdown(&err_client);
|
return proto::Error{}.SignalShutdown(&err_client);
|
||||||
};
|
};
|
||||||
|
if (!rc.OK()) {
|
||||||
|
return Fail("Failed to shutdown.", std::move(rc));
|
||||||
|
}
|
||||||
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] Result RabitComm::LogTracker(std::string msg) const {
|
[[nodiscard]] Result RabitComm::LogTracker(std::string msg) const {
|
||||||
|
|||||||
@ -186,6 +186,12 @@ void TestDistributedGlobal(std::int32_t n_workers, WorkerFn worker_fn, bool need
|
|||||||
system::SocketFinalize();
|
system::SocketFinalize();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline std::int32_t GetWorkerLocalThreads(std::int32_t n_workers) {
|
||||||
|
std::int32_t n_total_threads = std::thread::hardware_concurrency();
|
||||||
|
auto n_threads = std::max(n_total_threads / n_workers, 1);
|
||||||
|
return n_threads;
|
||||||
|
}
|
||||||
|
|
||||||
class BaseMGPUTest : public ::testing::Test {
|
class BaseMGPUTest : public ::testing::Test {
|
||||||
public:
|
public:
|
||||||
/**
|
/**
|
||||||
|
|||||||
@ -1,12 +1,9 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2017-2023 by XGBoost contributors
|
* Copyright 2017-2024, XGBoost contributors
|
||||||
*/
|
*/
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
#include <xgboost/predictor.h>
|
#include <xgboost/predictor.h>
|
||||||
|
|
||||||
#include <cstdint>
|
|
||||||
#include <thread>
|
|
||||||
|
|
||||||
#include "../../../src/collective/communicator-inl.h"
|
#include "../../../src/collective/communicator-inl.h"
|
||||||
#include "../../../src/data/adapter.h"
|
#include "../../../src/data/adapter.h"
|
||||||
#include "../../../src/data/proxy_dmatrix.h"
|
#include "../../../src/data/proxy_dmatrix.h"
|
||||||
|
|||||||
@ -511,15 +511,20 @@ void VerifyIterationRangeColumnSplit(bool use_gpu, Json const &ranged_model,
|
|||||||
if (use_gpu) {
|
if (use_gpu) {
|
||||||
ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : rank);
|
ctx = MakeCUDACtx(common::AllVisibleGPUs() == 1 ? 0 : rank);
|
||||||
}
|
}
|
||||||
|
auto n_threads = collective::GetWorkerLocalThreads(world_size);
|
||||||
|
ctx.UpdateAllowUnknown(
|
||||||
|
Args{{"nthread", std::to_string(n_threads)}, {"device", ctx.DeviceName()}});
|
||||||
|
|
||||||
auto dmat = RandomDataGenerator(rows, cols, 0).GenerateDMatrix(true, true, classes);
|
auto dmat = RandomDataGenerator(rows, cols, 0).GenerateDMatrix(true, true, classes);
|
||||||
std::shared_ptr<DMatrix> Xy{dmat->SliceCol(world_size, rank)};
|
std::shared_ptr<DMatrix> Xy{dmat->SliceCol(world_size, rank)};
|
||||||
|
|
||||||
std::unique_ptr<Learner> learner{Learner::Create({Xy})};
|
std::unique_ptr<Learner> learner{Learner::Create({Xy})};
|
||||||
learner->SetParam("device", ctx.DeviceName());
|
auto args = Args{{"device", ctx.DeviceName()}, {"nthread", std::to_string(ctx.Threads())}};
|
||||||
|
learner->SetParams(args);
|
||||||
learner->LoadModel(ranged_model);
|
learner->LoadModel(ranged_model);
|
||||||
|
|
||||||
std::unique_ptr<Learner> sliced{Learner::Create({Xy})};
|
std::unique_ptr<Learner> sliced{Learner::Create({Xy})};
|
||||||
sliced->SetParam("device", ctx.DeviceName());
|
sliced->SetParams(args);
|
||||||
sliced->LoadModel(sliced_model);
|
sliced->LoadModel(sliced_model);
|
||||||
|
|
||||||
HostDeviceVector<float> out_predt_sliced;
|
HostDeviceVector<float> out_predt_sliced;
|
||||||
|
|||||||
@ -662,13 +662,15 @@ TEST_F(InitBaseScore, UpdateProcess) { this->TestUpdateProcess(); }
|
|||||||
class TestColumnSplit : public ::testing::TestWithParam<std::string> {
|
class TestColumnSplit : public ::testing::TestWithParam<std::string> {
|
||||||
void TestBaseScore(std::string objective, float expected_base_score, Json expected_model) {
|
void TestBaseScore(std::string objective, float expected_base_score, Json expected_model) {
|
||||||
auto const world_size = collective::GetWorldSize();
|
auto const world_size = collective::GetWorldSize();
|
||||||
|
auto n_threads = collective::GetWorkerLocalThreads(world_size);
|
||||||
auto const rank = collective::GetRank();
|
auto const rank = collective::GetRank();
|
||||||
|
|
||||||
auto p_fmat = MakeFmatForObjTest(objective, 10, 10);
|
auto p_fmat = MakeFmatForObjTest(objective, 10, 10);
|
||||||
std::shared_ptr<DMatrix> sliced{p_fmat->SliceCol(world_size, rank)};
|
std::shared_ptr<DMatrix> sliced{p_fmat->SliceCol(world_size, rank)};
|
||||||
std::unique_ptr<Learner> learner{Learner::Create({sliced})};
|
std::unique_ptr<Learner> learner{Learner::Create({sliced})};
|
||||||
learner->SetParam("tree_method", "approx");
|
learner->SetParams(Args{{"nthread", std::to_string(n_threads)},
|
||||||
learner->SetParam("objective", objective);
|
{"tree_method", "approx"},
|
||||||
|
{"objective", objective}});
|
||||||
if (objective.find("quantile") != std::string::npos) {
|
if (objective.find("quantile") != std::string::npos) {
|
||||||
learner->SetParam("quantile_alpha", "0.5");
|
learner->SetParam("quantile_alpha", "0.5");
|
||||||
}
|
}
|
||||||
@ -707,7 +709,9 @@ class TestColumnSplit : public ::testing::TestWithParam<std::string> {
|
|||||||
learner->SaveModel(&model);
|
learner->SaveModel(&model);
|
||||||
|
|
||||||
auto constexpr kWorldSize{3};
|
auto constexpr kWorldSize{3};
|
||||||
auto call = [this, &objective](auto&... args) { TestBaseScore(objective, args...); };
|
auto call = [this, &objective](auto&... args) {
|
||||||
|
this->TestBaseScore(objective, args...);
|
||||||
|
};
|
||||||
auto score = GetBaseScore(config);
|
auto score = GetBaseScore(config);
|
||||||
collective::TestDistributedGlobal(kWorldSize, [&] {
|
collective::TestDistributedGlobal(kWorldSize, [&] {
|
||||||
call(score, model);
|
call(score, model);
|
||||||
@ -730,8 +734,10 @@ namespace {
|
|||||||
Json GetModelWithArgs(std::shared_ptr<DMatrix> dmat, std::string const& tree_method,
|
Json GetModelWithArgs(std::shared_ptr<DMatrix> dmat, std::string const& tree_method,
|
||||||
std::string const& device, Args const& args) {
|
std::string const& device, Args const& args) {
|
||||||
std::unique_ptr<Learner> learner{Learner::Create({dmat})};
|
std::unique_ptr<Learner> learner{Learner::Create({dmat})};
|
||||||
|
auto n_threads = collective::GetWorkerLocalThreads(collective::GetWorldSize());
|
||||||
learner->SetParam("tree_method", tree_method);
|
learner->SetParam("tree_method", tree_method);
|
||||||
learner->SetParam("device", device);
|
learner->SetParam("device", device);
|
||||||
|
learner->SetParam("nthread", std::to_string(n_threads));
|
||||||
learner->SetParam("objective", "reg:logistic");
|
learner->SetParam("objective", "reg:logistic");
|
||||||
learner->SetParams(args);
|
learner->SetParams(args);
|
||||||
learner->UpdateOneIter(0, dmat);
|
learner->UpdateOneIter(0, dmat);
|
||||||
|
|||||||
@ -2,6 +2,11 @@
|
|||||||
* Copyright 2021-2024, XGBoost contributors.
|
* Copyright 2021-2024, XGBoost contributors.
|
||||||
*/
|
*/
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
|
#include <xgboost/tree_updater.h> // for TreeUpdater
|
||||||
|
|
||||||
|
#include <algorithm> // for transform
|
||||||
|
#include <memory> // for unique_ptr
|
||||||
|
#include <vector> // for vector
|
||||||
|
|
||||||
#include "../../../src/tree/common_row_partitioner.h"
|
#include "../../../src/tree/common_row_partitioner.h"
|
||||||
#include "../../../src/tree/param.h" // for TrainParam
|
#include "../../../src/tree/param.h" // for TrainParam
|
||||||
|
|||||||
67
tests/cpp/tree/test_column_split.cc
Normal file
67
tests/cpp/tree/test_column_split.cc
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2024, XGBoost Contributors
|
||||||
|
*/
|
||||||
|
#include "test_column_split.h"
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <xgboost/tree_model.h> // for RegTree
|
||||||
|
#include <xgboost/tree_updater.h> // for TreeUpdater
|
||||||
|
|
||||||
|
#include <thread> // for hardware_concurrency
|
||||||
|
#include <vector> // for vector
|
||||||
|
|
||||||
|
#include "../../../src/tree/param.h" // for TrainParam
|
||||||
|
#include "../collective/test_worker.h" // for TestDistributedGlobal
|
||||||
|
|
||||||
|
namespace xgboost::tree {
|
||||||
|
void TestColumnSplit(bst_target_t n_targets, bool categorical, std::string name, float sparsity) {
|
||||||
|
auto constexpr kRows = 32;
|
||||||
|
auto constexpr kCols = 16;
|
||||||
|
|
||||||
|
RegTree expected_tree{n_targets, static_cast<bst_feature_t>(kCols)};
|
||||||
|
ObjInfo task{ObjInfo::kRegression};
|
||||||
|
Context ctx;
|
||||||
|
{
|
||||||
|
auto p_dmat = GenerateCatDMatrix(kRows, kCols, sparsity, categorical);
|
||||||
|
auto gpair = GenerateRandomGradients(&ctx, kRows, n_targets);
|
||||||
|
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(name, &ctx, &task)};
|
||||||
|
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||||
|
TrainParam param;
|
||||||
|
param.Init(Args{});
|
||||||
|
updater->Configure(Args{});
|
||||||
|
updater->Update(¶m, &gpair, p_dmat.get(), position, {&expected_tree});
|
||||||
|
}
|
||||||
|
|
||||||
|
auto constexpr kWorldSize = 2;
|
||||||
|
|
||||||
|
auto verify = [&] {
|
||||||
|
Context ctx;
|
||||||
|
ctx.UpdateAllowUnknown(
|
||||||
|
Args{{"nthread", std::to_string(collective::GetWorkerLocalThreads(kWorldSize))}});
|
||||||
|
|
||||||
|
auto p_dmat = GenerateCatDMatrix(kRows, kCols, sparsity, categorical);
|
||||||
|
auto gpair = GenerateRandomGradients(&ctx, kRows, n_targets);
|
||||||
|
|
||||||
|
ObjInfo task{ObjInfo::kRegression};
|
||||||
|
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(name, &ctx, &task)};
|
||||||
|
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
||||||
|
|
||||||
|
std::unique_ptr<DMatrix> sliced{
|
||||||
|
p_dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
|
||||||
|
|
||||||
|
RegTree tree{n_targets, static_cast<bst_feature_t>(kCols)};
|
||||||
|
TrainParam param;
|
||||||
|
param.Init(Args{});
|
||||||
|
updater->Configure(Args{});
|
||||||
|
updater->Update(¶m, &gpair, sliced.get(), position, {&tree});
|
||||||
|
|
||||||
|
Json json{Object{}};
|
||||||
|
tree.SaveModel(&json);
|
||||||
|
Json expected_json{Object{}};
|
||||||
|
expected_tree.SaveModel(&expected_json);
|
||||||
|
ASSERT_EQ(json, expected_json);
|
||||||
|
};
|
||||||
|
|
||||||
|
collective::TestDistributedGlobal(kWorldSize, [&] { verify(); });
|
||||||
|
}
|
||||||
|
} // namespace xgboost::tree
|
||||||
@ -4,15 +4,11 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <xgboost/data.h> // for FeatureType, DMatrix
|
#include <xgboost/data.h> // for FeatureType, DMatrix
|
||||||
#include <xgboost/tree_model.h> // for RegTree
|
|
||||||
#include <xgboost/tree_updater.h> // for TreeUpdater
|
|
||||||
|
|
||||||
#include <cstddef> // for size_t
|
#include <cstddef> // for size_t
|
||||||
#include <memory> // for shared_ptr
|
#include <memory> // for shared_ptr
|
||||||
#include <vector> // for vector
|
#include <vector> // for vector
|
||||||
|
|
||||||
#include "../../../src/tree/param.h" // for TrainParam
|
|
||||||
#include "../collective/test_worker.h" // for TestDistributedGlobal
|
|
||||||
#include "../helpers.h" // for RandomDataGenerator
|
#include "../helpers.h" // for RandomDataGenerator
|
||||||
|
|
||||||
namespace xgboost::tree {
|
namespace xgboost::tree {
|
||||||
@ -33,51 +29,5 @@ inline std::shared_ptr<DMatrix> GenerateCatDMatrix(std::size_t rows, std::size_t
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void TestColumnSplit(bst_target_t n_targets, bool categorical, std::string name,
|
void TestColumnSplit(bst_target_t n_targets, bool categorical, std::string name, float sparsity);
|
||||||
float sparsity) {
|
|
||||||
auto constexpr kRows = 32;
|
|
||||||
auto constexpr kCols = 16;
|
|
||||||
|
|
||||||
RegTree expected_tree{n_targets, static_cast<bst_feature_t>(kCols)};
|
|
||||||
ObjInfo task{ObjInfo::kRegression};
|
|
||||||
Context ctx;
|
|
||||||
{
|
|
||||||
auto p_dmat = GenerateCatDMatrix(kRows, kCols, sparsity, categorical);
|
|
||||||
auto gpair = GenerateRandomGradients(&ctx, kRows, n_targets);
|
|
||||||
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(name, &ctx, &task)};
|
|
||||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
|
||||||
TrainParam param;
|
|
||||||
param.Init(Args{});
|
|
||||||
updater->Configure(Args{});
|
|
||||||
updater->Update(¶m, &gpair, p_dmat.get(), position, {&expected_tree});
|
|
||||||
}
|
|
||||||
|
|
||||||
auto verify = [&] {
|
|
||||||
Context ctx;
|
|
||||||
auto p_dmat = GenerateCatDMatrix(kRows, kCols, sparsity, categorical);
|
|
||||||
auto gpair = GenerateRandomGradients(&ctx, kRows, n_targets);
|
|
||||||
|
|
||||||
ObjInfo task{ObjInfo::kRegression};
|
|
||||||
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(name, &ctx, &task)};
|
|
||||||
std::vector<HostDeviceVector<bst_node_t>> position(1);
|
|
||||||
|
|
||||||
std::unique_ptr<DMatrix> sliced{
|
|
||||||
p_dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};
|
|
||||||
|
|
||||||
RegTree tree{n_targets, static_cast<bst_feature_t>(kCols)};
|
|
||||||
TrainParam param;
|
|
||||||
param.Init(Args{});
|
|
||||||
updater->Configure(Args{});
|
|
||||||
updater->Update(¶m, &gpair, sliced.get(), position, {&tree});
|
|
||||||
|
|
||||||
Json json{Object{}};
|
|
||||||
tree.SaveModel(&json);
|
|
||||||
Json expected_json{Object{}};
|
|
||||||
expected_tree.SaveModel(&expected_json);
|
|
||||||
ASSERT_EQ(json, expected_json);
|
|
||||||
};
|
|
||||||
|
|
||||||
auto constexpr kWorldSize = 2;
|
|
||||||
collective::TestDistributedGlobal(kWorldSize, [&] { verify(); });
|
|
||||||
}
|
|
||||||
} // namespace xgboost::tree
|
} // namespace xgboost::tree
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user