[coll] Improve column split tests with named threads. (#10735)
This commit is contained in:
parent
55aef8f546
commit
fd0138c91c
@ -30,4 +30,4 @@
|
|||||||
|
|
||||||
#endif // xgboost_IS_MINGW
|
#endif // xgboost_IS_MINGW
|
||||||
|
|
||||||
#endif // defined(xgboost_IS_WIN)
|
#endif // !defined(xgboost_IS_WIN)
|
||||||
|
|||||||
@ -14,6 +14,7 @@
|
|||||||
#include <thread> // for thread
|
#include <thread> // for thread
|
||||||
#include <utility> // for move
|
#include <utility> // for move
|
||||||
|
|
||||||
|
#include "../common/threading_utils.h" // for NameThread
|
||||||
#include "xgboost/collective/poll_utils.h" // for PollHelper
|
#include "xgboost/collective/poll_utils.h" // for PollHelper
|
||||||
#include "xgboost/collective/result.h" // for Fail, Success
|
#include "xgboost/collective/result.h" // for Fail, Success
|
||||||
#include "xgboost/collective/socket.h" // for FailWithCode
|
#include "xgboost/collective/socket.h" // for FailWithCode
|
||||||
@ -271,5 +272,6 @@ Loop::Loop(std::chrono::seconds timeout) : timeout_{timeout} {
|
|||||||
worker_ = std::thread{[this] {
|
worker_ = std::thread{[this] {
|
||||||
this->Process();
|
this->Process();
|
||||||
}};
|
}};
|
||||||
|
common::NameThread(&worker_, "lw");
|
||||||
}
|
}
|
||||||
} // namespace xgboost::collective
|
} // namespace xgboost::collective
|
||||||
|
|||||||
@ -23,6 +23,7 @@
|
|||||||
#include <utility> // for move, forward
|
#include <utility> // for move, forward
|
||||||
|
|
||||||
#include "../common/json_utils.h"
|
#include "../common/json_utils.h"
|
||||||
|
#include "../common/threading_utils.h" // for NameThread
|
||||||
#include "comm.h"
|
#include "comm.h"
|
||||||
#include "protocol.h" // for kMagic, PeerInfo
|
#include "protocol.h" // for kMagic, PeerInfo
|
||||||
#include "tracker.h"
|
#include "tracker.h"
|
||||||
@ -143,6 +144,8 @@ Result RabitTracker::Bootstrap(std::vector<WorkerProxy>* p_workers) {
|
|||||||
Json::Dump(jnext, &str);
|
Json::Dump(jnext, &str);
|
||||||
worker.Send(StringView{str});
|
worker.Send(StringView{str});
|
||||||
});
|
});
|
||||||
|
std::string name = "tkbs_t-" + std::to_string(r);
|
||||||
|
common::NameThread(&bootstrap_threads.back(), name.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto& t : bootstrap_threads) {
|
for (auto& t : bootstrap_threads) {
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2022-2023 by XGBoost Contributors
|
* Copyright 2022-2024, XGBoost Contributors
|
||||||
*/
|
*/
|
||||||
#include "threading_utils.h"
|
#include "threading_utils.h"
|
||||||
|
|
||||||
@ -9,7 +9,11 @@
|
|||||||
#include <fstream> // for ifstream
|
#include <fstream> // for ifstream
|
||||||
#include <string> // for string
|
#include <string> // for string
|
||||||
|
|
||||||
#include "common.h" // for DivRoundUp
|
#include "common.h" // for DivRoundUp
|
||||||
|
|
||||||
|
#if defined(__linux__)
|
||||||
|
#include <pthread.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace xgboost::common {
|
namespace xgboost::common {
|
||||||
/**
|
/**
|
||||||
@ -113,4 +117,26 @@ std::int32_t OmpGetNumThreads(std::int32_t n_threads) {
|
|||||||
n_threads = std::max(n_threads, 1);
|
n_threads = std::max(n_threads, 1);
|
||||||
return n_threads;
|
return n_threads;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void NameThread(std::thread* t, StringView name) {
|
||||||
|
#if defined(__linux__)
|
||||||
|
auto handle = t->native_handle();
|
||||||
|
char old[16];
|
||||||
|
auto ret = pthread_getname_np(handle, old, 16);
|
||||||
|
if (ret != 0) {
|
||||||
|
LOG(WARNING) << "Failed to get the name from thread";
|
||||||
|
}
|
||||||
|
auto new_name = std::string{old} + ">" + name.c_str(); // NOLINT
|
||||||
|
if (new_name.size() > 15) {
|
||||||
|
new_name = new_name.substr(new_name.size() - 15);
|
||||||
|
}
|
||||||
|
ret = pthread_setname_np(handle, new_name.c_str());
|
||||||
|
if (ret != 0) {
|
||||||
|
LOG(WARNING) << "Failed to name thread:" << ret << " :" << new_name;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
(void)name;
|
||||||
|
(void)t;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
} // namespace xgboost::common
|
} // namespace xgboost::common
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
/**
|
/**
|
||||||
* Copyright 2019-2023 by XGBoost Contributors
|
* Copyright 2019-2024, XGBoost Contributors
|
||||||
*/
|
*/
|
||||||
#ifndef XGBOOST_COMMON_THREADING_UTILS_H_
|
#ifndef XGBOOST_COMMON_THREADING_UTILS_H_
|
||||||
#define XGBOOST_COMMON_THREADING_UTILS_H_
|
#define XGBOOST_COMMON_THREADING_UTILS_H_
|
||||||
@ -11,12 +11,13 @@
|
|||||||
#include <cstddef> // for size_t
|
#include <cstddef> // for size_t
|
||||||
#include <cstdint> // for int32_t
|
#include <cstdint> // for int32_t
|
||||||
#include <cstdlib> // for malloc, free
|
#include <cstdlib> // for malloc, free
|
||||||
#include <functional> // for function
|
|
||||||
#include <new> // for bad_alloc
|
#include <new> // for bad_alloc
|
||||||
|
#include <thread> // for thread
|
||||||
#include <type_traits> // for is_signed, conditional_t, is_integral_v, invoke_result_t
|
#include <type_traits> // for is_signed, conditional_t, is_integral_v, invoke_result_t
|
||||||
#include <vector> // for vector
|
#include <vector> // for vector
|
||||||
|
|
||||||
#include "xgboost/logging.h"
|
#include "xgboost/logging.h"
|
||||||
|
#include "xgboost/string_view.h" // for StringView
|
||||||
|
|
||||||
#if !defined(_OPENMP)
|
#if !defined(_OPENMP)
|
||||||
extern "C" {
|
extern "C" {
|
||||||
@ -308,6 +309,11 @@ class MemStackAllocator {
|
|||||||
* \brief Constant that can be used for initializing static thread local memory.
|
* \brief Constant that can be used for initializing static thread local memory.
|
||||||
*/
|
*/
|
||||||
std::int32_t constexpr DefaultMaxThreads() { return 128; }
|
std::int32_t constexpr DefaultMaxThreads() { return 128; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Give the thread a name. Supports only pthread on linux.
|
||||||
|
*/
|
||||||
|
void NameThread(std::thread* t, StringView name);
|
||||||
} // namespace xgboost::common
|
} // namespace xgboost::common
|
||||||
|
|
||||||
#endif // XGBOOST_COMMON_THREADING_UTILS_H_
|
#endif // XGBOOST_COMMON_THREADING_UTILS_H_
|
||||||
|
|||||||
@ -9,11 +9,15 @@
|
|||||||
#include <memory> // for make_shared
|
#include <memory> // for make_shared
|
||||||
#include <mutex> // for mutex, unique_lock
|
#include <mutex> // for mutex, unique_lock
|
||||||
#include <queue> // for queue
|
#include <queue> // for queue
|
||||||
|
#include <string> // for string
|
||||||
#include <thread> // for thread
|
#include <thread> // for thread
|
||||||
#include <type_traits> // for invoke_result_t
|
#include <type_traits> // for invoke_result_t
|
||||||
#include <utility> // for move
|
#include <utility> // for move
|
||||||
#include <vector> // for vector
|
#include <vector> // for vector
|
||||||
|
|
||||||
|
#include "threading_utils.h" // for NameThread
|
||||||
|
#include "xgboost/string_view.h" // for StringView
|
||||||
|
|
||||||
namespace xgboost::common {
|
namespace xgboost::common {
|
||||||
/**
|
/**
|
||||||
* @brief Simple implementation of a thread pool.
|
* @brief Simple implementation of a thread pool.
|
||||||
@ -27,11 +31,12 @@ class ThreadPool {
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
/**
|
/**
|
||||||
|
* @param name Name prefix for threads.
|
||||||
* @param n_threads The number of threads this pool should hold.
|
* @param n_threads The number of threads this pool should hold.
|
||||||
* @param init_fn Function called once during thread creation.
|
* @param init_fn Function called once during thread creation.
|
||||||
*/
|
*/
|
||||||
template <typename InitFn>
|
template <typename InitFn>
|
||||||
explicit ThreadPool(std::int32_t n_threads, InitFn&& init_fn) {
|
explicit ThreadPool(StringView name, std::int32_t n_threads, InitFn&& init_fn) {
|
||||||
for (std::int32_t i = 0; i < n_threads; ++i) {
|
for (std::int32_t i = 0; i < n_threads; ++i) {
|
||||||
pool_.emplace_back([&, init_fn = std::forward<InitFn>(init_fn)] {
|
pool_.emplace_back([&, init_fn = std::forward<InitFn>(init_fn)] {
|
||||||
init_fn();
|
init_fn();
|
||||||
@ -55,6 +60,8 @@ class ThreadPool {
|
|||||||
fn();
|
fn();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
std::string name_i = name.c_str() + std::string{"-"} + std::to_string(i); // NOLINT
|
||||||
|
NameThread(&pool_.back(), name_i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -336,7 +336,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl<S>, public FormatStreamPol
|
|||||||
public:
|
public:
|
||||||
SparsePageSourceImpl(float missing, int nthreads, bst_feature_t n_features, bst_idx_t n_batches,
|
SparsePageSourceImpl(float missing, int nthreads, bst_feature_t n_features, bst_idx_t n_batches,
|
||||||
std::shared_ptr<Cache> cache)
|
std::shared_ptr<Cache> cache)
|
||||||
: workers_{std::max(2, std::min(nthreads, 16)), InitNewThread{}},
|
: workers_{StringView{"ext-mem"}, std::max(2, std::min(nthreads, 16)), InitNewThread{}},
|
||||||
missing_{missing},
|
missing_{missing},
|
||||||
nthreads_{nthreads},
|
nthreads_{nthreads},
|
||||||
n_features_{n_features},
|
n_features_{n_features},
|
||||||
|
|||||||
@ -12,10 +12,11 @@
|
|||||||
#include <utility> // for move
|
#include <utility> // for move
|
||||||
#include <vector> // for vector
|
#include <vector> // for vector
|
||||||
|
|
||||||
#include "../../../src/collective/comm.h"
|
#include "../../../src/collective/comm.h" // for RabitComm
|
||||||
#include "../../../src/collective/communicator-inl.h" // for Init, Finalize
|
#include "../../../src/collective/communicator-inl.h" // for Init, Finalize
|
||||||
#include "../../../src/collective/tracker.h" // for GetHostAddress
|
#include "../../../src/collective/tracker.h" // for GetHostAddress
|
||||||
#include "../../../src/common/cuda_rt_utils.h" // for AllVisibleGPUs
|
#include "../../../src/common/cuda_rt_utils.h" // for AllVisibleGPUs
|
||||||
|
#include "../../../src/common/threading_utils.h" // for NameThread
|
||||||
#include "../helpers.h" // for FileExists
|
#include "../helpers.h" // for FileExists
|
||||||
|
|
||||||
#if defined(XGBOOST_USE_FEDERATED)
|
#if defined(XGBOOST_USE_FEDERATED)
|
||||||
@ -176,6 +177,9 @@ void TestDistributedGlobal(std::int32_t n_workers, WorkerFn worker_fn, bool need
|
|||||||
CHECK(status == std::future_status::ready) << "Test timeout";
|
CHECK(status == std::future_status::ready) << "Test timeout";
|
||||||
fut.get();
|
fut.get();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
std::string name = "tw-" + std::to_string(i);
|
||||||
|
common::NameThread(&workers.back(), name.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto& t : workers) {
|
for (auto& t : workers) {
|
||||||
@ -199,7 +203,7 @@ class BaseMGPUTest : public ::testing::Test {
|
|||||||
* available.
|
* available.
|
||||||
*/
|
*/
|
||||||
template <typename Fn>
|
template <typename Fn>
|
||||||
auto DoTest(Fn&& fn, bool is_federated, bool emulate_if_single = false) const {
|
auto DoTest(Fn&& fn, bool is_federated, [[maybe_unused]] bool emulate_if_single = false) const {
|
||||||
auto n_gpus = common::AllVisibleGPUs();
|
auto n_gpus = common::AllVisibleGPUs();
|
||||||
if (is_federated) {
|
if (is_federated) {
|
||||||
#if defined(XGBOOST_USE_FEDERATED)
|
#if defined(XGBOOST_USE_FEDERATED)
|
||||||
|
|||||||
@ -21,7 +21,7 @@ TEST(ThreadPool, Basic) {
|
|||||||
// 4 is an invalid value, it's only possible to set it by bypassing the parameter
|
// 4 is an invalid value, it's only possible to set it by bypassing the parameter
|
||||||
// validation.
|
// validation.
|
||||||
ASSERT_NE(orig, GlobalConfigThreadLocalStore::Get()->verbosity);
|
ASSERT_NE(orig, GlobalConfigThreadLocalStore::Get()->verbosity);
|
||||||
ThreadPool pool{n_threads, [config = *GlobalConfigThreadLocalStore::Get()] {
|
ThreadPool pool{StringView{"test"}, n_threads, [config = *GlobalConfigThreadLocalStore::Get()] {
|
||||||
*GlobalConfigThreadLocalStore::Get() = config;
|
*GlobalConfigThreadLocalStore::Get() = config;
|
||||||
}};
|
}};
|
||||||
GlobalConfigThreadLocalStore::Get()->verbosity = orig; // restore
|
GlobalConfigThreadLocalStore::Get()->verbosity = orig; // restore
|
||||||
|
|||||||
@ -745,8 +745,7 @@ void VerifyColumnSplitWithArgs(std::string const& tree_method, bool use_gpu, Arg
|
|||||||
std::shared_ptr<DMatrix> sliced{p_fmat->SliceCol(world_size, rank)};
|
std::shared_ptr<DMatrix> sliced{p_fmat->SliceCol(world_size, rank)};
|
||||||
std::string device = "cpu";
|
std::string device = "cpu";
|
||||||
if (use_gpu) {
|
if (use_gpu) {
|
||||||
auto gpu_id = common::AllVisibleGPUs() == 1 ? 0 : rank;
|
device = MakeCUDACtx(DistGpuIdx()).DeviceName();
|
||||||
device = "cuda:" + std::to_string(gpu_id);
|
|
||||||
}
|
}
|
||||||
auto model = GetModelWithArgs(sliced, tree_method, device, args);
|
auto model = GetModelWithArgs(sliced, tree_method, device, args);
|
||||||
ASSERT_EQ(model, expected_model);
|
ASSERT_EQ(model, expected_model);
|
||||||
@ -807,44 +806,32 @@ class ColumnSplitTrainingTest
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
auto MakeParamsForTest() {
|
auto WithFed() {
|
||||||
std::vector<std::tuple<std::string, bool, bool>> configs;
|
|
||||||
for (auto tm : {"hist", "approx"}) {
|
|
||||||
#if defined(XGBOOST_USE_CUDA)
|
|
||||||
std::array<bool, 2> use_gpu{true, false};
|
|
||||||
#else
|
|
||||||
std::array<bool, 1> use_gpu{false};
|
|
||||||
#endif
|
|
||||||
for (auto i : use_gpu) {
|
|
||||||
#if defined(XGBOOST_USE_FEDERATED)
|
#if defined(XGBOOST_USE_FEDERATED)
|
||||||
std::array<bool, 2> fed{true, false};
|
return ::testing::Bool();
|
||||||
#else
|
#else
|
||||||
std::array<bool, 1> fed{false};
|
return ::testing::Values(false);
|
||||||
#endif
|
#endif
|
||||||
for (auto j : fed) {
|
|
||||||
configs.emplace_back(tm, i, j);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return configs;
|
|
||||||
}
|
}
|
||||||
} // anonymous namespace
|
} // anonymous namespace
|
||||||
|
|
||||||
TEST_P(ColumnSplitTrainingTest, ColumnSampler) {
|
TEST_P(ColumnSplitTrainingTest, ColumnSampler) {
|
||||||
auto param = GetParam();
|
std::apply(TestColumnSplitColumnSampler, GetParam());
|
||||||
std::apply(TestColumnSplitColumnSampler, param);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(ColumnSplitTrainingTest, InteractionConstraints) {
|
TEST_P(ColumnSplitTrainingTest, InteractionConstraints) {
|
||||||
auto param = GetParam();
|
std::apply(TestColumnSplitInteractionConstraints, GetParam());
|
||||||
std::apply(TestColumnSplitInteractionConstraints, param);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(ColumnSplitTrainingTest, MonotoneConstraints) {
|
TEST_P(ColumnSplitTrainingTest, MonotoneConstraints) {
|
||||||
auto param = GetParam();
|
std::apply(TestColumnSplitMonotoneConstraints, GetParam());
|
||||||
std::apply(TestColumnSplitMonotoneConstraints, param);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_SUITE_P(ColumnSplit, ColumnSplitTrainingTest,
|
INSTANTIATE_TEST_SUITE_P(Cpu, ColumnSplitTrainingTest,
|
||||||
::testing::ValuesIn(MakeParamsForTest()));
|
::testing::Combine(::testing::Values("hist", "approx"),
|
||||||
|
::testing::Values(false), WithFed()));
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_SUITE_P(MGPU, ColumnSplitTrainingTest,
|
||||||
|
::testing::Combine(::testing::Values("hist", "approx"),
|
||||||
|
::testing::Values(true), WithFed()));
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user