Honor CPU counts from CFS. (#7654)
This commit is contained in:
parent
671b3c8d8e
commit
549f3bd781
@ -79,6 +79,7 @@
|
||||
#include "../src/common/json.cc"
|
||||
#include "../src/common/io.cc"
|
||||
#include "../src/common/survival_util.cc"
|
||||
#include "../src/common/threading_utils.cc"
|
||||
#include "../src/common/version.cc"
|
||||
|
||||
// c_api
|
||||
|
||||
@ -13,20 +13,27 @@
|
||||
namespace xgboost {
|
||||
|
||||
struct GenericParameter : public XGBoostParameter<GenericParameter> {
|
||||
private:
|
||||
// cached value for CFS CPU limit. (used in containerized env)
|
||||
int32_t cfs_cpu_count_; // NOLINT
|
||||
|
||||
public:
|
||||
// Constant representing the device ID of CPU.
|
||||
static int32_t constexpr kCpuId = -1;
|
||||
static int64_t constexpr kDefaultSeed = 0;
|
||||
|
||||
public:
|
||||
GenericParameter();
|
||||
|
||||
// stored random seed
|
||||
int64_t seed { kDefaultSeed };
|
||||
// whether seed the PRNG each iteration
|
||||
bool seed_per_iteration;
|
||||
bool seed_per_iteration{false};
|
||||
// number of threads to use if OpenMP is enabled
|
||||
// if equals 0, use system default
|
||||
int nthread{0};
|
||||
// primary device, -1 means no gpu.
|
||||
int gpu_id;
|
||||
int gpu_id{kCpuId};
|
||||
// fail when gpu_id is invalid
|
||||
bool fail_on_invalid_gpu_id {false};
|
||||
bool validate_parameters {false};
|
||||
|
||||
51
src/common/threading_utils.cc
Normal file
51
src/common/threading_utils.cc
Normal file
@ -0,0 +1,51 @@
|
||||
/*!
|
||||
* Copyright 2022 by XGBoost Contributors
|
||||
*/
|
||||
#include "threading_utils.h"
|
||||
#if defined(__linux__)
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#endif // defined(__linux__)
|
||||
#include "xgboost/logging.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
/**
|
||||
* \brief Get thread limit from CFS
|
||||
*
|
||||
* Modified from
|
||||
* github.com/psiha/sweater/blob/master/include/boost/sweater/hardware_concurrency.hpp
|
||||
*
|
||||
* MIT License: Copyright (c) 2016 Domagoj Šarić
|
||||
*/
|
||||
int32_t GetCfsCPUCount() noexcept {
#if defined(__linux__)
  // https://bugs.openjdk.java.net/browse/JDK-8146115
  // http://hg.openjdk.java.net/jdk/hs/rev/7f22774a5f42
  // RAM limit /sys/fs/cgroup/memory.limit_in_bytes
  // swap limit /sys/fs/cgroup/memory.memsw.limit_in_bytes

  // Read one decimal integer from the beginning of `file_path`.
  // Returns -1 when the file cannot be opened, cannot be read, is empty, or
  // does not start with a value that fits into an int.  Never throws, so it
  // is safe to use from this noexcept function.
  auto read_int = [](char const* const file_path) noexcept {
    auto const fd = ::open(file_path, O_RDONLY, 0);
    if (fd == -1) {
      return -1;
    }
    char value[64];
    // Leave room for the terminator: read() does not NUL-terminate the buffer.
    auto const n_read = ::read(fd, value, sizeof(value) - 1);
    // The descriptor is no longer needed, release it before any early return.
    ::close(fd);
    if (n_read <= 0) {
      // Read error or empty file, treat as "no limit available".
      return -1;
    }
    value[n_read] = '\0';
    try {
      return std::stoi(value);
    } catch (std::exception const&) {
      // Not a number or out of range for int.
      return -1;
    }
  };
  // Completely Fair Scheduler (CFS) bandwidth control from Linux.
  auto const cfs_quota = read_int("/sys/fs/cgroup/cpu/cpu.cfs_quota_us");
  auto const cfs_period = read_int("/sys/fs/cgroup/cpu/cpu.cfs_period_us");
  if ((cfs_quota > 0) && (cfs_period > 0)) {
    // Integer division rounds down; a fractional quota still gets one CPU.
    return std::max(cfs_quota / cfs_period, 1);
  }
#endif  // defined(__linux__)
  // Not on Linux, or no positive quota/period could be read.
  return -1;
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
@ -1,7 +1,5 @@
|
||||
/*!
|
||||
* Copyright 2015-2019 by Contributors
|
||||
* \file common.h
|
||||
* \brief Threading utilities
|
||||
* Copyright 2019-2022 by XGBoost Contributors
|
||||
*/
|
||||
#ifndef XGBOOST_COMMON_THREADING_UTILS_H_
|
||||
#define XGBOOST_COMMON_THREADING_UTILS_H_
|
||||
@ -238,6 +236,8 @@ inline int32_t OmpGetThreadLimit() {
|
||||
return limit;
|
||||
}
|
||||
|
||||
/**
 * \brief Get the CPU count implied by the Linux CFS quota, if any.
 *
 * \return quota / period (at least 1) when both values can be read from the
 *         cgroup CPU controller files and are positive; -1 otherwise,
 *         including on non-Linux platforms.
 */
int32_t GetCfsCPUCount() noexcept;
|
||||
|
||||
inline int32_t OmpGetNumThreads(int32_t n_threads) {
|
||||
if (n_threads <= 0) {
|
||||
n_threads = std::min(omp_get_num_procs(), omp_get_max_threads());
|
||||
|
||||
@ -228,6 +228,8 @@ DMLC_REGISTER_PARAMETER(GenericParameter);
|
||||
int constexpr GenericParameter::kCpuId;
|
||||
int64_t constexpr GenericParameter::kDefaultSeed;
|
||||
|
||||
// Cache the CFS CPU limit once at construction; the cached value is what
// Threads() compares against.
GenericParameter::GenericParameter()
    : cfs_cpu_count_{common::GetCfsCPUCount()} {
}
|
||||
|
||||
void GenericParameter::ConfigureGpuId(bool require_gpu) {
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
if (gpu_id == kCpuId) { // 0. User didn't specify the `gpu_id'
|
||||
@ -262,7 +264,11 @@ void GenericParameter::ConfigureGpuId(bool require_gpu) {
|
||||
}
|
||||
|
||||
int32_t GenericParameter::Threads() const {
|
||||
return common::OmpGetNumThreads(nthread);
|
||||
auto n_threads = common::OmpGetNumThreads(nthread);
|
||||
if (cfs_cpu_count_ > 0) {
|
||||
n_threads = std::min(n_threads, cfs_cpu_count_);
|
||||
}
|
||||
return n_threads;
|
||||
}
|
||||
|
||||
using LearnerAPIThreadLocalStore =
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user