Consistent use of context to specify number of threads. (#8733)

- Use context in all tests.
- Use context in R.
- Use context in C API DMatrix initialization. (0 threads is used as dft).
This commit is contained in:
Jiaming Yuan
2023-01-30 15:25:31 +08:00
committed by GitHub
parent 21a28f2cc5
commit 3760cede0f
24 changed files with 212 additions and 152 deletions

View File

@@ -1,4 +1,6 @@
// Copyright (c) 2014-2022 by Contributors
/**
* Copyright 2014-2023 by XGBoost Contributors
*/
#include <rabit/c_api.h>
#include <cstring>
@@ -279,7 +281,7 @@ XGB_DLL int XGDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy
auto jconfig = Json::Load(StringView{config});
auto missing = GetMissing(jconfig);
std::string cache = RequiredArg<String>(jconfig, "cache_prefix", __func__);
auto n_threads = OptionalArg<Integer, int64_t>(jconfig, "nthread", common::OmpGetNumThreads(0));
auto n_threads = OptionalArg<Integer, int64_t>(jconfig, "nthread", 0);
xgboost_CHECK_C_ARG_PTR(next);
xgboost_CHECK_C_ARG_PTR(reset);
@@ -319,7 +321,7 @@ XGB_DLL int XGQuantileDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHand
xgboost_CHECK_C_ARG_PTR(config);
auto jconfig = Json::Load(StringView{config});
auto missing = GetMissing(jconfig);
auto n_threads = OptionalArg<Integer, int64_t>(jconfig, "nthread", common::OmpGetNumThreads(0));
auto n_threads = OptionalArg<Integer, int64_t>(jconfig, "nthread", 0);
auto max_bin = OptionalArg<Integer, int64_t>(jconfig, "max_bin", 256);
xgboost_CHECK_C_ARG_PTR(next);
@@ -420,7 +422,7 @@ XGB_DLL int XGDMatrixCreateFromCSR(char const *indptr, char const *indices, char
xgboost_CHECK_C_ARG_PTR(c_json_config);
auto config = Json::Load(StringView{c_json_config});
float missing = GetMissing(config);
auto n_threads = OptionalArg<Integer, int64_t>(config, "nthread", common::OmpGetNumThreads(0));
auto n_threads = OptionalArg<Integer, int64_t>(config, "nthread", 0);
xgboost_CHECK_C_ARG_PTR(out);
*out = new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, n_threads));
API_END();
@@ -435,10 +437,9 @@ XGB_DLL int XGDMatrixCreateFromDense(char const *data,
xgboost_CHECK_C_ARG_PTR(c_json_config);
auto config = Json::Load(StringView{c_json_config});
float missing = GetMissing(config);
auto n_threads = OptionalArg<Integer, int64_t>(config, "nthread", common::OmpGetNumThreads(0));
auto n_threads = OptionalArg<Integer, int64_t>(config, "nthread", 0);
xgboost_CHECK_C_ARG_PTR(out);
*out =
new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, n_threads));
*out = new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, n_threads));
API_END();
}
@@ -506,8 +507,7 @@ XGB_DLL int XGDMatrixCreateFromArrowCallback(XGDMatrixCallbackNext *next, char c
auto jconfig = Json::Load(StringView{config});
auto missing = GetMissing(jconfig);
auto n_batches = RequiredArg<Integer>(jconfig, "nbatch", __func__);
auto n_threads =
OptionalArg<Integer, std::int64_t>(jconfig, "nthread", common::OmpGetNumThreads(0));
auto n_threads = OptionalArg<Integer, std::int64_t>(jconfig, "nthread", 0);
data::RecordBatchesIterAdapter adapter(next, n_batches);
xgboost_CHECK_C_ARG_PTR(out);
*out = new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, n_threads));

View File

@@ -1,4 +1,6 @@
// Copyright (c) 2019-2022 by Contributors
/**
* Copyright 2019-2023 by XGBoost Contributors
*/
#include "../common/threading_utils.h"
#include "../data/device_adapter.cuh"
#include "../data/proxy_dmatrix.h"
@@ -68,8 +70,7 @@ XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data,
auto config = Json::Load(StringView{c_json_config});
float missing = GetMissing(config);
auto n_threads =
OptionalArg<Integer, std::int64_t>(config, "nthread", common::OmpGetNumThreads(0));
auto n_threads = OptionalArg<Integer, std::int64_t>(config, "nthread", 0);
data::CudfAdapter adapter(json_str);
*out =
new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, n_threads));
@@ -83,8 +84,7 @@ XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
std::string json_str{data};
auto config = Json::Load(StringView{c_json_config});
float missing = GetMissing(config);
auto n_threads =
OptionalArg<Integer, std::int64_t>(config, "nthread", common::OmpGetNumThreads(0));
auto n_threads = OptionalArg<Integer, std::int64_t>(config, "nthread", 0);
data::CupyAdapter adapter(json_str);
*out =
new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, n_threads));

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2022 by XGBoost Contributors
/**
* Copyright 2022-2023 by XGBoost Contributors
*/
#include "threading_utils.h"
@@ -10,14 +10,6 @@
namespace xgboost {
namespace common {
/**
* \brief Get thread limit from CFS
*
* Modified from
* github.com/psiha/sweater/blob/master/include/boost/sweater/hardware_concurrency.hpp
*
* MIT License: Copyright (c) 2016 Domagoj Šarić
*/
int32_t GetCfsCPUCount() noexcept {
#if defined(__linux__)
// https://bugs.openjdk.java.net/browse/JDK-8146115
@@ -47,5 +39,20 @@ int32_t GetCfsCPUCount() noexcept {
#endif // defined(__linux__)
return -1;
}
std::int32_t OmpGetNumThreads(std::int32_t n_threads) {
// Don't use parallel if we are in a parallel region.
if (omp_in_parallel()) {
return 1;
}
// If -1 or 0 is specified by the user, we default to maximum number of threads.
if (n_threads <= 0) {
n_threads = std::min(omp_get_num_procs(), omp_get_max_threads());
}
// Honor the openmp thread limit, which can be set via environment variable.
n_threads = std::min(n_threads, OmpGetThreadLimit());
n_threads = std::max(n_threads, 1);
return n_threads;
}
} // namespace common
} // namespace xgboost

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2019-2022 by XGBoost Contributors
/**
* Copyright 2019-2023 by XGBoost Contributors
*/
#ifndef XGBOOST_COMMON_THREADING_UTILS_H_
#define XGBOOST_COMMON_THREADING_UTILS_H_
@@ -231,23 +231,28 @@ void ParallelFor(Index size, int32_t n_threads, Func fn) {
ParallelFor(size, n_threads, Sched::Static(), fn);
}
inline int32_t OmpGetThreadLimit() {
int32_t limit = omp_get_thread_limit();
inline std::int32_t OmpGetThreadLimit() {
std::int32_t limit = omp_get_thread_limit();
CHECK_GE(limit, 1) << "Invalid thread limit for OpenMP.";
return limit;
}
int32_t GetCfsCPUCount() noexcept;
inline int32_t OmpGetNumThreads(int32_t n_threads) {
if (n_threads <= 0) {
n_threads = std::min(omp_get_num_procs(), omp_get_max_threads());
}
n_threads = std::min(n_threads, OmpGetThreadLimit());
n_threads = std::max(n_threads, 1);
return n_threads;
}
/**
* \brief Get thread limit from CFS.
*
* This function has non-trivial overhead and should not be called repeatly.
*
* Modified from
* github.com/psiha/sweater/blob/master/include/boost/sweater/hardware_concurrency.hpp
*
* MIT License: Copyright (c) 2016 Domagoj Šarić
*/
std::int32_t GetCfsCPUCount() noexcept;
/**
* \brief Get the number of available threads based on n_threads specified by users.
*/
std::int32_t OmpGetNumThreads(std::int32_t n_threads);
/*!
* \brief A C-style array with in-stack allocation. As long as the array is smaller than

View File

@@ -53,9 +53,6 @@ void Context::ConfigureGpuId(bool require_gpu) {
}
std::int32_t Context::Threads() const {
if (omp_in_parallel()) {
return 1;
}
auto n_threads = common::OmpGetNumThreads(nthread);
if (cfs_cpu_count_ > 0) {
n_threads = std::min(n_threads, cfs_cpu_count_);

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2015-2022 by XGBoost Contributors
/**
* Copyright 2015-2023 by XGBoost Contributors
* \file data.cc
*/
#include "xgboost/data.h"
@@ -27,6 +27,7 @@
#include "sparse_page_writer.h"
#include "validation.h"
#include "xgboost/c_api.h"
#include "xgboost/context.h"
#include "xgboost/host_device_vector.h"
#include "xgboost/learner.h"
#include "xgboost/logging.h"
@@ -850,7 +851,8 @@ DMatrix* DMatrix::Load(const std::string& uri, bool silent, DataSplitMode data_s
std::unique_ptr<dmlc::Parser<uint32_t>> parser(
dmlc::Parser<uint32_t>::Create(fname.c_str(), partid, npart, file_format.c_str()));
data::FileAdapter adapter(parser.get());
dmat = DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1, cache_file);
dmat = DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), Context{}.Threads(),
cache_file);
} else {
data::FileIterator iter{fname, static_cast<uint32_t>(partid), static_cast<uint32_t>(npart),
file_format};