diff --git a/src/context.cc b/src/context.cc
index 437c16f1d..2628c9d95 100644
--- a/src/context.cc
+++ b/src/context.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2014-2022 by XGBoost Contributors
+ * Copyright 2014-2023 by XGBoost Contributors
  *
  * \brief Context object used for controlling runtime parameters.
  */
@@ -53,6 +53,9 @@ void Context::ConfigureGpuId(bool require_gpu) {
 }
 
 std::int32_t Context::Threads() const {
+  if (omp_in_parallel()) {
+    return 1;
+  }
   auto n_threads = common::OmpGetNumThreads(nthread);
   if (cfs_cpu_count_ > 0) {
     n_threads = std::min(n_threads, cfs_cpu_count_);
diff --git a/tests/cpp/common/test_threading_utils.cc b/tests/cpp/common/test_threading_utils.cc
index 5607e6d05..2b1a2580a 100644
--- a/tests/cpp/common/test_threading_utils.cc
+++ b/tests/cpp/common/test_threading_utils.cc
@@ -1,13 +1,18 @@
-#include <dmlc/omp.h>
+/**
+ * Copyright 2019-2023 by XGBoost Contributors
+ */
 #include <gtest/gtest.h>
 
-#include "../../../src/common/column_matrix.h"
-#include "../../../src/common/threading_utils.h"
+#include <cstddef>  // std::size_t
+
+#include "../../../src/common/threading_utils.h"  // BlockedSpace2d,ParallelFor2d,ParallelFor
+#include "dmlc/omp.h"  // omp_in_parallel
+#include "xgboost/context.h"  // Context
 
 namespace xgboost {
 namespace common {
 
-TEST(CreateBlockedSpace2d, Test) {
+TEST(ParallelFor2d, CreateBlockedSpace2d) {
   constexpr size_t kDim1 = 5;
   constexpr size_t kDim2 = 3;
   constexpr size_t kGrainSize = 1;
@@ -35,30 +40,25 @@ TEST(ParallelFor2d, Test) {
   std::vector<int> matrix(kDim1 * kDim2, 0);
   BlockedSpace2d space(
       kDim1, [&](size_t) { return kDim2; }, kGrainSize);
+  Context ctx;
+  ctx.UpdateAllowUnknown(Args{{"nthread", "4"}});
+  ASSERT_EQ(ctx.nthread, 4);
 
-  auto old = omp_get_max_threads();
-  omp_set_num_threads(4);
-
-  ParallelFor2d(space, omp_get_max_threads(), [&](size_t i, Range1d r) {
+  ParallelFor2d(space, ctx.Threads(), [&](size_t i, Range1d r) {
     for (auto j = r.begin(); j < r.end(); ++j) {
-      matrix[i*kDim2 + j] += 1;
+      matrix[i * kDim2 + j] += 1;
     }
   });
 
   for (size_t i = 0; i < kDim1 * kDim2; i++) {
     ASSERT_EQ(matrix[i], 1);
   }
-
-  omp_set_num_threads(old);
 }
 
-TEST(ParallelFor2dNonUniform, Test) {
+TEST(ParallelFor2d, NonUniform) {
   constexpr size_t kDim1 = 5;
   constexpr size_t kGrainSize = 256;
 
-  auto old = omp_get_max_threads();
-  omp_set_num_threads(4);
-
   // here are quite non-uniform distribution in space
   // but ParallelFor2d should split them by blocks with max size = kGrainSize
   // and process in balanced manner (optimal performance)
@@ -72,7 +72,11 @@ TEST(ParallelFor2dNonUniform, Test) {
     working_space[i].resize(dim2[i], 0);
   }
 
-  ParallelFor2d(space, omp_get_max_threads(), [&](size_t i, Range1d r) {
+  Context ctx;
+  ctx.UpdateAllowUnknown(Args{{"nthread", "4"}});
+  ASSERT_EQ(ctx.nthread, 4);
+
+  ParallelFor2d(space, ctx.Threads(), [&](size_t i, Range1d r) {
     for (auto j = r.begin(); j < r.end(); ++j) {
       working_space[i][j] += 1;
     }
@@ -83,8 +87,20 @@ TEST(ParallelFor2dNonUniform, Test) {
       ASSERT_EQ(working_space[i][j], 1);
     }
   }
+}
 
-  omp_set_num_threads(old);
+TEST(ParallelFor, Basic) {
+  Context ctx;
+  std::size_t n{16};
+  auto n_threads = ctx.Threads();
+  ParallelFor(n, n_threads, [&](auto i) {
+    ASSERT_EQ(ctx.Threads(), 1);
+    if (n_threads > 1) {
+      ASSERT_TRUE(omp_in_parallel());
+    }
+    ASSERT_LT(i, n);
+  });
+  ASSERT_FALSE(omp_in_parallel());
 }
 }  // namespace common
 }  // namespace xgboost
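Below is a minimal standalone sketch (not part of the patch) of the behaviour introduced by the new guard in Context::Threads(): when omp_in_parallel() reports an enclosing parallel region, the thread count falls back to 1 so nested parallel loops do not oversubscribe the machine. The EffectiveThreads() helper and the configured count of 4 are hypothetical stand-ins, and the example assumes OpenMP is enabled (e.g. compiled with -fopenmp).

#include <omp.h>    // omp_in_parallel, OpenMP pragmas

#include <cstdio>   // std::printf

// Hypothetical helper mirroring the guard added to Context::Threads():
// inside an active parallel region, fall back to a single thread so a
// nested parallel loop does not multiply the thread count.
int EffectiveThreads(int configured) {
  if (omp_in_parallel()) {
    return 1;
  }
  return configured;
}

int main() {
  std::printf("outer:  %d\n", EffectiveThreads(4));    // prints 4
#pragma omp parallel num_threads(4)
  {
#pragma omp single
    std::printf("nested: %d\n", EffectiveThreads(4));  // prints 1
  }
  return 0;
}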