Return single thread from context when called inside omp region. (#8693)

2023-01-18 09:23:37 +08:00 · 2023-01-18 09:23:37 +08:00 · 4416452f94
commit 4416452f94
parent 31b9cbab3d
2 changed files with 37 additions and 18 deletions
--- a/src/context.cc
+++ b/src/context.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2014-2022 by XGBoost Contributors
+ * Copyright 2014-2023 by XGBoost Contributors
 *
 * \brief Context object used for controlling runtime parameters.
 */
@@ -53,6 +53,9 @@ void Context::ConfigureGpuId(bool require_gpu) {
 }

 std::int32_t Context::Threads() const {
+  if (omp_in_parallel()) {
+    return 1;
+  }
  auto n_threads = common::OmpGetNumThreads(nthread);
  if (cfs_cpu_count_ > 0) {
    n_threads = std::min(n_threads, cfs_cpu_count_);
--- a/tests/cpp/common/test_threading_utils.cc
+++ b/tests/cpp/common/test_threading_utils.cc
@@ -1,13 +1,18 @@
-#include <cstddef>
+/**
+ * Copyright 2019-2023 by XGBoost Contributors
+ */
 #include <gtest/gtest.h>

-#include "../../../src/common/column_matrix.h"
-#include "../../../src/common/threading_utils.h"
+#include <cstddef>  // std::size_t
+
+#include "../../../src/common/threading_utils.h"  // BlockedSpace2d,ParallelFor2d,ParallelFor
+#include "dmlc/omp.h"                             // omp_in_parallel
+#include "xgboost/context.h"                      // Context

 namespace xgboost {
 namespace common {

-TEST(CreateBlockedSpace2d, Test) {
+TEST(ParallelFor2d, CreateBlockedSpace2d) {
  constexpr size_t kDim1 = 5;
  constexpr size_t kDim2 = 3;
  constexpr size_t kGrainSize = 1;
@@ -35,30 +40,25 @@ TEST(ParallelFor2d, Test) {
  std::vector<int> matrix(kDim1 * kDim2, 0);
  BlockedSpace2d space(
      kDim1, [&](size_t) { return kDim2; }, kGrainSize);
+  Context ctx;
+  ctx.UpdateAllowUnknown(Args{{"nthread", "4"}});
+  ASSERT_EQ(ctx.nthread, 4);

-  auto old = omp_get_max_threads();
-  omp_set_num_threads(4);
-
-  ParallelFor2d(space, omp_get_max_threads(), [&](size_t i, Range1d r) {
+  ParallelFor2d(space, ctx.Threads(), [&](size_t i, Range1d r) {
    for (auto j = r.begin(); j < r.end(); ++j) {
-      matrix[i*kDim2 + j] += 1;
+      matrix[i * kDim2 + j] += 1;
    }
  });

  for (size_t i = 0; i < kDim1 * kDim2; i++) {
    ASSERT_EQ(matrix[i], 1);
  }
-
-  omp_set_num_threads(old);
 }

-TEST(ParallelFor2dNonUniform, Test) {
+TEST(ParallelFor2d, NonUniform) {
  constexpr size_t kDim1 = 5;
  constexpr size_t kGrainSize = 256;

-  auto old = omp_get_max_threads();
-  omp_set_num_threads(4);
-
  // here are quite non-uniform distribution in space
  // but ParallelFor2d should split them by blocks with max size = kGrainSize
  // and process in balanced manner (optimal performance)
@@ -72,7 +72,11 @@ TEST(ParallelFor2dNonUniform, Test) {
    working_space[i].resize(dim2[i], 0);
  }

-  ParallelFor2d(space, omp_get_max_threads(), [&](size_t i, Range1d r) {
+  Context ctx;
+  ctx.UpdateAllowUnknown(Args{{"nthread", "4"}});
+  ASSERT_EQ(ctx.nthread, 4);
+
+  ParallelFor2d(space, ctx.Threads(), [&](size_t i, Range1d r) {
    for (auto j = r.begin(); j < r.end(); ++j) {
      working_space[i][j] += 1;
    }
@@ -83,8 +87,20 @@ TEST(ParallelFor2dNonUniform, Test) {
      ASSERT_EQ(working_space[i][j], 1);
    }
  }
+}

-  omp_set_num_threads(old);
+TEST(ParallelFor, Basic) {
+  Context ctx;
+  std::size_t n{16};
+  auto n_threads = ctx.Threads();
+  ParallelFor(n, n_threads, [&](auto i) {
+    ASSERT_EQ(ctx.Threads(), 1);
+    if (n_threads > 1) {
+      ASSERT_TRUE(omp_in_parallel());
+    }
+    ASSERT_LT(i, n);
+  });
+  ASSERT_FALSE(omp_in_parallel());
 }
 }  // namespace common
 }  // namespace xgboost