diff --git a/src/context.cc b/src/context.cc
index 437c16f1d..2628c9d95 100644
--- a/src/context.cc
+++ b/src/context.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2014-2022 by XGBoost Contributors
+ * Copyright 2014-2023 by XGBoost Contributors
  *
  * \brief Context object used for controlling runtime parameters.
  */
@@ -53,6 +53,9 @@ void Context::ConfigureGpuId(bool require_gpu) {
 }
 
 std::int32_t Context::Threads() const {
+  if (omp_in_parallel()) {
+    return 1;
+  }
   auto n_threads = common::OmpGetNumThreads(nthread);
   if (cfs_cpu_count_ > 0) {
     n_threads = std::min(n_threads, cfs_cpu_count_);
diff --git a/tests/cpp/common/test_threading_utils.cc b/tests/cpp/common/test_threading_utils.cc
index 5607e6d05..2b1a2580a 100644
--- a/tests/cpp/common/test_threading_utils.cc
+++ b/tests/cpp/common/test_threading_utils.cc
@@ -1,13 +1,18 @@
-#include <dmlc/omp.h>
+/**
+ * Copyright 2019-2023 by XGBoost Contributors
+ */
 #include <gtest/gtest.h>
 
-#include "../../../src/common/column_matrix.h"
-#include "../../../src/common/threading_utils.h"
+#include <cstddef>  // std::size_t
+
+#include "../../../src/common/threading_utils.h"  // BlockedSpace2d,ParallelFor2d,ParallelFor
+#include "dmlc/omp.h"  // omp_in_parallel
+#include "xgboost/context.h"  // Context
 
 namespace xgboost {
 namespace common {
 
-TEST(CreateBlockedSpace2d, Test) {
+TEST(ParallelFor2d, CreateBlockedSpace2d) {
   constexpr size_t kDim1 = 5;
   constexpr size_t kDim2 = 3;
   constexpr size_t kGrainSize = 1;
@@ -35,30 +40,25 @@ TEST(ParallelFor2d, Test) {
   std::vector<int> matrix(kDim1 * kDim2, 0);
   BlockedSpace2d space(
       kDim1, [&](size_t) { return kDim2; }, kGrainSize);
+  Context ctx;
+  ctx.UpdateAllowUnknown(Args{{"nthread", "4"}});
+  ASSERT_EQ(ctx.nthread, 4);
 
-  auto old = omp_get_max_threads();
-  omp_set_num_threads(4);
-
-  ParallelFor2d(space, omp_get_max_threads(), [&](size_t i, Range1d r) {
+  ParallelFor2d(space, ctx.Threads(), [&](size_t i, Range1d r) {
     for (auto j = r.begin(); j < r.end(); ++j) {
-      matrix[i*kDim2 + j] += 1;
+      matrix[i * kDim2 + j] += 1;
     }
   });
 
   for (size_t i = 0; i < kDim1 * kDim2; i++) {
     ASSERT_EQ(matrix[i], 1);
   }
-
-  omp_set_num_threads(old);
 }
 
-TEST(ParallelFor2dNonUniform, Test) {
+TEST(ParallelFor2d, NonUniform) {
   constexpr size_t kDim1 = 5;
   constexpr size_t kGrainSize = 256;
 
-  auto old = omp_get_max_threads();
-  omp_set_num_threads(4);
-
   // here are quite non-uniform distribution in space
   // but ParallelFor2d should split them by blocks with max size = kGrainSize
   // and process in balanced manner (optimal performance)
@@ -72,7 +72,11 @@ TEST(ParallelFor2dNonUniform, Test) {
     working_space[i].resize(dim2[i], 0);
   }
 
-  ParallelFor2d(space, omp_get_max_threads(), [&](size_t i, Range1d r) {
+  Context ctx;
+  ctx.UpdateAllowUnknown(Args{{"nthread", "4"}});
+  ASSERT_EQ(ctx.nthread, 4);
+
+  ParallelFor2d(space, ctx.Threads(), [&](size_t i, Range1d r) {
     for (auto j = r.begin(); j < r.end(); ++j) {
       working_space[i][j] += 1;
     }
@@ -83,8 +87,20 @@ TEST(ParallelFor2dNonUniform, Test) {
       ASSERT_EQ(working_space[i][j], 1);
     }
   }
+}
 
-  omp_set_num_threads(old);
+TEST(ParallelFor, Basic) {
+  Context ctx;
+  std::size_t n{16};
+  auto n_threads = ctx.Threads();
+  ParallelFor(n, n_threads, [&](auto i) {
+    ASSERT_EQ(ctx.Threads(), 1);
+    if (n_threads > 1) {
+      ASSERT_TRUE(omp_in_parallel());
+    }
+    ASSERT_LT(i, n);
+  });
+  ASSERT_FALSE(omp_in_parallel());
 }
 }  // namespace common
 }  // namespace xgboost
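Below is a minimal standalone sketch (not part of the patch) of the behaviour introduced by the new guard in Context::Threads(): when omp_in_parallel() reports an enclosing parallel region, the thread count falls back to 1 so nested parallel loops do not oversubscribe the machine. The EffectiveThreads() helper and the configured count of 4 are hypothetical stand-ins, and the example assumes OpenMP is enabled (e.g. compiled with -fopenmp).

#include <omp.h>    // omp_in_parallel, OpenMP pragmas

#include <cstdio>   // std::printf

// Hypothetical helper mirroring the guard added to Context::Threads():
// inside an active parallel region, fall back to a single thread so a
// nested parallel loop does not multiply the thread count.
int EffectiveThreads(int configured) {
  if (omp_in_parallel()) {
    return 1;
  }
  return configured;
}

int main() {
  std::printf("outer:  %d\n", EffectiveThreads(4));    // prints 4
#pragma omp parallel num_threads(4)
  {
#pragma omp single
    std::printf("nested: %d\n", EffectiveThreads(4));  // prints 1
  }
  return 0;
}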