diff --git a/src/common/device_helpers.cuh b/src/common/device_helpers.cuh index 7271dcb5e..2a7034706 100644 --- a/src/common/device_helpers.cuh +++ b/src/common/device_helpers.cuh @@ -1004,7 +1004,7 @@ class AllReducer { template void ExecuteShards(std::vector *shards, FunctionT f) { -#pragma omp parallel for schedule(static, 1) +#pragma omp parallel for schedule(static, 1) if (shards->size() > 1) for (int shard = 0; shard < shards->size(); ++shard) { f(shards->at(shard)); } @@ -1023,7 +1023,7 @@ void ExecuteShards(std::vector *shards, FunctionT f) { template void ExecuteIndexShards(std::vector *shards, FunctionT f) { -#pragma omp parallel for schedule(static, 1) +#pragma omp parallel for schedule(static, 1) if (shards->size() > 1) for (int shard = 0; shard < shards->size(); ++shard) { f(shard, shards->at(shard)); } @@ -1045,7 +1045,7 @@ void ExecuteIndexShards(std::vector *shards, FunctionT f) { template ReduceT ReduceShards(std::vector *shards, FunctionT f) { std::vector sums(shards->size()); -#pragma omp parallel for schedule(static, 1) +#pragma omp parallel for schedule(static, 1) if (shards->size() > 1) for (int shard = 0; shard < shards->size(); ++shard) { sums[shard] = f(shards->at(shard)); } diff --git a/src/objective/regression_obj.cc b/src/objective/regression_obj.cc index 75af216a1..98d7b9147 100644 --- a/src/objective/regression_obj.cc +++ b/src/objective/regression_obj.cc @@ -55,9 +55,7 @@ class RegLossObj : public ObjFunction { avx::Float8 scale(param_.scale_pos_weight); const omp_ulong remainder = n % 8; - int nthread = omp_get_max_threads(); - // Use a maximum of 8 threads -#pragma omp parallel for schedule(static) num_threads(std::min(8, nthread)) +#pragma omp parallel for schedule(static) for (omp_ulong i = 0; i < n - remainder; i += 8) { avx::Float8 y(&info.labels_[i]); avx::Float8 p = Loss::PredTransform(avx::Float8(&preds_h[i])); @@ -77,9 +75,6 @@ class RegLossObj : public ObjFunction { gpair[i] = GradientPair(Loss::FirstOrderGradient(p, y) * w, Loss::SecondOrderGradient(p, y) * w); } - - // Reset omp max threads - omp_set_num_threads(nthread); } const char *DefaultEvalMetric() const override { return Loss::DefaultEvalMetric(); diff --git a/src/objective/regression_obj_gpu.cu b/src/objective/regression_obj_gpu.cu index 2dfc691e3..5d7c21ebd 100644 --- a/src/objective/regression_obj_gpu.cu +++ b/src/objective/regression_obj_gpu.cu @@ -136,7 +136,7 @@ class GPURegLossObj : public ObjFunction { } // run the kernel -#pragma omp parallel for schedule(static, 1) +#pragma omp parallel for schedule(static, 1) if (devices_.Size() > 1) for (int i = 0; i < devices_.Size(); ++i) { int d = devices_[i]; dh::safe_cuda(cudaSetDevice(d)); @@ -173,7 +173,7 @@ class GPURegLossObj : public ObjFunction { } void PredTransformDevice(HostDeviceVector* preds) { -#pragma omp parallel for schedule(static, 1) +#pragma omp parallel for schedule(static, 1) if (devices_.Size() > 1) for (int i = 0; i < devices_.Size(); ++i) { int d = devices_[i]; dh::safe_cuda(cudaSetDevice(d));