Fix non-openmp build. (#5566)

* Add test to Jenkins.
* Fix threading utils tests.
* Require thread library.
Jiaming Yuan 2020-04-20 12:16:38 +08:00 committed by GitHub
parent b2827a80e1
commit ccd30e4491
9 changed files with 52 additions and 24 deletions
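
Editorial note (not part of the commit): an OpenMP-free build still has to compile the `omp_*` call sites visible in the hunks below, which generally works by providing single-threaded fallbacks when `_OPENMP` is not defined, while `#pragma omp` lines are simply ignored by the compiler. What the CMake changes add on top of that is an explicit `find_package(Threads REQUIRED)` plus `Threads::Threads`, presumably because the thread library is no longer pulled in implicitly through the OpenMP flags. A minimal self-contained sketch of that fallback pattern, assuming stubs named after the OpenMP API (the data and `main` below are invented for illustration and are not code from this repository):

    #include <cstdio>
    #include <vector>

    #if defined(_OPENMP)
    #include <omp.h>
    #else
    // Single-threaded stand-ins so OpenMP call sites keep compiling.
    inline int omp_get_max_threads() { return 1; }
    inline int omp_get_thread_num() { return 0; }
    #endif  // defined(_OPENMP)

    int main() {
      std::vector<int> data(8, 0);
      // Without -fopenmp this pragma is ignored and the loop runs serially,
      // which is why the stubs above are all that is needed to keep building.
    #pragma omp parallel for num_threads(omp_get_max_threads())
      for (int i = 0; i < static_cast<int>(data.size()); ++i) {
        data[i] = omp_get_thread_num();
      }
      std::printf("data[0] written by thread %d\n", data[0]);
      return 0;
    }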


@@ -95,6 +95,8 @@ if (USE_CUDA)
   message(STATUS "CUDA GEN_CODE: ${GEN_CODE}")
 endif (USE_CUDA)
+find_package(Threads REQUIRED)
 if (USE_OPENMP)
   if (APPLE)
     # Require CMake 3.16+ on Mac OSX, as previous versions of CMake had trouble locating
@@ -128,7 +130,7 @@ foreach(lib rabit rabit_base rabit_empty rabit_mock rabit_mock_static)
   # Explicitly link dmlc to rabit, so that configured header (build_config.h)
   # from dmlc is correctly applied to rabit.
   if (TARGET ${lib})
-    target_link_libraries(${lib} dmlc)
+    target_link_libraries(${lib} dmlc ${CMAKE_THREAD_LIBS_INIT})
   endif (TARGET ${lib})
 endforeach()
@@ -138,6 +140,7 @@ if (R_LIB)
 endif (R_LIB)
 # core xgboost
+list(APPEND LINKED_LIBRARIES_PRIVATE Threads::Threads ${CMAKE_THREAD_LIBS_INIT})
 add_subdirectory(${xgboost_SOURCE_DIR}/plugin)
 add_subdirectory(${xgboost_SOURCE_DIR}/src)
 target_link_libraries(objxgboost PUBLIC dmlc)

Jenkinsfile

@@ -63,6 +63,7 @@ pipeline {
         parallel ([
           'build-cpu': { BuildCPU() },
           'build-cpu-rabit-mock': { BuildCPUMock() },
+          'build-cpu-non-omp': { BuildCPUNonOmp() },
           'build-gpu-cuda9.0': { BuildCUDA(cuda_version: '9.0') },
           'build-gpu-cuda10.0': { BuildCUDA(cuda_version: '10.0') },
           'build-gpu-cuda10.1': { BuildCUDA(cuda_version: '10.1') },
@@ -215,12 +216,28 @@ def BuildCPUMock() {
     sh """
     ${dockerRun} ${container_type} ${docker_binary} tests/ci_build/build_mock_cmake.sh
     """
     echo 'Stashing rabit C++ test executable (xgboost)...'
     stash name: 'xgboost_rabit_tests', includes: 'xgboost'
     deleteDir()
   }
 }
+def BuildCPUNonOmp() {
+  node('linux && cpu') {
+    unstash name: 'srcs'
+    echo "Build CPU without OpenMP"
+    def container_type = "cpu"
+    def docker_binary = "docker"
+    sh """
+    ${dockerRun} ${container_type} ${docker_binary} tests/ci_build/build_via_cmake.sh -DUSE_OPENMP=OFF
+    """
+    echo "Running Non-OpenMP C++ test..."
+    sh """
+    ${dockerRun} ${container_type} ${docker_binary} build/testxgboost
+    """
+    deleteDir()
+  }
+}
 def BuildCUDA(args) {
   node('linux && cpu') {
@@ -259,7 +276,7 @@ def BuildJVMPackages(args) {
     ${docker_extra_params} ${dockerRun} ${container_type} ${docker_binary} tests/ci_build/build_jvm_packages.sh ${args.spark_version}
     """
     echo 'Stashing XGBoost4J JAR...'
-    stash name: 'xgboost4j_jar', includes: 'jvm-packages/xgboost4j/target/*.jar,jvm-packages/xgboost4j-spark/target/*.jar,jvm-packages/xgboost4j-example/target/*.jar'
+    stash name: 'xgboost4j_jar', includes: "jvm-packages/xgboost4j/target/*.jar,jvm-packages/xgboost4j-spark/target/*.jar,jvm-packages/xgboost4j-example/target/*.jar"
     deleteDir()
   }
 }


@@ -88,12 +88,15 @@ if (XGBOOST_BUILTIN_PREFETCH_PRESENT)
     -DXGBOOST_BUILTIN_PREFETCH_PRESENT=1)
 endif (XGBOOST_BUILTIN_PREFETCH_PRESENT)
+find_package(Threads REQUIRED)
+list(APPEND SRC_LIBS Threads::Threads ${CMAKE_THREAD_LIBS_INIT})
 if (USE_OPENMP OR USE_CUDA) # CUDA requires OpenMP
   find_package(OpenMP REQUIRED)
   list(APPEND SRC_LIBS OpenMP::OpenMP_CXX)
-  set(LINKED_LIBRARIES_PRIVATE "${LINKED_LIBRARIES_PRIVATE};${SRC_LIBS}" PARENT_SCOPE)
   target_link_libraries(objxgboost PRIVATE OpenMP::OpenMP_CXX)
 endif (USE_OPENMP OR USE_CUDA)
+set(LINKED_LIBRARIES_PRIVATE "${LINKED_LIBRARIES_PRIVATE};${SRC_LIBS}" PARENT_SCOPE)
 # For MSVC: Call msvc_use_static_runtime() once again to completely
 # replace /MD with /MT. See https://github.com/dmlc/xgboost/issues/4462


@@ -227,8 +227,7 @@ class ColumnMatrix {
     /* missing values make sense only for column with type kDenseColumn,
        and if no missing values were observed it could be handled much faster. */
     if (noMissingValues) {
-      const int32_t nthread = omp_get_max_threads();  // NOLINT
-      #pragma omp parallel for num_threads(nthread)
+      #pragma omp parallel for num_threads(omp_get_max_threads())
       for (omp_ulong rid = 0; rid < nrow; ++rid) {
         const size_t ibegin = rid*nfeature;
         const size_t iend = (rid+1)*nfeature;
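
The temporary that existed only to feed `num_threads` is folded into the pragma. A likely motivation (not stated in the commit) is that with OpenMP disabled the pragma is compiled out, leaving `nthread` unused and tripping `-Wunused-variable` in warnings-as-errors builds. A reduced sketch of the before/after shapes, with invented data and a hypothetical `FillOnes` helper:

    #include <cstddef>
    #include <vector>

    #if defined(_OPENMP)
    #include <omp.h>
    #else
    inline int omp_get_max_threads() { return 1; }  // non-OpenMP fallback
    #endif  // defined(_OPENMP)

    void FillOnes(std::vector<int>* out) {
      // Old shape: the local is referenced only inside the pragma, so a build
      // that ignores the pragma sees an unused variable:
      //   const int nthread = omp_get_max_threads();
      //   #pragma omp parallel for num_threads(nthread)
      //
      // New shape: evaluate the call inside the clause instead.
    #pragma omp parallel for num_threads(omp_get_max_threads())
      for (std::ptrdiff_t i = 0; i < static_cast<std::ptrdiff_t>(out->size()); ++i) {
        (*out)[i] = 1;
      }
    }

    int main() {
      std::vector<int> v(16, 0);
      FillOnes(&v);
      return v[0] == 1 ? 0 : 1;
    }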


@@ -110,10 +110,12 @@ class BlockedSpace2d {
 // Wrapper to implement nested parallelism with simple omp parallel for
 template<typename Func>
-void ParallelFor2d(const BlockedSpace2d& space, const int nthreads, Func func) {
+void ParallelFor2d(const BlockedSpace2d& space, int nthreads, Func func) {
   const size_t num_blocks_in_space = space.Size();
+  nthreads = std::min(nthreads, omp_get_max_threads());
+  nthreads = std::max(nthreads, 1);
 #pragma omp parallel num_threads(nthreads)
   {
     size_t tid = omp_get_thread_num();
     size_t chunck_size = num_blocks_in_space / nthreads + !!(num_blocks_in_space % nthreads);
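
The new clamp keeps `nthreads` within `[1, omp_get_max_threads()]`. Without it, a caller asking for four threads in a non-OpenMP build (where only one thread ever runs) would compute per-thread chunks as if four workers existed, so most blocks would never be processed; that matches the test changes further down, which now pass `omp_get_max_threads()` instead of a hard-coded 4. A standalone sketch of the partitioning arithmetic under that assumption, with `BlockedSpace2d`/`Range1d` replaced by plain block indices (names and data here are invented):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    #if defined(_OPENMP)
    #include <omp.h>
    #else
    inline int omp_get_max_threads() { return 1; }  // non-OpenMP fallback
    inline int omp_get_thread_num() { return 0; }
    #endif  // defined(_OPENMP)

    // Count how many of `num_blocks` blocks get visited when the caller asks
    // for `nthreads` threads but only the threads OpenMP provides actually run.
    size_t BlocksVisited(size_t num_blocks, int nthreads, bool clamp) {
      if (clamp) {
        nthreads = std::max(std::min(nthreads, omp_get_max_threads()), 1);
      }
      std::vector<char> visited(num_blocks, 0);
    #pragma omp parallel num_threads(nthreads)
      {
        size_t tid = static_cast<size_t>(omp_get_thread_num());
        size_t chunk = num_blocks / nthreads + !!(num_blocks % nthreads);
        size_t begin = chunk * tid;
        size_t end = std::min(begin + chunk, num_blocks);
        for (size_t i = begin; i < end; ++i) visited[i] = 1;  // disjoint ranges
      }
      return static_cast<size_t>(std::count(visited.begin(), visited.end(), 1));
    }

    int main() {
      // In a build without OpenMP only thread 0 exists, so asking for 4 threads
      // without the clamp leaves three quarters of the blocks untouched.
      std::printf("unclamped: %zu of 16 blocks\n", BlocksVisited(16, 4, false));
      std::printf("clamped:   %zu of 16 blocks\n", BlocksVisited(16, 4, true));
      return 0;
    }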


@@ -189,12 +189,7 @@ struct EvalAuc : public Metric {
           rec[j - gptr[group_id]] = {h_preds[j], j};
         }
-        if (omp_in_parallel()) {
-          std::stable_sort(rec.begin(), rec.end(), common::CmpFirst);
-        } else {
-          XGBOOST_PARALLEL_SORT(rec.begin(), rec.end(), common::CmpFirst);
-        }
+        XGBOOST_PARALLEL_SORT(rec.begin(), rec.end(), common::CmpFirst);
         // calculate AUC
         double sum_pospair = 0.0;
         double sum_npos = 0.0, sum_nneg = 0.0, buf_pos = 0.0, buf_neg = 0.0;
@@ -557,11 +552,7 @@ struct EvalAucPR : public Metric {
           continue;
         }
-        if (omp_in_parallel()) {
-          std::stable_sort(rec.begin(), rec.end(), common::CmpFirst);
-        } else {
-          XGBOOST_PARALLEL_SORT(rec.begin(), rec.end(), common::CmpFirst);
-        }
+        XGBOOST_PARALLEL_SORT(rec.begin(), rec.end(), common::CmpFirst);
         // calculate AUC
         double tp = 0.0, prevtp = 0.0, fp = 0.0, prevfp = 0.0, h = 0.0, a = 0.0, b = 0.0;
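
Both AUC implementations now call `XGBOOST_PARALLEL_SORT` unconditionally instead of branching on `omp_in_parallel()`, which keeps these call sites free of OpenMP runtime queries. The macro's real definition lives elsewhere in the code base; purely to illustrate the idea of putting the fallback decision in one place, a hypothetical wrapper that picks a parallel sort when the toolchain provides one and otherwise falls back to a serial `std::sort` might look like this (`PARALLEL_SORT_SKETCH` and the sample records are invented, not the project's macro):

    #include <algorithm>
    #include <utility>
    #include <vector>

    #if defined(_OPENMP) && defined(__GNUC__) && !defined(__clang__)
    #include <parallel/algorithm>
    // GNU parallel-mode sort when OpenMP and libstdc++ extensions are available.
    #define PARALLEL_SORT_SKETCH(begin, end, comp) __gnu_parallel::sort((begin), (end), (comp))
    #else
    // Serial fallback for non-OpenMP builds and other toolchains.
    #define PARALLEL_SORT_SKETCH(begin, end, comp) std::sort((begin), (end), (comp))
    #endif

    int main() {
      std::vector<std::pair<float, unsigned>> rec = {{0.3f, 0}, {0.9f, 1}, {0.1f, 2}};
      // Sort by predicted score, descending -- roughly what the AUC metric needs.
      PARALLEL_SORT_SKETCH(rec.begin(), rec.end(),
                           [](const std::pair<float, unsigned>& a,
                              const std::pair<float, unsigned>& b) {
                             return a.first > b.first;
                           });
      return rec.front().second == 1 ? 0 : 1;
    }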


@@ -69,8 +69,8 @@ set_target_properties(
 target_link_libraries(testxgboost
   PRIVATE
   ${GTEST_LIBRARIES}
-  ${LINKED_LIBRARIES_PRIVATE}
-  OpenMP::OpenMP_CXX)
+  ${LINKED_LIBRARIES_PRIVATE})
 target_compile_definitions(testxgboost PRIVATE ${XGBOOST_DEFINITIONS})
 set_output_directory(testxgboost ${xgboost_BINARY_DIR})


@@ -38,7 +38,10 @@ TEST(ParallelFor2d, Test) {
     return kDim2;
   }, kGrainSize);
-  ParallelFor2d(space, 4, [&](size_t i, Range1d r) {
+  auto old = omp_get_max_threads();
+  omp_set_num_threads(4);
+  ParallelFor2d(space, omp_get_max_threads(), [&](size_t i, Range1d r) {
     for (auto j = r.begin(); j < r.end(); ++j) {
       matrix[i*kDim2 + j] += 1;
     }
@@ -47,12 +50,17 @@ TEST(ParallelFor2d, Test) {
   for (size_t i = 0; i < kDim1 * kDim2; i++) {
     ASSERT_EQ(matrix[i], 1);
   }
+  omp_set_num_threads(old);
 }
 TEST(ParallelFor2dNonUniform, Test) {
   constexpr size_t kDim1 = 5;
   constexpr size_t kGrainSize = 256;
+  auto old = omp_get_max_threads();
+  omp_set_num_threads(4);
   // here are quite non-uniform distribution in space
   // but ParallelFor2d should split them by blocks with max size = kGrainSize
   // and process in balanced manner (optimal performance)
@@ -66,7 +74,7 @@ TEST(ParallelFor2dNonUniform, Test) {
     working_space[i].resize(dim2[i], 0);
   }
-  ParallelFor2d(space, 4, [&](size_t i, Range1d r) {
+  ParallelFor2d(space, omp_get_max_threads(), [&](size_t i, Range1d r) {
     for (auto j = r.begin(); j < r.end(); ++j) {
       working_space[i][j] += 1;
     }
@@ -77,6 +85,8 @@ TEST(ParallelFor2dNonUniform, Test) {
       ASSERT_EQ(working_space[i][j], 1);
     }
   }
+  omp_set_num_threads(old);
 }
 } // namespace common


@@ -301,9 +301,12 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrix(
     batch_count++;
     row_count += batch.Size();
   }
+#if defined(_OPENMP)
   EXPECT_GE(batch_count, 2);
   EXPECT_EQ(row_count, dmat->Info().num_row_);
+#else
+#warning "External memory doesn't work with Non-OpenMP build "
+#endif // defined(_OPENMP)
   return dmat;
 }