Fix rmm build (#7973)

- Optionally switch to C++17
- Use rmm CMake target.
- Workaround compiler errors.
- Fix GPUMetric inheritance.
- Run death tests even if it's built with RMM support.

Co-authored-by: jakirkham <jakirkham@gmail.com>
This commit is contained in:
Jiaming Yuan 2022-06-06 20:18:32 +08:00 committed by GitHub
parent 1ced638165
commit d48123d23b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 75 additions and 66 deletions

View File

@ -206,6 +206,10 @@ endif (JVM_BINDINGS)
# Plugin # Plugin
add_subdirectory(${xgboost_SOURCE_DIR}/plugin) add_subdirectory(${xgboost_SOURCE_DIR}/plugin)
# RMM plugin: locate the RMM CMake package, which exports the imported
# rmm::rmm target that downstream targets link against.
if(PLUGIN_RMM)
  find_package(rmm REQUIRED)
endif()
#-- library #-- library
if (BUILD_STATIC_LIB) if (BUILD_STATIC_LIB)
add_library(xgboost STATIC) add_library(xgboost STATIC)

4
Jenkinsfile vendored
View File

@ -397,7 +397,7 @@ def TestCppGPU(args) {
node(nodeReq) { node(nodeReq) {
unstash name: "xgboost_cpp_tests_cuda${artifact_cuda_version}" unstash name: "xgboost_cpp_tests_cuda${artifact_cuda_version}"
unstash name: 'srcs' unstash name: 'srcs'
echo "Test C++, CUDA ${args.host_cuda_version}" echo "Test C++, CUDA ${args.host_cuda_version}, rmm: ${args.test_rmm}"
def container_type = "gpu" def container_type = "gpu"
def docker_binary = "nvidia-docker" def docker_binary = "nvidia-docker"
def docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}" def docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}"
@ -410,7 +410,7 @@ def TestCppGPU(args) {
docker_binary = "nvidia-docker" docker_binary = "nvidia-docker"
docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}" docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}"
sh """ sh """
${dockerRun} ${container_type} ${docker_binary} ${docker_args} bash -c "source activate gpu_test && build/testxgboost --use-rmm-pool --gtest_filter=-*DeathTest.*" ${dockerRun} ${container_type} ${docker_binary} ${docker_args} bash -c "source activate gpu_test && build/testxgboost --use-rmm-pool"
""" """
} }
deleteDir() deleteDir()

View File

@ -169,10 +169,17 @@ function(xgboost_set_cuda_flags target)
$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=/utf-8>) $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=/utf-8>)
endif (MSVC) endif (MSVC)
if (PLUGIN_RMM)
set_target_properties(${target} PROPERTIES
CUDA_STANDARD 17
CUDA_STANDARD_REQUIRED ON
CUDA_SEPARABLE_COMPILATION OFF)
else ()
set_target_properties(${target} PROPERTIES set_target_properties(${target} PROPERTIES
CUDA_STANDARD 14 CUDA_STANDARD 14
CUDA_STANDARD_REQUIRED ON CUDA_STANDARD_REQUIRED ON
CUDA_SEPARABLE_COMPILATION OFF) CUDA_SEPARABLE_COMPILATION OFF)
endif (PLUGIN_RMM)
endfunction(xgboost_set_cuda_flags) endfunction(xgboost_set_cuda_flags)
macro(xgboost_link_nccl target) macro(xgboost_link_nccl target)
@ -189,10 +196,18 @@ endmacro(xgboost_link_nccl)
# compile options # compile options
macro(xgboost_target_properties target) macro(xgboost_target_properties target)
if (PLUGIN_RMM)
set_target_properties(${target} PROPERTIES
CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON
POSITION_INDEPENDENT_CODE ON)
else ()
set_target_properties(${target} PROPERTIES set_target_properties(${target} PROPERTIES
CXX_STANDARD 14 CXX_STANDARD 14
CXX_STANDARD_REQUIRED ON CXX_STANDARD_REQUIRED ON
POSITION_INDEPENDENT_CODE ON) POSITION_INDEPENDENT_CODE ON)
endif (PLUGIN_RMM)
if (HIDE_CXX_SYMBOLS) if (HIDE_CXX_SYMBOLS)
#-- Hide all C++ symbols #-- Hide all C++ symbols
set_target_properties(${target} PROPERTIES set_target_properties(${target} PROPERTIES
@ -247,6 +262,10 @@ macro(xgboost_target_defs target)
PRIVATE PRIVATE
-DXGBOOST_BUILTIN_PREFETCH_PRESENT=1) -DXGBOOST_BUILTIN_PREFETCH_PRESENT=1)
endif (XGBOOST_BUILTIN_PREFETCH_PRESENT) endif (XGBOOST_BUILTIN_PREFETCH_PRESENT)
if (PLUGIN_RMM)
  # Expose the RMM feature flag to both this target and its consumers.
  # Fix: use the macro's ${target} parameter instead of hard-coding objxgboost —
  # this block lives inside macro(xgboost_target_defs target), and the sibling
  # PLUGIN_RMM link block already parameterizes on ${target}. PUBLIC visibility
  # still propagates -DXGBOOST_USE_RMM=1 to anything linking the target.
  target_compile_definitions(${target} PUBLIC -DXGBOOST_USE_RMM=1)
endif (PLUGIN_RMM)
endmacro(xgboost_target_defs) endmacro(xgboost_target_defs)
# handles dependencies # handles dependencies
@ -269,6 +288,10 @@ macro(xgboost_target_link_libraries target)
xgboost_set_cuda_flags(${target}) xgboost_set_cuda_flags(${target})
endif (USE_CUDA) endif (USE_CUDA)
# When the RMM plugin is enabled, link the imported rmm::rmm target; the
# imported target carries its own include directories, compile requirements,
# and transitive dependencies, so no manual -I/-l wiring is needed.
if(PLUGIN_RMM)
  target_link_libraries(${target} PRIVATE rmm::rmm)
endif()
if (USE_NCCL) if (USE_NCCL)
xgboost_link_nccl(${target}) xgboost_link_nccl(${target})
endif (USE_NCCL) endif (USE_NCCL)

View File

@ -2,19 +2,6 @@ if (PLUGIN_DENSE_PARSER)
target_sources(objxgboost PRIVATE ${xgboost_SOURCE_DIR}/plugin/dense_parser/dense_libsvm.cc) target_sources(objxgboost PRIVATE ${xgboost_SOURCE_DIR}/plugin/dense_parser/dense_libsvm.cc)
endif (PLUGIN_DENSE_PARSER) endif (PLUGIN_DENSE_PARSER)
if (PLUGIN_RMM)
find_path(RMM_INCLUDE "rmm" HINTS "$ENV{RMM_ROOT}/include")
if (NOT RMM_INCLUDE)
message(FATAL_ERROR "Could not locate RMM library")
endif ()
message(STATUS "RMM: RMM_LIBRARY set to ${RMM_LIBRARY}")
message(STATUS "RMM: RMM_INCLUDE set to ${RMM_INCLUDE}")
target_include_directories(objxgboost PUBLIC ${RMM_INCLUDE})
target_compile_definitions(objxgboost PUBLIC -DXGBOOST_USE_RMM=1)
endif (PLUGIN_RMM)
if (PLUGIN_UPDATER_ONEAPI) if (PLUGIN_UPDATER_ONEAPI)
add_library(oneapi_plugin OBJECT add_library(oneapi_plugin OBJECT
${xgboost_SOURCE_DIR}/plugin/updater_oneapi/regression_obj_oneapi.cc ${xgboost_SOURCE_DIR}/plugin/updater_oneapi/regression_obj_oneapi.cc

View File

@ -130,12 +130,12 @@ void MetaInfo::SetInfoFromCUDA(Context const&, StringView key, Json array) {
} }
// uint info // uint info
if (key == "group") { if (key == "group") {
auto array_interface{ArrayInterface<1>(array)}; ArrayInterface<1> array_interface{array};
CopyGroupInfoImpl(array_interface, &group_ptr_); CopyGroupInfoImpl(array_interface, &group_ptr_);
data::ValidateQueryGroup(group_ptr_); data::ValidateQueryGroup(group_ptr_);
return; return;
} else if (key == "qid") { } else if (key == "qid") {
auto array_interface{ArrayInterface<1>(array)}; ArrayInterface<1> array_interface{array};
CopyQidImpl(array_interface, &group_ptr_); CopyQidImpl(array_interface, &group_ptr_);
data::ValidateQueryGroup(group_ptr_); data::ValidateQueryGroup(group_ptr_);
return; return;

View File

@ -1,5 +1,5 @@
/*! /*!
* Copyright 2018-2020 by Contributors * Copyright 2018-2022 by Contributors
* \file metric_common.h * \file metric_common.h
*/ */
#ifndef XGBOOST_METRIC_METRIC_COMMON_H_ #ifndef XGBOOST_METRIC_METRIC_COMMON_H_
@ -9,6 +9,7 @@
#include <string> #include <string>
#include "../common/common.h" #include "../common/common.h"
#include "xgboost/metric.h"
namespace xgboost { namespace xgboost {

View File

@ -27,7 +27,7 @@ DMLC_REGISTRY_FILE_TAG(rank_metric_gpu);
/*! \brief Evaluate rank list on GPU */ /*! \brief Evaluate rank list on GPU */
template <typename EvalMetricT> template <typename EvalMetricT>
struct EvalRankGpu : public Metric, public EvalRankConfig { struct EvalRankGpu : public GPUMetric, public EvalRankConfig {
public: public:
double Eval(const HostDeviceVector<bst_float> &preds, const MetaInfo &info, double Eval(const HostDeviceVector<bst_float> &preds, const MetaInfo &info,
bool distributed) override { bool distributed) override {

View File

@ -13,10 +13,7 @@ RUN \
apt-get install -y wget unzip bzip2 libgomp1 build-essential ninja-build git && \ apt-get install -y wget unzip bzip2 libgomp1 build-essential ninja-build git && \
# Python # Python
wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
bash Miniconda3.sh -b -p /opt/python && \ bash Miniconda3.sh -b -p /opt/python
# CMake
wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html) # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
RUN \ RUN \
@ -30,7 +27,7 @@ ENV PATH=/opt/python/bin:$PATH
# Create new Conda environment with RMM # Create new Conda environment with RMM
RUN \ RUN \
conda create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \ conda create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
python=3.8 rmm=21.10* cudatoolkit=$CUDA_VERSION_ARG python=3.9 rmm=22.06* cudatoolkit=$CUDA_VERSION_ARG cmake
ENV GOSU_VERSION 1.10 ENV GOSU_VERSION 1.10

View File

@ -252,7 +252,7 @@ __global__ void TestLastStaticKernel(Span<float> _span) {
_span.last(static_cast<Span<float>::index_type>(-1)); _span.last(static_cast<Span<float>::index_type>(-1));
} }
TEST(GPUSpan, FirstLast) { TEST(GPUSpanDeathTest, FirstLast) {
// We construct vectors multiple times since thrust can not recover from // We construct vectors multiple times since thrust can not recover from
// death test. // death test.
auto lambda_first_dy = []() { auto lambda_first_dy = []() {
@ -312,40 +312,37 @@ TEST(GPUSpan, FirstLast) {
output = testing::internal::GetCapturedStdout(); output = testing::internal::GetCapturedStdout();
} }
__global__ void TestFrontKernel(Span<float> _span) { namespace {
_span.front(); void TestFrontBack() {
}
__global__ void TestBackKernel(Span<float> _span) {
_span.back();
}
TEST(GPUSpan, FrontBack) {
dh::safe_cuda(cudaSetDevice(0));
Span<float> s; Span<float> s;
auto lambda_test_front = [=]() { EXPECT_DEATH(
{
// make sure the termination happens inside this test. // make sure the termination happens inside this test.
try { try {
TestFrontKernel<<<1, 1>>>(s); dh::LaunchN(1, [=] __device__(size_t) { s.front(); });
dh::safe_cuda(cudaDeviceSynchronize()); dh::safe_cuda(cudaDeviceSynchronize());
dh::safe_cuda(cudaGetLastError()); dh::safe_cuda(cudaGetLastError());
} catch (dmlc::Error const& e) { } catch (dmlc::Error const& e) {
std::terminate(); std::terminate();
} }
}; },
EXPECT_DEATH(lambda_test_front(), ""); "");
EXPECT_DEATH(
auto lambda_test_back = [=]() { {
try { try {
TestBackKernel<<<1, 1>>>(s); dh::LaunchN(1, [=] __device__(size_t) { s.back(); });
dh::safe_cuda(cudaDeviceSynchronize()); dh::safe_cuda(cudaDeviceSynchronize());
dh::safe_cuda(cudaGetLastError()); dh::safe_cuda(cudaGetLastError());
} catch (dmlc::Error const& e) { } catch (dmlc::Error const& e) {
std::terminate(); std::terminate();
} }
}; },
EXPECT_DEATH(lambda_test_back(), ""); "");
}
} // namespace
TEST(GPUSpanDeathTest, FrontBack) {
TestFrontBack();
} }
__global__ void TestSubspanDynamicKernel(Span<float> _span) { __global__ void TestSubspanDynamicKernel(Span<float> _span) {
@ -354,7 +351,7 @@ __global__ void TestSubspanDynamicKernel(Span<float> _span) {
__global__ void TestSubspanStaticKernel(Span<float> _span) { __global__ void TestSubspanStaticKernel(Span<float> _span) {
_span.subspan<16>(); _span.subspan<16>();
} }
TEST(GPUSpan, Subspan) { TEST(GPUSpanDeathTest, Subspan) {
auto lambda_subspan_dynamic = []() { auto lambda_subspan_dynamic = []() {
thrust::host_vector<float> h_vec (4); thrust::host_vector<float> h_vec (4);
InitializeRange(h_vec.begin(), h_vec.end()); InitializeRange(h_vec.begin(), h_vec.end());