Support building XGBoost with CUDA 11 (#5808)

* Change serialization test.
* Add CUDA 11 tests on Linux CI.

Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
Andy Adinets 2020-07-20 01:58:41 +02:00 committed by GitHub
parent ac9136ee49
commit b3d2e7644a
7 changed files with 128 additions and 75 deletions

Jenkinsfile (76 lines changed)

@@ -6,6 +6,9 @@
 // Command to run command inside a docker container
 dockerRun = 'tests/ci_build/ci_build.sh'
+// Which CUDA version to use when building reference distribution wheel
+ref_cuda_ver = '10.0'
+
 import groovy.transform.Field
 
 @Field
@@ -65,8 +68,13 @@ pipeline {
         'build-cpu': { BuildCPU() },
         'build-cpu-rabit-mock': { BuildCPUMock() },
         'build-cpu-non-omp': { BuildCPUNonOmp() },
+        // Build reference, distribution-ready Python wheel with CUDA 10.0
+        // using CentOS 6 image
         'build-gpu-cuda10.0': { BuildCUDA(cuda_version: '10.0') },
+        // The build-gpu-* builds below use Ubuntu image
         'build-gpu-cuda10.1': { BuildCUDA(cuda_version: '10.1') },
+        'build-gpu-cuda10.2': { BuildCUDA(cuda_version: '10.2') },
+        'build-gpu-cuda11.0': { BuildCUDA(cuda_version: '11.0') },
         'build-jvm-packages': { BuildJVMPackages(spark_version: '3.0.0') },
         'build-jvm-doc': { BuildJVMDoc() }
       ])
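
Note: the reference wheel is built inside a CentOS 6 container so that it links against an old glibc and can honestly carry the manylinux2010_x86_64 tag that rename_whl.py applies further down in this file's diff; the other CUDA versions only need to run inside CI, so the newer Ubuntu image is enough. A quick way to check a wheel's real platform compatibility (illustrative, not part of this pipeline):

    # auditwheel inspects which glibc symbols a wheel actually links against
    auditwheel show python-package/dist/xgboost-*.whl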
@@ -80,11 +88,12 @@ pipeline {
     script {
       parallel ([
         'test-python-cpu': { TestPythonCPU() },
-        'test-python-gpu-cuda10.0': { TestPythonGPU(cuda_version: '10.0') },
-        'test-python-gpu-cuda10.1': { TestPythonGPU(cuda_version: '10.1') },
-        'test-python-mgpu-cuda10.1': { TestPythonGPU(cuda_version: '10.1', multi_gpu: true) },
-        'test-cpp-gpu': { TestCppGPU(cuda_version: '10.1') },
-        'test-cpp-mgpu': { TestCppGPU(cuda_version: '10.1', multi_gpu: true) },
+        'test-python-gpu-cuda10.0': { TestPythonGPU(host_cuda_version: '10.0') },
+        'test-python-gpu-cuda10.2': { TestPythonGPU(host_cuda_version: '10.2') },
+        'test-python-gpu-cuda11.0': { TestPythonGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0') },
+        'test-python-mgpu-cuda10.2': { TestPythonGPU(host_cuda_version: '10.2', multi_gpu: true) },
+        'test-cpp-gpu-cuda10.2': { TestCppGPU(artifact_cuda_version: '10.2', host_cuda_version: '10.2') },
+        'test-cpp-gpu-cuda11.0': { TestCppGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0') },
         'test-jvm-jdk8': { CrossTestJVMwithJDK(jdk_version: '8', spark_version: '3.0.0') },
         'test-jvm-jdk11': { CrossTestJVMwithJDK(jdk_version: '11') },
         'test-jvm-jdk12': { CrossTestJVMwithJDK(jdk_version: '12') },
@@ -123,6 +132,10 @@ def checkoutSrcs() {
   }
 }
+
+def GetCUDABuildContainerType(cuda_version) {
+  return (cuda_version == ref_cuda_ver) ? 'gpu_build_centos6' : 'gpu_build'
+}
 
 def ClangTidy() {
   node('linux && cpu_build') {
     unstash name: 'srcs'
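
Note: the helper above routes only the reference CUDA version to the CentOS 6 image; every other version builds in the Ubuntu-based gpu_build image. A minimal sketch of the expected behavior (illustrative Groovy, not part of the pipeline):

    def ref_cuda_ver = '10.0'
    def GetCUDABuildContainerType = { cuda_version ->
        (cuda_version == ref_cuda_ver) ? 'gpu_build_centos6' : 'gpu_build'
    }
    assert GetCUDABuildContainerType('10.0') == 'gpu_build_centos6'
    assert GetCUDABuildContainerType('11.0') == 'gpu_build'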
@@ -244,7 +257,7 @@ def BuildCUDA(args) {
   node('linux && cpu_build') {
     unstash name: 'srcs'
     echo "Build with CUDA ${args.cuda_version}"
-    def container_type = "gpu_build"
+    def container_type = GetCUDABuildContainerType(args.cuda_version)
     def docker_binary = "docker"
     def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
     def arch_flag = ""
@@ -254,20 +267,17 @@ def BuildCUDA(args) {
     sh """
     ${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/build_via_cmake.sh -DUSE_CUDA=ON -DUSE_NCCL=ON -DOPEN_MP:BOOL=ON -DHIDE_CXX_SYMBOLS=ON ${arch_flag}
     ${dockerRun} ${container_type} ${docker_binary} ${docker_args} bash -c "cd python-package && rm -rf dist/* && python setup.py bdist_wheel --universal"
-    ${dockerRun} ${container_type} ${docker_binary} ${docker_args} python3 tests/ci_build/rename_whl.py python-package/dist/*.whl ${commit_id} manylinux2010_x86_64
+    ${dockerRun} ${container_type} ${docker_binary} ${docker_args} python tests/ci_build/rename_whl.py python-package/dist/*.whl ${commit_id} manylinux2010_x86_64
     """
-    // Stash wheel for CUDA 10.0 target
-    if (args.cuda_version == '10.0') {
-      echo 'Stashing Python wheel...'
-      stash name: 'xgboost_whl_cuda10', includes: 'python-package/dist/*.whl'
-      if (env.BRANCH_NAME == 'master' || env.BRANCH_NAME.startsWith('release')) {
-        echo 'Uploading Python wheel...'
-        path = ("${BRANCH_NAME}" == 'master') ? '' : "${BRANCH_NAME}/"
-        s3Upload bucket: 'xgboost-nightly-builds', path: path, acl: 'PublicRead', workingDir: 'python-package/dist', includePathPattern:'**/*.whl'
-      }
-      echo 'Stashing C++ test executable (testxgboost)...'
-      stash name: 'xgboost_cpp_tests', includes: 'build/testxgboost'
-    }
+    echo 'Stashing Python wheel...'
+    stash name: "xgboost_whl_cuda${args.cuda_version}", includes: 'python-package/dist/*.whl'
+    if (args.cuda_version == ref_cuda_ver && (env.BRANCH_NAME == 'master' || env.BRANCH_NAME.startsWith('release'))) {
+      echo 'Uploading Python wheel...'
+      path = ("${BRANCH_NAME}" == 'master') ? '' : "${BRANCH_NAME}/"
+      s3Upload bucket: 'xgboost-nightly-builds', path: path, acl: 'PublicRead', workingDir: 'python-package/dist', includePathPattern:'**/*.whl'
+    }
+    echo 'Stashing C++ test executable (testxgboost)...'
+    stash name: "xgboost_cpp_tests_cuda${args.cuda_version}", includes: 'build/testxgboost'
     deleteDir()
   }
 }
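
Note: stash names are now keyed by CUDA version, so artifacts from the four build configurations can coexist and each test job can pull the matching pair. A sketch of the naming scheme (illustrative values):

    def cuda = '11.0'
    assert "xgboost_whl_cuda${cuda}" == 'xgboost_whl_cuda11.0'              // Python wheel
    assert "xgboost_cpp_tests_cuda${cuda}" == 'xgboost_cpp_tests_cuda11.0'  // testxgboost binary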
@@ -308,7 +318,7 @@ def BuildJVMDoc() {
 
 def TestPythonCPU() {
   node('linux && cpu') {
-    unstash name: 'xgboost_whl_cuda10'
+    unstash name: "xgboost_whl_cuda${ref_cuda_ver}"
     unstash name: 'srcs'
     unstash name: 'xgboost_cli'
     echo "Test Python CPU"
@@ -322,15 +332,16 @@ def TestPythonCPU() {
 }
 
 def TestPythonGPU(args) {
-  nodeReq = (args.multi_gpu) ? 'linux && mgpu' : 'linux && gpu'
+  def nodeReq = (args.multi_gpu) ? 'linux && mgpu' : 'linux && gpu'
+  def artifact_cuda_version = (args.artifact_cuda_version) ?: ref_cuda_ver
   node(nodeReq) {
-    unstash name: 'xgboost_whl_cuda10'
-    unstash name: 'xgboost_cpp_tests'
+    unstash name: "xgboost_whl_cuda${artifact_cuda_version}"
+    unstash name: "xgboost_cpp_tests_cuda${artifact_cuda_version}"
     unstash name: 'srcs'
-    echo "Test Python GPU: CUDA ${args.cuda_version}"
+    echo "Test Python GPU: CUDA ${args.host_cuda_version}"
     def container_type = "gpu"
     def docker_binary = "nvidia-docker"
-    def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
+    def docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}"
     if (args.multi_gpu) {
       echo "Using multiple GPUs"
       sh """
@@ -361,21 +372,16 @@ def TestCppRabit() {
 }
 
 def TestCppGPU(args) {
-  nodeReq = (args.multi_gpu) ? 'linux && mgpu' : 'linux && gpu'
+  def nodeReq = 'linux && mgpu'
+  def artifact_cuda_version = (args.artifact_cuda_version) ?: ref_cuda_ver
   node(nodeReq) {
-    unstash name: 'xgboost_cpp_tests'
+    unstash name: "xgboost_cpp_tests_cuda${artifact_cuda_version}"
     unstash name: 'srcs'
-    echo "Test C++, CUDA ${args.cuda_version}"
+    echo "Test C++, CUDA ${args.host_cuda_version}"
     def container_type = "gpu"
     def docker_binary = "nvidia-docker"
-    def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
-    if (args.multi_gpu) {
-      echo "Using multiple GPUs"
-      sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} build/testxgboost --gtest_filter=*.MGPU_*"
-    } else {
-      echo "Using a single GPU"
-      sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} build/testxgboost --gtest_filter=-*.MGPU_*"
-    }
+    def docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}"
+    sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} build/testxgboost"
     deleteDir()
   }
 }
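
Note: the removed branch split single- and multi-GPU runs with GoogleTest filters; the consolidated job now runs the whole suite in one pass on a multi-GPU node. For reference, the filter syntax that was dropped:

    build/testxgboost --gtest_filter=*.MGPU_*    # multi-GPU tests only
    build/testxgboost --gtest_filter=-*.MGPU_*   # everything except multi-GPU tests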

src/CMakeLists.txt

@@ -9,7 +9,9 @@ if (USE_CUDA)
   file(GLOB_RECURSE CUDA_SOURCES *.cu *.cuh)
   target_sources(objxgboost PRIVATE ${CUDA_SOURCES})
   target_compile_definitions(objxgboost PRIVATE -DXGBOOST_USE_CUDA=1)
-  target_include_directories(objxgboost PRIVATE ${xgboost_SOURCE_DIR}/cub/)
+  if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.0)
+    target_include_directories(objxgboost PRIVATE ${xgboost_SOURCE_DIR}/cub/)
+  endif (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.0)
   target_compile_options(objxgboost PRIVATE
     $<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda>
     $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>
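
Note: CUDA 11 bundles CUB with the toolkit, so the vendored cub/ checkout must stay off the include path there to avoid mixing two CUB versions; the same guard is added to the test target further down. A configure-time sketch to surface which copy is in use (illustrative, not part of this change):

    if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.0)
      message(STATUS "Using vendored CUB from ${xgboost_SOURCE_DIR}/cub/")
    else ()
      message(STATUS "Using CUB bundled with CUDA ${CMAKE_CUDA_COMPILER_VERSION}")
    endif ()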

tests/ci_build/Dockerfile.gpu_build

@@ -1,53 +1,30 @@
 ARG CUDA_VERSION
-FROM nvidia/cuda:$CUDA_VERSION-devel-centos6
+FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu16.04
 ARG CUDA_VERSION
 
 # Environment
 ENV DEBIAN_FRONTEND noninteractive
-ENV DEVTOOLSET_URL_ROOT http://vault.centos.org/6.9/sclo/x86_64/rh/devtoolset-4/
+SHELL ["/bin/bash", "-c"]   # Use Bash as shell
 
 # Install all basic requirements
 RUN \
-    yum -y update && \
-    yum install -y tar unzip wget xz git centos-release-scl yum-utils && \
-    yum-config-manager --enable centos-sclo-rh-testing && \
-    yum -y update && \
-    yum install -y $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-5.3.1-6.1.el6.x86_64.rpm \
-                   $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-c++-5.3.1-6.1.el6.x86_64.rpm \
-                   $DEVTOOLSET_URL_ROOT/devtoolset-4-binutils-2.25.1-8.el6.x86_64.rpm \
-                   $DEVTOOLSET_URL_ROOT/devtoolset-4-runtime-4.1-3.sc1.el6.x86_64.rpm \
-                   $DEVTOOLSET_URL_ROOT/devtoolset-4-libstdc++-devel-5.3.1-6.1.el6.x86_64.rpm && \
-    # Python
-    wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
-    bash Miniconda3.sh -b -p /opt/python && \
+    apt-get update && \
+    apt-get install -y tar unzip wget bzip2 libgomp1 git build-essential doxygen graphviz llvm libasan2 libidn11 liblz4-dev ninja-build && \
     # CMake
     wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \
     bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
-    # Ninja
-    mkdir -p /usr/local && \
-    cd /usr/local/ && \
-    wget -nv -nc https://github.com/ninja-build/ninja/archive/v1.10.0.tar.gz --no-check-certificate && \
-    tar xf v1.10.0.tar.gz && mv ninja-1.10.0 ninja && rm -v v1.10.0.tar.gz && \
-    cd ninja && \
-    python ./configure.py --bootstrap
+    # Python
+    wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
+    bash Miniconda3.sh -b -p /opt/python
 
 # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
 RUN \
     export CUDA_SHORT=`echo $CUDA_VERSION | egrep -o '[0-9]+\.[0-9]'` && \
-    export NCCL_VERSION=2.4.8-1 && \
-    wget https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
-    rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
-    yum -y update && \
-    yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT} && \
-    rm -f nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm;
+    export NCCL_VERSION=2.7.5-1 && \
+    apt-get update && \
+    apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT}
 
-ENV PATH=/opt/python/bin:/usr/local/ninja:$PATH
-ENV CC=/opt/rh/devtoolset-4/root/usr/bin/gcc
-ENV CXX=/opt/rh/devtoolset-4/root/usr/bin/c++
-ENV CPP=/opt/rh/devtoolset-4/root/usr/bin/cpp
 
 # Install Python packages
 RUN \
     pip install numpy pytest scipy scikit-learn wheel kubernetes urllib3==1.22
+ENV PATH=/opt/python/bin:$PATH
 
 ENV GOSU_VERSION 1.10
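
Note: NCCL packages are versioned against the CUDA major.minor release, which the RUN step extracts from $CUDA_VERSION with egrep. The regex keeps only the first "digits.digit" group:

    CUDA_VERSION=11.0
    CUDA_SHORT=`echo $CUDA_VERSION | egrep -o '[0-9]+\.[0-9]'`
    echo $CUDA_SHORT    # prints 11.0; a longer "11.0.221" would also reduce to 11.0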

tests/ci_build/Dockerfile.gpu_build_centos6 (new file)

@@ -0,0 +1,62 @@
+ARG CUDA_VERSION
+FROM nvidia/cuda:$CUDA_VERSION-devel-centos6
+ARG CUDA_VERSION
+
+# Environment
+ENV DEBIAN_FRONTEND noninteractive
+ENV DEVTOOLSET_URL_ROOT http://vault.centos.org/6.9/sclo/x86_64/rh/devtoolset-4/
+
+# Install all basic requirements
+RUN \
+    yum -y update && \
+    yum install -y tar unzip wget xz git centos-release-scl yum-utils && \
+    yum-config-manager --enable centos-sclo-rh-testing && \
+    yum -y update && \
+    yum install -y $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-5.3.1-6.1.el6.x86_64.rpm \
+                   $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-c++-5.3.1-6.1.el6.x86_64.rpm \
+                   $DEVTOOLSET_URL_ROOT/devtoolset-4-binutils-2.25.1-8.el6.x86_64.rpm \
+                   $DEVTOOLSET_URL_ROOT/devtoolset-4-runtime-4.1-3.sc1.el6.x86_64.rpm \
+                   $DEVTOOLSET_URL_ROOT/devtoolset-4-libstdc++-devel-5.3.1-6.1.el6.x86_64.rpm && \
+    # Python
+    wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
+    bash Miniconda3.sh -b -p /opt/python && \
+    # CMake
+    wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \
+    bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
+    # Ninja
+    mkdir -p /usr/local && \
+    cd /usr/local/ && \
+    wget -nv -nc https://github.com/ninja-build/ninja/archive/v1.10.0.tar.gz --no-check-certificate && \
+    tar xf v1.10.0.tar.gz && mv ninja-1.10.0 ninja && rm -v v1.10.0.tar.gz && \
+    cd ninja && \
+    python ./configure.py --bootstrap
+
+# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
+RUN \
+    export CUDA_SHORT=`echo $CUDA_VERSION | egrep -o '[0-9]+\.[0-9]'` && \
+    export NCCL_VERSION=2.4.8-1 && \
+    wget https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
+    rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
+    yum -y update && \
+    yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT} && \
+    rm -f nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm;
+
+ENV PATH=/opt/python/bin:/usr/local/ninja:$PATH
+ENV CC=/opt/rh/devtoolset-4/root/usr/bin/gcc
+ENV CXX=/opt/rh/devtoolset-4/root/usr/bin/c++
+ENV CPP=/opt/rh/devtoolset-4/root/usr/bin/cpp
+
+ENV GOSU_VERSION 1.10
+
+# Install lightweight sudo (not bound to TTY)
+RUN set -ex; \
+    wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
+    chmod +x /usr/local/bin/gosu && \
+    gosu nobody true
+
+# Default entry-point to use if running locally
+# It will preserve attributes of created files
+COPY entrypoint.sh /scripts/
+
+WORKDIR /workspace
+ENTRYPOINT ["/scripts/entrypoint.sh"]
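
Note: gosu lets the entrypoint drop from root to the invoking user's UID, so files created in the mounted workspace keep host ownership (entrypoint.sh itself is not part of this diff). A hypothetical sketch of that pattern, with CI_BUILD_UID and CI_BUILD_GID as assumed environment variables:

    # hypothetical entrypoint sketch -- not the actual entrypoint.sh
    useradd -m -u "${CI_BUILD_UID}" ci-user 2>/dev/null || true
    exec gosu "${CI_BUILD_UID}:${CI_BUILD_GID}" "$@"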

tests/ci_build/ci_build.sh

@@ -187,6 +187,10 @@
     # that is associated with the particular branch or pull request
     echo "docker tag ${DOCKER_IMG_NAME} ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${BRANCH_NAME}"
     docker tag "${DOCKER_IMG_NAME}" "${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${BRANCH_NAME}"
+    echo "python3 -m awscli ecr create-repository --repository-name ${DOCKER_IMG_NAME} --region ${DOCKER_CACHE_ECR_REGION} || true"
+    python3 -m awscli ecr create-repository --repository-name ${DOCKER_IMG_NAME} --region ${DOCKER_CACHE_ECR_REGION} || true
+    echo "docker push ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${BRANCH_NAME}"
+    docker push "${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${BRANCH_NAME}"
     if [[ $? != "0" ]]; then
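
Note: creating an ECR repository that already exists is an error, so the "|| true" makes the step idempotent and the push that follows still runs on repeat builds. The same pattern in isolation (repository name and region are placeholders):

    python3 -m awscli ecr create-repository --repository-name some-image --region us-west-2 || true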

tests/cpp/CMakeLists.txt

@@ -19,8 +19,10 @@ target_link_libraries(testxgboost PRIVATE objxgboost)
 if (USE_CUDA)
   # OpenMP is mandatory for CUDA
   find_package(OpenMP REQUIRED)
-  target_include_directories(testxgboost PRIVATE
-    ${xgboost_SOURCE_DIR}/cub/)
+  if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.0)
+    target_include_directories(testxgboost PRIVATE
+      ${xgboost_SOURCE_DIR}/cub/)
+  endif (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.0)
   target_compile_options(testxgboost PRIVATE
     $<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda>
     $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>

tests/cpp/test_serialization.cc

@@ -148,8 +148,8 @@ void TestLearnerSerialization(Args args, FeatureMap const& fmap, std::shared_ptr
 // Binary is not tested, as it is NOT reproducible.
 class SerializationTest : public ::testing::Test {
  protected:
-  size_t constexpr static kRows = 10;
-  size_t constexpr static kCols = 10;
+  size_t constexpr static kRows = 15;
+  size_t constexpr static kCols = 15;
   std::shared_ptr<DMatrix> p_dmat_;
   FeatureMap fmap_;