Support building XGBoost with CUDA 11 (#5808)
* Change serialization test. * Add CUDA 11 tests on Linux CI. Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
@@ -1,53 +1,30 @@
|
||||
ARG CUDA_VERSION
|
||||
FROM nvidia/cuda:$CUDA_VERSION-devel-centos6
|
||||
FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu16.04
|
||||
ARG CUDA_VERSION
|
||||
|
||||
# Environment
|
||||
ENV DEBIAN_FRONTEND noninteractive
|
||||
ENV DEVTOOLSET_URL_ROOT http://vault.centos.org/6.9/sclo/x86_64/rh/devtoolset-4/
|
||||
SHELL ["/bin/bash", "-c"] # Use Bash as shell
|
||||
|
||||
# Install all basic requirements
|
||||
RUN \
|
||||
yum -y update && \
|
||||
yum install -y tar unzip wget xz git centos-release-scl yum-utils && \
|
||||
yum-config-manager --enable centos-sclo-rh-testing && \
|
||||
yum -y update && \
|
||||
yum install -y $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-5.3.1-6.1.el6.x86_64.rpm \
|
||||
$DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-c++-5.3.1-6.1.el6.x86_64.rpm \
|
||||
$DEVTOOLSET_URL_ROOT/devtoolset-4-binutils-2.25.1-8.el6.x86_64.rpm \
|
||||
$DEVTOOLSET_URL_ROOT/devtoolset-4-runtime-4.1-3.sc1.el6.x86_64.rpm \
|
||||
$DEVTOOLSET_URL_ROOT/devtoolset-4-libstdc++-devel-5.3.1-6.1.el6.x86_64.rpm && \
|
||||
# Python
|
||||
wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
|
||||
bash Miniconda3.sh -b -p /opt/python && \
|
||||
apt-get update && \
|
||||
apt-get install -y tar unzip wget bzip2 libgomp1 git build-essential doxygen graphviz llvm libasan2 libidn11 liblz4-dev ninja-build && \
|
||||
# CMake
|
||||
wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \
|
||||
bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
|
||||
# Ninja
|
||||
mkdir -p /usr/local && \
|
||||
cd /usr/local/ && \
|
||||
wget -nv -nc https://github.com/ninja-build/ninja/archive/v1.10.0.tar.gz --no-check-certificate && \
|
||||
tar xf v1.10.0.tar.gz && mv ninja-1.10.0 ninja && rm -v v1.10.0.tar.gz && \
|
||||
cd ninja && \
|
||||
python ./configure.py --bootstrap
|
||||
# Python
|
||||
wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
|
||||
bash Miniconda3.sh -b -p /opt/python
|
||||
|
||||
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
|
||||
RUN \
|
||||
export CUDA_SHORT=`echo $CUDA_VERSION | egrep -o '[0-9]+\.[0-9]'` && \
|
||||
export NCCL_VERSION=2.4.8-1 && \
|
||||
wget https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
|
||||
rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
|
||||
yum -y update && \
|
||||
yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT} && \
|
||||
rm -f nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm;
|
||||
export NCCL_VERSION=2.7.5-1 && \
|
||||
apt-get update && \
|
||||
apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT}
|
||||
|
||||
ENV PATH=/opt/python/bin:/usr/local/ninja:$PATH
|
||||
ENV CC=/opt/rh/devtoolset-4/root/usr/bin/gcc
|
||||
ENV CXX=/opt/rh/devtoolset-4/root/usr/bin/c++
|
||||
ENV CPP=/opt/rh/devtoolset-4/root/usr/bin/cpp
|
||||
|
||||
# Install Python packages
|
||||
RUN \
|
||||
pip install numpy pytest scipy scikit-learn wheel kubernetes urllib3==1.22
|
||||
ENV PATH=/opt/python/bin:$PATH
|
||||
|
||||
ENV GOSU_VERSION 1.10
|
||||
|
||||
|
||||
62
tests/ci_build/Dockerfile.gpu_build_centos6
Normal file
62
tests/ci_build/Dockerfile.gpu_build_centos6
Normal file
@@ -0,0 +1,62 @@
|
||||
# Base image: NVIDIA CUDA development image for CentOS 6, selected by the
# CUDA_VERSION build argument.
ARG CUDA_VERSION
FROM nvidia/cuda:${CUDA_VERSION}-devel-centos6
# An ARG declared before FROM is outside the build stage; declare it again so
# that later RUN steps can read CUDA_VERSION.
ARG CUDA_VERSION

# Environment
ENV DEBIAN_FRONTEND=noninteractive
# Base URL of the devtoolset-4 RPMs fetched by the requirements step.
ENV DEVTOOLSET_URL_ROOT=http://vault.centos.org/6.9/sclo/x86_64/rh/devtoolset-4/
|
||||
|
||||
# Install all basic requirements: base tools, the devtoolset-4 GCC 5.3.1
# packages, Miniconda, CMake 3.13.0 and Ninja 1.10.0.
RUN \
    yum -y update && \
    yum install -y tar unzip wget xz git centos-release-scl yum-utils && \
    yum-config-manager --enable centos-sclo-rh-testing && \
    yum -y update && \
    yum install -y \
        ${DEVTOOLSET_URL_ROOT}/devtoolset-4-gcc-5.3.1-6.1.el6.x86_64.rpm \
        ${DEVTOOLSET_URL_ROOT}/devtoolset-4-gcc-c++-5.3.1-6.1.el6.x86_64.rpm \
        ${DEVTOOLSET_URL_ROOT}/devtoolset-4-binutils-2.25.1-8.el6.x86_64.rpm \
        ${DEVTOOLSET_URL_ROOT}/devtoolset-4-runtime-4.1-3.sc1.el6.x86_64.rpm \
        ${DEVTOOLSET_URL_ROOT}/devtoolset-4-libstdc++-devel-5.3.1-6.1.el6.x86_64.rpm && \
    # Python: Miniconda installed in batch mode under /opt/python
    wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
    bash Miniconda3.sh -b -p /opt/python && \
    # CMake: self-extracting installer unpacked into /usr
    # NOTE(review): --no-check-certificate turns off TLS verification for this
    # download and for the Ninja download below.
    wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \
    bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
    # Ninja: source archive unpacked to /usr/local/ninja and bootstrapped there
    mkdir -p /usr/local && \
    cd /usr/local/ && \
    wget -nv -nc https://github.com/ninja-build/ninja/archive/v1.10.0.tar.gz --no-check-certificate && \
    tar xf v1.10.0.tar.gz && \
    mv ninja-1.10.0 ninja && \
    rm -v v1.10.0.tar.gz && \
    cd ninja && \
    python ./configure.py --bootstrap
|
||||
|
||||
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
# Registers NVIDIA's machine-learning yum repository, then installs the NCCL
# runtime, devel and static packages whose version suffix matches the
# major.minor part of CUDA_VERSION.
RUN \
    export CUDA_SHORT=$(echo $CUDA_VERSION | grep -E -o '[0-9]+\.[0-9]') && \
    export NCCL_VERSION=2.4.8-1 && \
    wget https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
    rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
    yum -y update && \
    yum install -y \
        libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} \
        libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} \
        libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT} && \
    # The repository RPM is only needed for registration; remove the download.
    rm -f nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm
|
||||
|
||||
# Put the Miniconda Python and the Ninja build directory ahead of the
# existing PATH entries.
ENV PATH=/opt/python/bin:/usr/local/ninja:$PATH
# Select the devtoolset-4 C compiler, C++ compiler and preprocessor.
ENV CC=/opt/rh/devtoolset-4/root/usr/bin/gcc \
    CXX=/opt/rh/devtoolset-4/root/usr/bin/c++ \
    CPP=/opt/rh/devtoolset-4/root/usr/bin/cpp
|
||||
|
||||
# gosu release to download.
ENV GOSU_VERSION=1.10

# Install lightweight sudo (not bound to TTY)
# `set -e` aborts the step on the first failing command; the final
# `gosu nobody true` runs the downloaded binary once as a smoke test.
RUN set -ex; \
    wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64"; \
    chmod +x /usr/local/bin/gosu; \
    gosu nobody true
|
||||
|
||||
# Default entry-point to use if running locally.
# It will preserve attributes of created files.
COPY ["entrypoint.sh", "/scripts/"]

# Commands run from /workspace and are launched through the entry-point script.
WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]
|
||||
@@ -187,6 +187,10 @@ then
|
||||
# that is associated with the particular branch or pull request
|
||||
echo "docker tag ${DOCKER_IMG_NAME} ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${BRANCH_NAME}"
|
||||
docker tag "${DOCKER_IMG_NAME}" "${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${BRANCH_NAME}"
|
||||
|
||||
echo "python3 -m awscli ecr create-repository --repository-name ${DOCKER_IMG_NAME} --region ${DOCKER_CACHE_ECR_REGION} || true"
|
||||
python3 -m awscli ecr create-repository --repository-name ${DOCKER_IMG_NAME} --region ${DOCKER_CACHE_ECR_REGION} || true
|
||||
|
||||
echo "docker push ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${BRANCH_NAME}"
|
||||
docker push "${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${BRANCH_NAME}"
|
||||
if [[ $? != "0" ]]; then
|
||||
|
||||
@@ -19,8 +19,10 @@ target_link_libraries(testxgboost PRIVATE objxgboost)
|
||||
if (USE_CUDA)
|
||||
# OpenMP is mandatory for CUDA
|
||||
find_package(OpenMP REQUIRED)
|
||||
target_include_directories(testxgboost PRIVATE
|
||||
${xgboost_SOURCE_DIR}/cub/)
|
||||
# Add the CUB directory from the source tree to the test target's include
# path only when the CUDA compiler is older than 11.0.
# NOTE(review): presumably CUDA 11+ toolkits ship their own CUB, so the
# in-tree copy must stay off the include path there -- confirm.
if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.0)
  target_include_directories(testxgboost PRIVATE
    "${xgboost_SOURCE_DIR}/cub/")
endif ()
|
||||
target_compile_options(testxgboost PRIVATE
|
||||
$<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda>
|
||||
$<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>
|
||||
|
||||
@@ -148,8 +148,8 @@ void TestLearnerSerialization(Args args, FeatureMap const& fmap, std::shared_ptr
|
||||
// Binary is not tested, as it is NOT reproducible.
|
||||
class SerializationTest : public ::testing::Test {
|
||||
protected:
|
||||
size_t constexpr static kRows = 10;
|
||||
size_t constexpr static kCols = 10;
|
||||
size_t constexpr static kRows = 15;
|
||||
size_t constexpr static kCols = 15;
|
||||
std::shared_ptr<DMatrix> p_dmat_;
|
||||
FeatureMap fmap_;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user