From f121f2738f6c887b43767cbe3897639181972bb5 Mon Sep 17 00:00:00 2001
From: Philip Hyunsu Cho
Date: Mon, 5 Oct 2020 17:54:14 -0700
Subject: [PATCH] [CI] Fix Docker build for CUDA 11 (#6202)

---
 Jenkinsfile                                 | 18 +++++++++---------
 tests/ci_build/Dockerfile.clang_tidy        |  5 +++--
 tests/ci_build/Dockerfile.gpu               |  7 ++++---
 tests/ci_build/Dockerfile.gpu_build         |  8 ++++----
 tests/ci_build/Dockerfile.gpu_build_centos6 |  8 ++++----
 tests/ci_build/Dockerfile.gpu_jvm           |  5 +++--
 tests/ci_build/Dockerfile.jvm_gpu_build     |  8 ++++----
 tests/ci_build/Dockerfile.rmm               | 10 +++++-----
 8 files changed, 36 insertions(+), 33 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 640eaca1b..fde997f76 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -132,7 +132,7 @@ def ClangTidy() {
     echo "Running clang-tidy job..."
     def container_type = "clang_tidy"
     def docker_binary = "docker"
-    def dockerArgs = "--build-arg CUDA_VERSION=10.1"
+    def dockerArgs = "--build-arg CUDA_VERSION_ARG=10.1"
     sh """
     ${dockerRun} ${container_type} ${docker_binary} ${dockerArgs} python3 tests/ci_build/tidy.py
     """
@@ -205,7 +205,7 @@ def BuildCUDA(args) {
     echo "Build with CUDA ${args.cuda_version}"
     def container_type = GetCUDABuildContainerType(args.cuda_version)
     def docker_binary = "docker"
-    def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
+    def docker_args = "--build-arg CUDA_VERSION_ARG=${args.cuda_version}"
     def arch_flag = ""
     if (env.BRANCH_NAME != 'master' && !(env.BRANCH_NAME.startsWith('release'))) {
       arch_flag = "-DGPU_COMPUTE_VER=75"
@@ -228,7 +228,7 @@ def BuildCUDA(args) {
       echo "Build with CUDA ${args.cuda_version} and RMM"
       container_type = "rmm"
       docker_binary = "docker"
-      docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
+      docker_args = "--build-arg CUDA_VERSION_ARG=${args.cuda_version}"
       sh """
       rm -rf build/
       ${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/build_via_cmake.sh --conda-env=gpu_test -DUSE_CUDA=ON -DUSE_NCCL=ON -DPLUGIN_RMM=ON ${arch_flag}
@@ -250,7 +250,7 @@ def BuildJVMPackagesWithCUDA(args) {
     echo "Build XGBoost4J-Spark with Spark ${args.spark_version}, CUDA ${args.cuda_version}"
     def container_type = "jvm_gpu_build"
     def docker_binary = "nvidia-docker"
-    def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
+    def docker_args = "--build-arg CUDA_VERSION_ARG=${args.cuda_version}"
     def arch_flag = ""
     if (env.BRANCH_NAME != 'master' && !(env.BRANCH_NAME.startsWith('release'))) {
       arch_flag = "-DGPU_COMPUTE_VER=75"
@@ -325,7 +325,7 @@ def TestPythonGPU(args) {
     echo "Test Python GPU: CUDA ${args.host_cuda_version}"
     def container_type = "gpu"
     def docker_binary = "nvidia-docker"
-    def docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}"
+    def docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}"
     def mgpu_indicator = (args.multi_gpu) ? 'mgpu' : 'gpu'
     // Allocate extra space in /dev/shm to enable NCCL
     def docker_extra_params = (args.multi_gpu) ? "CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g'" : ''
@@ -363,7 +363,7 @@ def TestCppGPU(args) {
     echo "Test C++, CUDA ${args.host_cuda_version}"
     def container_type = "gpu"
     def docker_binary = "nvidia-docker"
-    def docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}"
+    def docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}"
     sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} build/testxgboost"
     if (args.test_rmm) {
       sh "rm -rfv build/"
@@ -371,7 +371,7 @@ def TestCppGPU(args) {
       echo "Test C++, CUDA ${args.host_cuda_version} with RMM"
       container_type = "rmm"
       docker_binary = "nvidia-docker"
-      docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}"
+      docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}"
       sh """
       ${dockerRun} ${container_type} ${docker_binary} ${docker_args} bash -c "source activate gpu_test && build/testxgboost --use-rmm-pool --gtest_filter=-*DeathTest.*"
       """
@@ -392,7 +392,7 @@ def CrossTestJVMwithJDKGPU(args) {
     }
     def container_type = "gpu_jvm"
     def docker_binary = "nvidia-docker"
-    def docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}"
+    def docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}"
     sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/test_jvm_gpu_cross.sh"
     deleteDir()
   }
@@ -444,7 +444,7 @@ def DeployJVMPackages(args) {
       ${dockerRun} jvm docker tests/ci_build/deploy_jvm_packages.sh ${args.spark_version} 0
       """
       sh """
-      ${dockerRun} jvm_gpu_build docker --build-arg CUDA_VERSION=10.0 tests/ci_build/deploy_jvm_packages.sh ${args.spark_version} 1
+      ${dockerRun} jvm_gpu_build docker --build-arg CUDA_VERSION_ARG=10.0 tests/ci_build/deploy_jvm_packages.sh ${args.spark_version} 1
       """
     }
     deleteDir()
diff --git a/tests/ci_build/Dockerfile.clang_tidy b/tests/ci_build/Dockerfile.clang_tidy
index 2316feda1..661e9b925 100644
--- a/tests/ci_build/Dockerfile.clang_tidy
+++ b/tests/ci_build/Dockerfile.clang_tidy
@@ -1,5 +1,6 @@
-ARG CUDA_VERSION
-FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu18.04
+ARG CUDA_VERSION_ARG
+FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu18.04
+ARG CUDA_VERSION_ARG
 
 # Environment
 ENV DEBIAN_FRONTEND noninteractive
diff --git a/tests/ci_build/Dockerfile.gpu b/tests/ci_build/Dockerfile.gpu
index 6a1eace65..ce5e6cc59 100644
--- a/tests/ci_build/Dockerfile.gpu
+++ b/tests/ci_build/Dockerfile.gpu
@@ -1,5 +1,6 @@
-ARG CUDA_VERSION
-FROM nvidia/cuda:$CUDA_VERSION-runtime-ubuntu18.04
+ARG CUDA_VERSION_ARG
+FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu18.04
+ARG CUDA_VERSION_ARG
 
 # Environment
 ENV DEBIAN_FRONTEND noninteractive
@@ -18,7 +19,7 @@ ENV PATH=/opt/python/bin:$PATH
 # Create new Conda environment with cuDF, Dask, and cuPy
 RUN \
     conda create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
-        python=3.7 cudf=0.16* rmm=0.16* cudatoolkit=$CUDA_VERSION dask dask-cuda dask-cudf cupy \
+        python=3.7 cudf=0.16* rmm=0.16* cudatoolkit=$CUDA_VERSION_ARG dask dask-cuda dask-cudf cupy \
         numpy pytest scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis
 
 ENV GOSU_VERSION 1.10
diff --git a/tests/ci_build/Dockerfile.gpu_build b/tests/ci_build/Dockerfile.gpu_build
index 8a741fc87..a52e62eb3 100644
--- a/tests/ci_build/Dockerfile.gpu_build
+++ b/tests/ci_build/Dockerfile.gpu_build
@@ -1,6 +1,6 @@
-ARG CUDA_VERSION
-FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu16.04
-ARG CUDA_VERSION
+ARG CUDA_VERSION_ARG
+FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu16.04
+ARG CUDA_VERSION_ARG
 
 # Environment
 ENV DEBIAN_FRONTEND noninteractive
@@ -19,7 +19,7 @@ RUN \
 
 # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
 RUN \
-    export CUDA_SHORT=`echo $CUDA_VERSION | egrep -o '[0-9]+\.[0-9]'` && \
+    export CUDA_SHORT=`echo $CUDA_VERSION_ARG | egrep -o '[0-9]+\.[0-9]'` && \
     export NCCL_VERSION=2.7.5-1 && \
     apt-get update && \
     apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT}
diff --git a/tests/ci_build/Dockerfile.gpu_build_centos6 b/tests/ci_build/Dockerfile.gpu_build_centos6
index e755ce12a..b2adb8a06 100644
--- a/tests/ci_build/Dockerfile.gpu_build_centos6
+++ b/tests/ci_build/Dockerfile.gpu_build_centos6
@@ -1,6 +1,6 @@
-ARG CUDA_VERSION
-FROM nvidia/cuda:$CUDA_VERSION-devel-centos6
-ARG CUDA_VERSION
+ARG CUDA_VERSION_ARG
+FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos6
+ARG CUDA_VERSION_ARG
 
 # Environment
 ENV DEBIAN_FRONTEND noninteractive
@@ -33,7 +33,7 @@ RUN \
 
 # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
 RUN \
-    export CUDA_SHORT=`echo $CUDA_VERSION | egrep -o '[0-9]+\.[0-9]'` && \
+    export CUDA_SHORT=`echo $CUDA_VERSION_ARG | egrep -o '[0-9]+\.[0-9]'` && \
     export NCCL_VERSION=2.4.8-1 && \
     wget https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
     rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
diff --git a/tests/ci_build/Dockerfile.gpu_jvm b/tests/ci_build/Dockerfile.gpu_jvm
index acd7b9b86..f11e739b1 100644
--- a/tests/ci_build/Dockerfile.gpu_jvm
+++ b/tests/ci_build/Dockerfile.gpu_jvm
@@ -1,5 +1,6 @@
-ARG CUDA_VERSION
-FROM nvidia/cuda:$CUDA_VERSION-runtime-ubuntu16.04
+ARG CUDA_VERSION_ARG
+FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu16.04
+ARG CUDA_VERSION_ARG
 ARG JDK_VERSION=8
 ARG SPARK_VERSION=3.0.0
 
diff --git a/tests/ci_build/Dockerfile.jvm_gpu_build b/tests/ci_build/Dockerfile.jvm_gpu_build
index ed6c3d689..b0b46e6e9 100644
--- a/tests/ci_build/Dockerfile.jvm_gpu_build
+++ b/tests/ci_build/Dockerfile.jvm_gpu_build
@@ -1,6 +1,6 @@
-ARG CUDA_VERSION
-FROM nvidia/cuda:$CUDA_VERSION-devel-centos6
-ARG CUDA_VERSION
+ARG CUDA_VERSION_ARG
+FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos6
+ARG CUDA_VERSION_ARG
 
 # Environment
 ENV DEBIAN_FRONTEND noninteractive
@@ -30,7 +30,7 @@ RUN \
 
 # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
 RUN \
-    export CUDA_SHORT=`echo $CUDA_VERSION | egrep -o '[0-9]+\.[0-9]'` && \
+    export CUDA_SHORT=`echo $CUDA_VERSION_ARG | egrep -o '[0-9]+\.[0-9]'` && \
     export NCCL_VERSION=2.4.8-1 && \
     wget https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
     rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
diff --git a/tests/ci_build/Dockerfile.rmm b/tests/ci_build/Dockerfile.rmm
index 65290be6a..beaed7355 100644
--- a/tests/ci_build/Dockerfile.rmm
+++ b/tests/ci_build/Dockerfile.rmm
@@ -1,6 +1,6 @@
-ARG CUDA_VERSION
-FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu18.04
-ARG CUDA_VERSION
+ARG CUDA_VERSION_ARG
+FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu18.04
+ARG CUDA_VERSION_ARG
 
 # Environment
 ENV DEBIAN_FRONTEND noninteractive
@@ -19,7 +19,7 @@ RUN \
 
 # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
 RUN \
-    export CUDA_SHORT=`echo $CUDA_VERSION | egrep -o '[0-9]+\.[0-9]'` && \
+    export CUDA_SHORT=`echo $CUDA_VERSION_ARG | egrep -o '[0-9]+\.[0-9]'` && \
    export NCCL_VERSION=2.7.5-1 && \
     apt-get update && \
     apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT}
@@ -29,7 +29,7 @@ ENV PATH=/opt/python/bin:$PATH
 # Create new Conda environment with RMM
 RUN \
     conda create -n gpu_test -c nvidia -c rapidsai-nightly -c rapidsai -c conda-forge -c defaults \
-        python=3.7 rmm=0.16* cudatoolkit=$CUDA_VERSION
+        python=3.7 rmm=0.16* cudatoolkit=$CUDA_VERSION_ARG
 
 ENV GOSU_VERSION 1.10
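
Note on the pattern applied in every Dockerfile hunk above: the build argument is renamed from CUDA_VERSION to CUDA_VERSION_ARG (presumably to avoid colliding with the CUDA_VERSION environment variable that the nvidia/cuda base images define), and the ARG is re-declared immediately after FROM. The re-declaration is needed because of Docker's ARG scoping rules: an ARG declared before FROM is visible only to the FROM instruction itself, so later instructions in the build stage see an empty value unless the ARG is declared again. A minimal sketch of the pattern follows; the RUN line is illustrative only and does not appear in the patched files.

# Declared before FROM so the base-image tag can be parameterized.
ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu18.04
# Re-declared after FROM to bring the build-arg value back into scope for this stage.
ARG CUDA_VERSION_ARG
# Hypothetical use of the value inside the stage (e.g. picking matching CUDA packages).
RUN echo "Building against CUDA $CUDA_VERSION_ARG"

An image built from such a Dockerfile would be invoked with, for example:
docker build --build-arg CUDA_VERSION_ARG=11.0 .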