diff --git a/Jenkinsfile b/Jenkinsfile index 3ea3d9b11..43fc74cde 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -7,7 +7,7 @@ dockerRun = 'tests/ci_build/ci_build.sh' // Which CUDA version to use when building reference distribution wheel -ref_cuda_ver = '11.0' +ref_cuda_ver = '11.0.3' import groovy.transform.Field @@ -60,9 +60,9 @@ pipeline { 'build-cpu-rabit-mock': { BuildCPUMock() }, // Build reference, distribution-ready Python wheel with CUDA 11.0 // using CentOS 7 image - 'build-gpu-cuda11.0': { BuildCUDA(cuda_version: '11.0', build_rmm: true) }, - 'build-gpu-rpkg': { BuildRPackageWithCUDA(cuda_version: '11.0') }, - 'build-jvm-packages-gpu-cuda11.0': { BuildJVMPackagesWithCUDA(spark_version: '3.0.1', cuda_version: '11.0') }, + 'build-gpu-cuda11.0': { BuildCUDA(cuda_version: '11.0.3', build_rmm: true) }, + 'build-gpu-rpkg': { BuildRPackageWithCUDA(cuda_version: '11.0.3') }, + 'build-jvm-packages-gpu-cuda11.0': { BuildJVMPackagesWithCUDA(spark_version: '3.0.1', cuda_version: '11.0.3') }, 'build-jvm-packages': { BuildJVMPackages(spark_version: '3.0.1') }, 'build-jvm-doc': { BuildJVMDoc() } ]) @@ -77,9 +77,9 @@ pipeline { 'test-python-cpu': { TestPythonCPU() }, 'test-python-cpu-arm64': { TestPythonCPUARM64() }, // artifact_cuda_version doesn't apply to RMM tests; RMM tests will always match CUDA version between artifact and host env - 'test-python-gpu-cuda11.0': { TestPythonGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0', test_rmm: true) }, - 'test-python-mgpu-cuda11.0': { TestPythonGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0', multi_gpu: true, test_rmm: true) }, - 'test-cpp-gpu-cuda11.0': { TestCppGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0', test_rmm: true) }, + 'test-python-gpu-cuda11.0': { TestPythonGPU(artifact_cuda_version: '11.0.3', host_cuda_version: '11.0.3', test_rmm: true) }, + 'test-python-mgpu-cuda11.0': { TestPythonGPU(artifact_cuda_version: '11.0.3', host_cuda_version: '11.0.3', multi_gpu: true, test_rmm: true) }, + 'test-cpp-gpu-cuda11.0': { TestCppGPU(artifact_cuda_version: '11.0.3', host_cuda_version: '11.0.3', test_rmm: true) }, 'test-jvm-jdk8': { CrossTestJVMwithJDK(jdk_version: '8', spark_version: '3.0.0') } ]) } @@ -123,7 +123,7 @@ def ClangTidy() { echo "Running clang-tidy job..." def container_type = "clang_tidy" def docker_binary = "docker" - def dockerArgs = "--build-arg CUDA_VERSION_ARG=11.0" + def dockerArgs = "--build-arg CUDA_VERSION_ARG=11.0.3" sh """ ${dockerRun} ${container_type} ${docker_binary} ${dockerArgs} python3 tests/ci_build/tidy.py --cuda-archs 75 """ @@ -445,7 +445,7 @@ def DeployJVMPackages(args) { if (env.BRANCH_NAME == 'master' || env.BRANCH_NAME.startsWith('release')) { echo 'Deploying to xgboost-maven-repo S3 repo...' sh """ - ${dockerRun} jvm_gpu_build docker --build-arg CUDA_VERSION_ARG=11.0 tests/ci_build/deploy_jvm_packages.sh ${args.spark_version} + ${dockerRun} jvm_gpu_build docker --build-arg CUDA_VERSION_ARG=11.0.3 tests/ci_build/deploy_jvm_packages.sh ${args.spark_version} """ } deleteDir() diff --git a/src/common/device_helpers.cu b/src/common/device_helpers.cu index ec69bc900..eabd0ef59 100644 --- a/src/common/device_helpers.cu +++ b/src/common/device_helpers.cu @@ -38,6 +38,9 @@ void AllReducer::Init(int _device_ordinal) { int32_t const rank = rabit::GetRank(); int32_t const world = rabit::GetWorldSize(); + if (world == 1) { + return; + } std::vector uuids(world * kUuidLength, 0); auto s_uuid = xgboost::common::Span{uuids.data(), uuids.size()}; diff --git a/src/common/device_helpers.cuh b/src/common/device_helpers.cuh index 9adf866fe..9c6fd45fd 100644 --- a/src/common/device_helpers.cuh +++ b/src/common/device_helpers.cuh @@ -775,13 +775,16 @@ class AllReducer { */ void AllReduceSum(const double *sendbuff, double *recvbuff, int count) { + if (rabit::GetWorldSize() == 1) { + return; + } #ifdef XGBOOST_USE_NCCL CHECK(initialised_); dh::safe_cuda(cudaSetDevice(device_ordinal_)); dh::safe_nccl(ncclAllReduce(sendbuff, recvbuff, count, ncclDouble, ncclSum, comm_, stream_)); allreduce_bytes_ += count * sizeof(double); allreduce_calls_ += 1; -#endif +#endif // XGBOOST_USE_NCCL } /** @@ -796,9 +799,12 @@ class AllReducer { void AllGather(uint32_t const* data, size_t length, dh::caching_device_vector* recvbuf) { + size_t world = rabit::GetWorldSize(); + if (world == 1) { + return; + } #ifdef XGBOOST_USE_NCCL CHECK(initialised_); - size_t world = rabit::GetWorldSize(); recvbuf->resize(length * world); safe_nccl(ncclAllGather(data, recvbuf->data().get(), length, ncclUint32, comm_, stream_)); @@ -813,9 +819,11 @@ class AllReducer { * \param recvbuff The recvbuff. * \param count Number of elements. */ - void AllReduceSum(const float *sendbuff, float *recvbuff, int count) { #ifdef XGBOOST_USE_NCCL + if (rabit::GetWorldSize() == 1) { + return; + } CHECK(initialised_); dh::safe_cuda(cudaSetDevice(device_ordinal_)); dh::safe_nccl(ncclAllReduce(sendbuff, recvbuff, count, ncclFloat, ncclSum, comm_, stream_)); @@ -836,6 +844,9 @@ class AllReducer { void AllReduceSum(const int64_t *sendbuff, int64_t *recvbuff, int count) { #ifdef XGBOOST_USE_NCCL + if (rabit::GetWorldSize() == 1) { + return; + } CHECK(initialised_); dh::safe_cuda(cudaSetDevice(device_ordinal_)); @@ -845,6 +856,9 @@ class AllReducer { void AllReduceSum(const uint32_t *sendbuff, uint32_t *recvbuff, int count) { #ifdef XGBOOST_USE_NCCL + if (rabit::GetWorldSize() == 1) { + return; + } CHECK(initialised_); dh::safe_cuda(cudaSetDevice(device_ordinal_)); @@ -853,6 +867,9 @@ class AllReducer { } void AllReduceSum(const uint64_t *sendbuff, uint64_t *recvbuff, int count) { + if (rabit::GetWorldSize() == 1) { + return; + } #ifdef XGBOOST_USE_NCCL CHECK(initialised_); @@ -867,12 +884,15 @@ class AllReducer { std::enable_if_t::value && !std::is_same::value> // NOLINT * = nullptr> - void AllReduceSum(const T *sendbuff, T *recvbuff, int count) { // NOLINT + void AllReduceSum(const T *sendbuff, T *recvbuff, int count) { // NOLINT #ifdef XGBOOST_USE_NCCL + if (rabit::GetWorldSize() == 1) { + return; + } CHECK(initialised_); dh::safe_cuda(cudaSetDevice(device_ordinal_)); - static_assert(sizeof(unsigned long long) == sizeof(uint64_t), ""); // NOLINT + static_assert(sizeof(unsigned long long) == sizeof(uint64_t), ""); // NOLINT dh::safe_nccl(ncclAllReduce(sendbuff, recvbuff, count, ncclUint64, ncclSum, comm_, stream_)); #endif } diff --git a/tests/ci_build/Dockerfile.cpu b/tests/ci_build/Dockerfile.cpu index 49346f7fc..4210f63f0 100644 --- a/tests/ci_build/Dockerfile.cpu +++ b/tests/ci_build/Dockerfile.cpu @@ -10,13 +10,13 @@ RUN \ apt-get install -y software-properties-common && \ add-apt-repository ppa:ubuntu-toolchain-r/test && \ apt-get update && \ - apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libasan2 libidn11 ninja-build gcc-8 g++-8 && \ + apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libasan2 libidn11 ninja-build gcc-8 g++-8 openjdk-8-jdk-headless && \ # CMake wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \ bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr && \ # Python - wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ - bash Miniconda3.sh -b -p /opt/python + wget https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh && \ + bash Mambaforge-Linux-x86_64.sh -b -p /opt/python ENV PATH=/opt/python/bin:$PATH ENV CC=gcc-8 @@ -24,10 +24,11 @@ ENV CXX=g++-8 ENV CPP=cpp-8 ENV GOSU_VERSION 1.10 +ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/ # Create new Conda environment COPY conda_env/cpu_test.yml /scripts/ -RUN conda env create -n cpu_test --file=/scripts/cpu_test.yml +RUN mamba env create -n cpu_test --file=/scripts/cpu_test.yml # Install lightweight sudo (not bound to TTY) RUN set -ex; \ diff --git a/tests/ci_build/Dockerfile.gpu b/tests/ci_build/Dockerfile.gpu index 0dff1a4c5..ea4452564 100644 --- a/tests/ci_build/Dockerfile.gpu +++ b/tests/ci_build/Dockerfile.gpu @@ -10,7 +10,7 @@ SHELL ["/bin/bash", "-c"] # Use Bash as shell RUN \ apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub && \ apt-get update && \ - apt-get install -y wget unzip bzip2 libgomp1 build-essential && \ + apt-get install -y wget unzip bzip2 libgomp1 build-essential openjdk-8-jdk-headless && \ # Python wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ bash Miniconda3.sh -b -p /opt/python @@ -19,11 +19,14 @@ ENV PATH=/opt/python/bin:$PATH # Create new Conda environment with cuDF, Dask, and cuPy RUN \ - conda create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \ - python=3.8 cudf=21.10* rmm=21.10* cudatoolkit=$CUDA_VERSION_ARG dask dask-cuda=21.10* dask-cudf=21.10* cupy=9.1* \ - numpy pytest scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis + conda install -c conda-forge mamba && \ + mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \ + python=3.8 cudf=22.04* rmm=22.04* cudatoolkit=$CUDA_VERSION_ARG dask dask-cuda=22.04* dask-cudf=22.04* cupy \ + numpy pytest scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \ + pyspark cloudpickle cuda-python=11.7.0 ENV GOSU_VERSION 1.10 +ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/ # Install lightweight sudo (not bound to TTY) RUN set -ex; \ diff --git a/tests/ci_build/Dockerfile.gpu_build b/tests/ci_build/Dockerfile.gpu_build index 2f463ce83..b9eaa0a59 100644 --- a/tests/ci_build/Dockerfile.gpu_build +++ b/tests/ci_build/Dockerfile.gpu_build @@ -24,7 +24,7 @@ RUN \ # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html) RUN \ export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \ - export NCCL_VERSION=2.7.5-1 && \ + export NCCL_VERSION=2.13.4-1 && \ apt-get update && \ apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT} diff --git a/tests/ci_build/Dockerfile.gpu_build_centos7 b/tests/ci_build/Dockerfile.gpu_build_centos7 index 138edacc2..611a0d5d5 100644 --- a/tests/ci_build/Dockerfile.gpu_build_centos7 +++ b/tests/ci_build/Dockerfile.gpu_build_centos7 @@ -4,7 +4,6 @@ ARG CUDA_VERSION_ARG # Install all basic requirements RUN \ - rpm --erase gpg-pubkey-7fa2af80* && \ curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/D42D0685.pub | sed '/^Version/d' \ > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \ yum install -y epel-release centos-release-scl && \ @@ -22,7 +21,7 @@ RUN \ # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html) RUN \ export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \ - export NCCL_VERSION=2.7.3-1 && \ + export NCCL_VERSION=2.13.4-1 && \ wget -nv -nc https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \ rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \ yum -y update && \ diff --git a/tests/ci_build/Dockerfile.jvm_gpu_build b/tests/ci_build/Dockerfile.jvm_gpu_build index 587b60bbc..cddbb1f65 100644 --- a/tests/ci_build/Dockerfile.jvm_gpu_build +++ b/tests/ci_build/Dockerfile.jvm_gpu_build @@ -4,7 +4,6 @@ ARG CUDA_VERSION_ARG # Install all basic requirements RUN \ - rpm --erase gpg-pubkey-7fa2af80* && \ curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/D42D0685.pub | sed '/^Version/d' \ > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \ yum install -y epel-release centos-release-scl && \ @@ -25,12 +24,10 @@ RUN \ # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html) RUN \ export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \ - export NCCL_VERSION=2.8.3-1 && \ - wget -nv -nc https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \ - rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \ + export NCCL_VERSION=2.13.4-1 && \ + yum-config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \ yum -y update && \ - yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT} && \ - rm -f nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm; + yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT} ENV PATH=/opt/python/bin:/opt/maven/bin:$PATH ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc diff --git a/tests/ci_build/Dockerfile.rmm b/tests/ci_build/Dockerfile.rmm index 0c0166a7c..237aa11b7 100644 --- a/tests/ci_build/Dockerfile.rmm +++ b/tests/ci_build/Dockerfile.rmm @@ -18,7 +18,7 @@ RUN \ # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html) RUN \ export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \ - export NCCL_VERSION=2.7.5-1 && \ + export NCCL_VERSION=2.13.4-1 && \ apt-get update && \ apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT} @@ -27,7 +27,7 @@ ENV PATH=/opt/python/bin:$PATH # Create new Conda environment with RMM RUN \ conda create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \ - python=3.9 rmm=22.06* cudatoolkit=$CUDA_VERSION_ARG cmake + python=3.9 rmm=22.04* cudatoolkit=$CUDA_VERSION_ARG cmake ENV GOSU_VERSION 1.10 diff --git a/tests/ci_build/build_python_wheels.sh b/tests/ci_build/build_python_wheels.sh index b56cde7e1..abe520708 100644 --- a/tests/ci_build/build_python_wheels.sh +++ b/tests/ci_build/build_python_wheels.sh @@ -29,13 +29,15 @@ if [[ "$platform_id" == macosx_* ]]; then setup_env_var='CIBW_TARGET_OSX_ARM64=1' # extra flag to be passed to setup.py export PYTHON_CROSSENV=1 export MACOSX_DEPLOYMENT_TARGET=12.0 - OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/11.1.0/download/osx-arm64/llvm-openmp-11.1.0-hf3c4609_1.tar.bz2" + #OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/11.1.0/download/osx-arm64/llvm-openmp-11.1.0-hf3c4609_1.tar.bz2" + OPENMP_URL="https://xgboost-ci-jenkins-artifacts.s3.us-west-2.amazonaws.com/llvm-openmp-11.1.0-hf3c4609_1-osx-arm64.tar.bz2" elif [[ "$platform_id" == macosx_x86_64 ]]; then # MacOS, Intel wheel_tag=macosx_10_15_x86_64.macosx_11_0_x86_64.macosx_12_0_x86_64 cpython_ver=37 export MACOSX_DEPLOYMENT_TARGET=10.13 - OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/11.1.0/download/osx-64/llvm-openmp-11.1.0-hda6cdc1_1.tar.bz2" + #OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/11.1.0/download/osx-64/llvm-openmp-11.1.0-hda6cdc1_1.tar.bz2" + OPENMP_URL="https://xgboost-ci-jenkins-artifacts.s3.us-west-2.amazonaws.com/llvm-openmp-11.1.0-hda6cdc1_1-osx-64.tar.bz2" else echo "Platform not supported: $platform_id" exit 3 diff --git a/tests/ci_build/conda_env/cpu_test.yml b/tests/ci_build/conda_env/cpu_test.yml index 3180a6685..08d3a0c91 100644 --- a/tests/ci_build/conda_env/cpu_test.yml +++ b/tests/ci_build/conda_env/cpu_test.yml @@ -30,15 +30,13 @@ dependencies: - jsonschema - boto3 - awscli -- numba -- llvmlite - py-ubjson - cffi - pyarrow -- protobuf<=3.20 +- protobuf +- pyspark>=3.3.0 +- cloudpickle +- shap +- modin - pip: - - shap - - ipython # required by shap at import time. - - sphinx_rtd_theme - datatable - - modin[all] diff --git a/tests/ci_build/entrypoint.sh b/tests/ci_build/entrypoint.sh index 8e830399f..a0c5f56bb 100755 --- a/tests/ci_build/entrypoint.sh +++ b/tests/ci_build/entrypoint.sh @@ -20,9 +20,9 @@ else fi if [[ -n $CI_BUILD_UID ]] && [[ -n $CI_BUILD_GID ]]; then - groupadd -o -g "${CI_BUILD_GID}" "${CI_BUILD_GROUP}" + groupadd -o -g "${CI_BUILD_GID}" "${CI_BUILD_GROUP}" || true useradd -o -m -g "${CI_BUILD_GID}" -u "${CI_BUILD_UID}" \ - "${CI_BUILD_USER}" + "${CI_BUILD_USER}" || true export HOME="/home/${CI_BUILD_USER}" shopt -s dotglob cp -r /root/* "$HOME/" diff --git a/tests/ci_build/rename_whl.py b/tests/ci_build/rename_whl.py index f4f8bd41f..ec0b1d0e4 100644 --- a/tests/ci_build/rename_whl.py +++ b/tests/ci_build/rename_whl.py @@ -42,4 +42,4 @@ with cd(dirname): filesize = os.path.getsize(new_name) / 1024 / 1024 # MB msg = f"Limit of wheel size set by PyPI is exceeded. {new_name}: {filesize}" - assert filesize <= 200, msg + assert filesize <= 300, msg