diff --git a/tests/buildkite/conftest.sh b/tests/buildkite/conftest.sh index a8d568a0d..185b4a356 100755 --- a/tests/buildkite/conftest.sh +++ b/tests/buildkite/conftest.sh @@ -22,10 +22,10 @@ function set_buildkite_env_vars_in_container { set -x -CUDA_VERSION=11.8.0 -NCCL_VERSION=2.16.5-1 -RAPIDS_VERSION=24.06 -DEV_RAPIDS_VERSION=24.08 +CUDA_VERSION=12.4.1 +NCCL_VERSION=2.22.3-1 +RAPIDS_VERSION=24.08 +DEV_RAPIDS_VERSION=24.10 SPARK_VERSION=3.5.1 JDK_VERSION=8 R_VERSION=4.3.2 diff --git a/tests/ci_build/Dockerfile.cpu b/tests/ci_build/Dockerfile.cpu index c81697ff6..22db93572 100644 --- a/tests/ci_build/Dockerfile.cpu +++ b/tests/ci_build/Dockerfile.cpu @@ -2,7 +2,7 @@ FROM ubuntu:22.04 # Environment ENV DEBIAN_FRONTEND=noninteractive -SHELL ["/bin/bash", "-c"] # Use Bash as shell +SHELL ["/bin/bash", "-c"] # Install all basic requirements RUN \ @@ -24,9 +24,14 @@ ENV GOSU_VERSION=1.10 ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/ # Install gRPC -RUN git clone -b v1.49.1 https://github.com/grpc/grpc.git \ - --recurse-submodules --depth 1 --shallow-submodules && \ +# Patch Abseil to apply https://github.com/abseil/abseil-cpp/issues/1629 +RUN git clone -b v1.65.4 https://github.com/grpc/grpc.git \ + --recurse-submodules --depth 1 && \ pushd grpc && \ + pushd third_party/abseil-cpp && \ + git fetch origin master && \ + git cherry-pick -n cfde5f74e276049727f9556f13473a59fe77d9eb && \ + popd && \ cmake -S . 
-B build -GNinja -DCMAKE_INSTALL_PREFIX=/opt/grpc -DCMAKE_CXX_VISIBILITY_PRESET=hidden && \ cmake --build build --target install && \ popd && \ diff --git a/tests/ci_build/Dockerfile.gpu b/tests/ci_build/Dockerfile.gpu index f2cfe2ef9..884fc924c 100644 --- a/tests/ci_build/Dockerfile.gpu +++ b/tests/ci_build/Dockerfile.gpu @@ -6,7 +6,7 @@ ARG NCCL_VERSION_ARG # Environment ENV DEBIAN_FRONTEND=noninteractive -SHELL ["/bin/bash", "-c"] # Use Bash as shell +SHELL ["/bin/bash", "-c"] # Install all basic requirements RUN \ @@ -22,8 +22,9 @@ ENV PATH=/opt/miniforge/bin:$PATH # Create new Conda environment with cuDF, Dask, and cuPy RUN \ export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \ + export CUDA_SHORT_VER=$(echo "$CUDA_VERSION_ARG" | grep -o -E '^[0-9]+\.[0-9]+') && \ mamba create -y -n gpu_test -c rapidsai -c conda-forge -c nvidia \ - python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \ + python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cuda-version=$CUDA_SHORT_VER \ "nccl>=${NCCL_SHORT_VER}" \ dask \ dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \ diff --git a/tests/ci_build/Dockerfile.gpu_build_rockylinux8 b/tests/ci_build/Dockerfile.gpu_build_rockylinux8 index 5f8a5bfec..8869fb468 100644 --- a/tests/ci_build/Dockerfile.gpu_build_rockylinux8 +++ b/tests/ci_build/Dockerfile.gpu_build_rockylinux8 @@ -36,27 +36,38 @@ ENV CUDAHOSTCXX=/opt/rh/gcc-toolset-10/root/usr/bin/c++ ENV GOSU_VERSION=1.10 -# Install RMM (Patch out -Werror) +# Install gRPC +# Patch Abseil to apply https://github.com/abseil/abseil-cpp/issues/1629 +RUN git clone -b v1.65.4 https://github.com/grpc/grpc.git \ + --recurse-submodules --depth 1 && \ + pushd grpc && \ + pushd third_party/abseil-cpp && \ + git fetch origin master && \ + git cherry-pick -n cfde5f74e276049727f9556f13473a59fe77d9eb && \ + popd && \ + cmake -S .
-B build -GNinja -DCMAKE_INSTALL_PREFIX=/opt/grpc -DCMAKE_CXX_VISIBILITY_PRESET=hidden && \ + cmake --build build --target install && \ + popd && \ + rm -rf grpc + +# Install RMM +# Patch out -Werror +# Patch CCCL 2.5.0 to apply https://github.com/NVIDIA/cccl/pull/1957 RUN git clone -b v${RAPIDS_VERSION_ARG}.00 https://github.com/rapidsai/rmm.git --recurse-submodules --depth 1 && \ pushd rmm && \ find . -name CMakeLists.txt -print0 | xargs -0 sed -i 's/-Werror//g' && \ mkdir build && \ pushd build && \ cmake .. -GNinja -DCMAKE_INSTALL_PREFIX=/opt/rmm -DCUDA_STATIC_RUNTIME=ON && \ + pushd _deps/cccl-src/ && \ + git fetch origin main && \ + git cherry-pick -n 9fcb32c228865f21f2b002b29d38a06b4c6fbd73 && \ + popd && \ cmake --build . --target install && \ popd && \ popd && \ rm -rf rmm -# Install gRPC -RUN git clone -b v1.49.1 https://github.com/grpc/grpc.git \ - --recurse-submodules --depth 1 && \ - pushd grpc && \ - cmake -S . -B build -GNinja -DCMAKE_INSTALL_PREFIX=/opt/grpc -DCMAKE_CXX_VISIBILITY_PRESET=hidden && \ - cmake --build build --target install && \ - popd && \ - rm -rf grpc - # Install lightweight sudo (not bound to TTY) RUN set -ex; \ wget -nv -nc -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \ diff --git a/tests/ci_build/Dockerfile.gpu_dev_ver b/tests/ci_build/Dockerfile.gpu_dev_ver index edf1f0cf1..d23c5e83c 100644 --- a/tests/ci_build/Dockerfile.gpu_dev_ver +++ b/tests/ci_build/Dockerfile.gpu_dev_ver @@ -25,8 +25,9 @@ ENV PATH=/opt/miniforge/bin:$PATH # Create new Conda environment with dev versions of cuDF, Dask, and cuPy RUN \ export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \ + export CUDA_SHORT_VER=$(echo "$CUDA_VERSION_ARG" | grep -o -E '^[0-9]+\.[0-9]+') && \ mamba create -y -n gpu_test -c rapidsai-nightly -c conda-forge -c nvidia \ - python=3.10 "cudf=$RAPIDS_VERSION_ARG.*" "rmm=$RAPIDS_VERSION_ARG.*" cudatoolkit=$CUDA_VERSION_ARG \ + python=3.10
"cudf=$RAPIDS_VERSION_ARG.*" "rmm=$RAPIDS_VERSION_ARG.*" cuda-version=$CUDA_SHORT_VER \ "nccl>=${NCCL_SHORT_VER}" \ dask \ "dask-cuda=$RAPIDS_VERSION_ARG.*" "dask-cudf=$RAPIDS_VERSION_ARG.*" cupy \ diff --git a/tests/ci_build/test_python.sh b/tests/ci_build/test_python.sh index dd43f43a9..a1a023046 100755 --- a/tests/ci_build/test_python.sh +++ b/tests/ci_build/test_python.sh @@ -56,6 +56,7 @@ case "$suite" in set -x install_xgboost setup_pyspark_envs + python -c 'from cupy.cuda import jitify; jitify._init_module()' pytest -v -s -rxXs --fulltrace --durations=0 -m "not mgpu" ${args} tests/python-gpu unset_pyspark_envs uninstall_xgboost @@ -67,6 +68,7 @@ case "$suite" in set -x install_xgboost setup_pyspark_envs + python -c 'from cupy.cuda import jitify; jitify._init_module()' pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/python-gpu pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/test_distributed/test_gpu_with_dask pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/test_distributed/test_gpu_with_spark