[breaking][CI] Use CTK 12.4 (#10697)

This commit is contained in:
Philip Hyunsu Cho 2024-08-21 19:59:34 -07:00 committed by GitHub
parent 142bdc73ec
commit cd83fe6033
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 40 additions and 20 deletions

View File

@ -22,10 +22,10 @@ function set_buildkite_env_vars_in_container {
set -x set -x
CUDA_VERSION=11.8.0 CUDA_VERSION=12.4.1
NCCL_VERSION=2.16.5-1 NCCL_VERSION=2.22.3-1
RAPIDS_VERSION=24.06 RAPIDS_VERSION=24.08
DEV_RAPIDS_VERSION=24.08 DEV_RAPIDS_VERSION=24.10
SPARK_VERSION=3.5.1 SPARK_VERSION=3.5.1
JDK_VERSION=8 JDK_VERSION=8
R_VERSION=4.3.2 R_VERSION=4.3.2

View File

@ -2,7 +2,7 @@ FROM ubuntu:22.04
# Environment # Environment
ENV DEBIAN_FRONTEND=noninteractive ENV DEBIAN_FRONTEND=noninteractive
SHELL ["/bin/bash", "-c"] # Use Bash as shell SHELL ["/bin/bash", "-c"]
# Install all basic requirements # Install all basic requirements
RUN \ RUN \
@ -24,9 +24,14 @@ ENV GOSU_VERSION=1.10
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/ ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/
# Install gRPC # Install gRPC
RUN git clone -b v1.49.1 https://github.com/grpc/grpc.git \ # Patch Abseil to apply https://github.com/abseil/abseil-cpp/issues/1629
--recurse-submodules --depth 1 --shallow-submodules && \ RUN git clone -b v1.65.4 https://github.com/grpc/grpc.git \
--recurse-submodules --depth 1 && \
pushd grpc && \ pushd grpc && \
pushd third_party/abseil-cpp && \
git fetch origin master && \
git cherry-pick -n cfde5f74e276049727f9556f13473a59fe77d9eb && \
popd && \
cmake -S . -B build -GNinja -DCMAKE_INSTALL_PREFIX=/opt/grpc -DCMAKE_CXX_VISIBILITY_PRESET=hidden && \ cmake -S . -B build -GNinja -DCMAKE_INSTALL_PREFIX=/opt/grpc -DCMAKE_CXX_VISIBILITY_PRESET=hidden && \
cmake --build build --target install && \ cmake --build build --target install && \
popd && \ popd && \

View File

@ -6,7 +6,7 @@ ARG NCCL_VERSION_ARG
# Environment # Environment
ENV DEBIAN_FRONTEND=noninteractive ENV DEBIAN_FRONTEND=noninteractive
SHELL ["/bin/bash", "-c"] # Use Bash as shell SHELL ["/bin/bash", "-c"]
# Install all basic requirements # Install all basic requirements
RUN \ RUN \
@ -22,8 +22,9 @@ ENV PATH=/opt/miniforge/bin:$PATH
# Create new Conda environment with cuDF, Dask, and cuPy # Create new Conda environment with cuDF, Dask, and cuPy
RUN \ RUN \
export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \ export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \
export CUDA_SHORT_VER=$(echo "$CUDA_VERSION_ARG" | grep -o -E '[0-9]+\.[0-9]') && \
mamba create -y -n gpu_test -c rapidsai -c conda-forge -c nvidia \ mamba create -y -n gpu_test -c rapidsai -c conda-forge -c nvidia \
python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \ python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cuda-version=$CUDA_SHORT_VER \
"nccl>=${NCCL_SHORT_VER}" \ "nccl>=${NCCL_SHORT_VER}" \
dask \ dask \
dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \ dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \

View File

@ -36,27 +36,38 @@ ENV CUDAHOSTCXX=/opt/rh/gcc-toolset-10/root/usr/bin/c++
ENV GOSU_VERSION=1.10 ENV GOSU_VERSION=1.10
# Install RMM (Patch out -Werror) # Install gRPC
# Patch Abseil to apply https://github.com/abseil/abseil-cpp/issues/1629
RUN git clone -b v1.65.4 https://github.com/grpc/grpc.git \
--recurse-submodules --depth 1 && \
pushd grpc && \
pushd third_party/abseil-cpp && \
git fetch origin master && \
git cherry-pick -n cfde5f74e276049727f9556f13473a59fe77d9eb && \
popd && \
cmake -S . -B build -GNinja -DCMAKE_INSTALL_PREFIX=/opt/grpc -DCMAKE_CXX_VISIBILITY_PRESET=hidden && \
cmake --build build --target install && \
popd && \
rm -rf grpc
# Install RMM
# Patch out -Werror
# Patch CCCL 2.5.0 to apply https://github.com/NVIDIA/cccl/pull/1957
RUN git clone -b v${RAPIDS_VERSION_ARG}.00 https://github.com/rapidsai/rmm.git --recurse-submodules --depth 1 && \ RUN git clone -b v${RAPIDS_VERSION_ARG}.00 https://github.com/rapidsai/rmm.git --recurse-submodules --depth 1 && \
pushd rmm && \ pushd rmm && \
find . -name CMakeLists.txt -print0 | xargs -0 sed -i 's/-Werror//g' && \ find . -name CMakeLists.txt -print0 | xargs -0 sed -i 's/-Werror//g' && \
mkdir build && \ mkdir build && \
pushd build && \ pushd build && \
cmake .. -GNinja -DCMAKE_INSTALL_PREFIX=/opt/rmm -DCUDA_STATIC_RUNTIME=ON && \ cmake .. -GNinja -DCMAKE_INSTALL_PREFIX=/opt/rmm -DCUDA_STATIC_RUNTIME=ON && \
pushd _deps/cccl-src/ && \
git fetch origin main && \
git cherry-pick -n 9fcb32c228865f21f2b002b29d38a06b4c6fbd73 && \
popd && \
cmake --build . --target install && \ cmake --build . --target install && \
popd && \ popd && \
popd && \ popd && \
rm -rf rmm rm -rf rmm
# Install gRPC
RUN git clone -b v1.49.1 https://github.com/grpc/grpc.git \
--recurse-submodules --depth 1 && \
pushd grpc && \
cmake -S . -B build -GNinja -DCMAKE_INSTALL_PREFIX=/opt/grpc -DCMAKE_CXX_VISIBILITY_PRESET=hidden && \
cmake --build build --target install && \
popd && \
rm -rf grpc
# Install lightweight sudo (not bound to TTY) # Install lightweight sudo (not bound to TTY)
RUN set -ex; \ RUN set -ex; \
wget -nv -nc -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \ wget -nv -nc -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \

View File

@ -25,8 +25,9 @@ ENV PATH=/opt/miniforge/bin:$PATH
# Create new Conda environment with dev versions of cuDF, Dask, and cuPy # Create new Conda environment with dev versions of cuDF, Dask, and cuPy
RUN \ RUN \
export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \ export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \
export CUDA_SHORT_VER=$(echo "$CUDA_VERSION_ARG" | grep -o -E '[0-9]+\.[0-9]') && \
mamba create -y -n gpu_test -c rapidsai-nightly -c conda-forge -c nvidia \ mamba create -y -n gpu_test -c rapidsai-nightly -c conda-forge -c nvidia \
python=3.10 "cudf=$RAPIDS_VERSION_ARG.*" "rmm=$RAPIDS_VERSION_ARG.*" cudatoolkit=$CUDA_VERSION_ARG \ python=3.10 "cudf=$RAPIDS_VERSION_ARG.*" "rmm=$RAPIDS_VERSION_ARG.*" cuda-version=$CUDA_SHORT_VER \
"nccl>=${NCCL_SHORT_VER}" \ "nccl>=${NCCL_SHORT_VER}" \
dask \ dask \
"dask-cuda=$RAPIDS_VERSION_ARG.*" "dask-cudf=$RAPIDS_VERSION_ARG.*" cupy \ "dask-cuda=$RAPIDS_VERSION_ARG.*" "dask-cudf=$RAPIDS_VERSION_ARG.*" cupy \

View File

@ -56,6 +56,7 @@ case "$suite" in
set -x set -x
install_xgboost install_xgboost
setup_pyspark_envs setup_pyspark_envs
python -c 'from cupy.cuda import jitify; jitify._init_module()'
pytest -v -s -rxXs --fulltrace --durations=0 -m "not mgpu" ${args} tests/python-gpu pytest -v -s -rxXs --fulltrace --durations=0 -m "not mgpu" ${args} tests/python-gpu
unset_pyspark_envs unset_pyspark_envs
uninstall_xgboost uninstall_xgboost
@ -67,6 +68,7 @@ case "$suite" in
set -x set -x
install_xgboost install_xgboost
setup_pyspark_envs setup_pyspark_envs
python -c 'from cupy.cuda import jitify; jitify._init_module()'
pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/python-gpu pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/python-gpu
pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/test_distributed/test_gpu_with_dask pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/test_distributed/test_gpu_with_dask
pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/test_distributed/test_gpu_with_spark pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/test_distributed/test_gpu_with_spark