[breaking][CI] Use CTK 12.4 (#10697)
This commit is contained in:
parent
142bdc73ec
commit
cd83fe6033
@@ -22,10 +22,10 @@ function set_buildkite_env_vars_in_container {
|
||||
|
||||
set -x
|
||||
|
||||
CUDA_VERSION=11.8.0
|
||||
NCCL_VERSION=2.16.5-1
|
||||
RAPIDS_VERSION=24.06
|
||||
DEV_RAPIDS_VERSION=24.08
|
||||
CUDA_VERSION=12.4.1
|
||||
NCCL_VERSION=2.22.3-1
|
||||
RAPIDS_VERSION=24.08
|
||||
DEV_RAPIDS_VERSION=24.10
|
||||
SPARK_VERSION=3.5.1
|
||||
JDK_VERSION=8
|
||||
R_VERSION=4.3.2
|
||||
|
||||
@@ -2,7 +2,7 @@ FROM ubuntu:22.04
|
||||
|
||||
# Environment
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
SHELL ["/bin/bash", "-c"] # Use Bash as shell
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
|
||||
# Install all basic requirements
|
||||
RUN \
|
||||
@@ -24,9 +24,14 @@ ENV GOSU_VERSION=1.10
|
||||
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/
|
||||
|
||||
# Install gRPC
|
||||
RUN git clone -b v1.49.1 https://github.com/grpc/grpc.git \
|
||||
--recurse-submodules --depth 1 --shallow-submodules && \
|
||||
# Patch Abseil to apply https://github.com/abseil/abseil-cpp/issues/1629
|
||||
RUN git clone -b v1.65.4 https://github.com/grpc/grpc.git \
|
||||
--recurse-submodules --depth 1 && \
|
||||
pushd grpc && \
|
||||
pushd third_party/abseil-cpp && \
|
||||
git fetch origin master && \
|
||||
git cherry-pick -n cfde5f74e276049727f9556f13473a59fe77d9eb && \
|
||||
popd && \
|
||||
cmake -S . -B build -GNinja -DCMAKE_INSTALL_PREFIX=/opt/grpc -DCMAKE_CXX_VISIBILITY_PRESET=hidden && \
|
||||
cmake --build build --target install && \
|
||||
popd && \
|
||||
|
||||
@@ -6,7 +6,7 @@ ARG NCCL_VERSION_ARG
|
||||
|
||||
# Environment
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
SHELL ["/bin/bash", "-c"] # Use Bash as shell
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
|
||||
# Install all basic requirements
|
||||
RUN \
|
||||
@@ -22,8 +22,9 @@ ENV PATH=/opt/miniforge/bin:$PATH
|
||||
# Create new Conda environment with cuDF, Dask, and cuPy
|
||||
RUN \
|
||||
export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \
|
||||
export CUDA_SHORT_VER=$(echo "$CUDA_VERSION_ARG" | grep -o -E '[0-9]+\.[0-9]') && \
|
||||
mamba create -y -n gpu_test -c rapidsai -c conda-forge -c nvidia \
|
||||
python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
|
||||
python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cuda-version=$CUDA_SHORT_VER \
|
||||
"nccl>=${NCCL_SHORT_VER}" \
|
||||
dask \
|
||||
dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
|
||||
|
||||
@@ -36,27 +36,38 @@ ENV CUDAHOSTCXX=/opt/rh/gcc-toolset-10/root/usr/bin/c++
|
||||
|
||||
ENV GOSU_VERSION=1.10
|
||||
|
||||
# Install RMM (Patch out -Werror)
|
||||
# Install gRPC
|
||||
# Patch Abseil to apply https://github.com/abseil/abseil-cpp/issues/1629
|
||||
RUN git clone -b v1.65.4 https://github.com/grpc/grpc.git \
|
||||
--recurse-submodules --depth 1 && \
|
||||
pushd grpc && \
|
||||
pushd third_party/abseil-cpp && \
|
||||
git fetch origin master && \
|
||||
git cherry-pick -n cfde5f74e276049727f9556f13473a59fe77d9eb && \
|
||||
popd && \
|
||||
cmake -S . -B build -GNinja -DCMAKE_INSTALL_PREFIX=/opt/grpc -DCMAKE_CXX_VISIBILITY_PRESET=hidden && \
|
||||
cmake --build build --target install && \
|
||||
popd && \
|
||||
rm -rf grpc
|
||||
|
||||
# Install RMM
|
||||
# Patch out -Werror
|
||||
# Patch CCCL 2.5.0 to apply https://github.com/NVIDIA/cccl/pull/1957
|
||||
RUN git clone -b v${RAPIDS_VERSION_ARG}.00 https://github.com/rapidsai/rmm.git --recurse-submodules --depth 1 && \
|
||||
pushd rmm && \
|
||||
find . -name CMakeLists.txt -print0 | xargs -0 sed -i 's/-Werror//g' && \
|
||||
mkdir build && \
|
||||
pushd build && \
|
||||
cmake .. -GNinja -DCMAKE_INSTALL_PREFIX=/opt/rmm -DCUDA_STATIC_RUNTIME=ON && \
|
||||
pushd _deps/cccl-src/ && \
|
||||
git fetch origin main && \
|
||||
git cherry-pick -n 9fcb32c228865f21f2b002b29d38a06b4c6fbd73 && \
|
||||
popd && \
|
||||
cmake --build . --target install && \
|
||||
popd && \
|
||||
popd && \
|
||||
rm -rf rmm
|
||||
|
||||
# Install gRPC
|
||||
RUN git clone -b v1.49.1 https://github.com/grpc/grpc.git \
|
||||
--recurse-submodules --depth 1 && \
|
||||
pushd grpc && \
|
||||
cmake -S . -B build -GNinja -DCMAKE_INSTALL_PREFIX=/opt/grpc -DCMAKE_CXX_VISIBILITY_PRESET=hidden && \
|
||||
cmake --build build --target install && \
|
||||
popd && \
|
||||
rm -rf grpc
|
||||
|
||||
# Install lightweight sudo (not bound to TTY)
|
||||
RUN set -ex; \
|
||||
wget -nv -nc -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
|
||||
|
||||
@@ -25,8 +25,9 @@ ENV PATH=/opt/miniforge/bin:$PATH
|
||||
# Create new Conda environment with dev versions of cuDF, Dask, and cuPy
|
||||
RUN \
|
||||
export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \
|
||||
export CUDA_SHORT_VER=$(echo "$CUDA_VERSION_ARG" | grep -o -E '[0-9]+\.[0-9]') && \
|
||||
mamba create -y -n gpu_test -c rapidsai-nightly -c conda-forge -c nvidia \
|
||||
python=3.10 "cudf=$RAPIDS_VERSION_ARG.*" "rmm=$RAPIDS_VERSION_ARG.*" cudatoolkit=$CUDA_VERSION_ARG \
|
||||
python=3.10 "cudf=$RAPIDS_VERSION_ARG.*" "rmm=$RAPIDS_VERSION_ARG.*" cuda-version=$CUDA_SHORT_VER \
|
||||
"nccl>=${NCCL_SHORT_VER}" \
|
||||
dask \
|
||||
"dask-cuda=$RAPIDS_VERSION_ARG.*" "dask-cudf=$RAPIDS_VERSION_ARG.*" cupy \
|
||||
|
||||
@@ -56,6 +56,7 @@ case "$suite" in
|
||||
set -x
|
||||
install_xgboost
|
||||
setup_pyspark_envs
|
||||
python -c 'from cupy.cuda import jitify; jitify._init_module()'
|
||||
pytest -v -s -rxXs --fulltrace --durations=0 -m "not mgpu" ${args} tests/python-gpu
|
||||
unset_pyspark_envs
|
||||
uninstall_xgboost
|
||||
@@ -67,6 +68,7 @@ case "$suite" in
|
||||
set -x
|
||||
install_xgboost
|
||||
setup_pyspark_envs
|
||||
python -c 'from cupy.cuda import jitify; jitify._init_module()'
|
||||
pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/python-gpu
|
||||
pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/test_distributed/test_gpu_with_dask
|
||||
pytest -v -s -rxXs --fulltrace --durations=0 -m "mgpu" ${args} tests/test_distributed/test_gpu_with_spark
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user