[CI] Add nightly CI job to test against dev version of deps (#10351)

* [CI] Add nightly CI job to test against dev version of deps

* Update build-containers.sh

* Add build step

* Wait for build artifact

* Try pinning dask

* Address reviewers' comments

* Fix unbound variable error

* Specify dev version exactly

* Pin dask=2024.1.1
This commit is contained in:
Philip Hyunsu Cho 2024-06-03 19:28:55 -07:00 committed by GitHub
parent eb6622ff7a
commit 4057f861c1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 120 additions and 18 deletions

View File

@ -20,16 +20,16 @@ case "${container}" in
cpu) cpu)
;; ;;
gpu) gpu|gpu_build_centos7)
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION" BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION" BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION" BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
;; ;;
gpu_build_centos7) gpu_dev_ver)
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION" BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION" BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION" BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$DEV_RAPIDS_VERSION"
;; ;;
jvm_gpu_build) jvm_gpu_build)

View File

@ -25,6 +25,7 @@ set -x
CUDA_VERSION=11.8.0 CUDA_VERSION=11.8.0
NCCL_VERSION=2.16.5-1 NCCL_VERSION=2.16.5-1
RAPIDS_VERSION=24.04 RAPIDS_VERSION=24.04
DEV_RAPIDS_VERSION=24.06
SPARK_VERSION=3.4.0 SPARK_VERSION=3.4.0
JDK_VERSION=8 JDK_VERSION=8
R_VERSION=4.3.2 R_VERSION=4.3.2

View File

@ -0,0 +1,37 @@
# Nightly CI pipeline, to test against dev versions of dependencies
env:
  DOCKER_CACHE_ECR_ID: "492475357299"
  DOCKER_CACHE_ECR_REGION: "us-west-2"
  # DISABLE_RELEASE: skip uploading artifacts to S3 bucket.
  # Also, don't build all CUDA archs; just build sm_75.
  DISABLE_RELEASE: "1"
  # USE_DEPS_DEV_VER: use dev versions of RAPIDS and other dependencies.
  USE_DEPS_DEV_VER: "1"
steps:
  #### -------- CONTAINER BUILD --------
  # Builds the container used for compiling (gpu_build_centos7) and the
  # container that carries the dev versions of the dependencies (gpu_dev_ver).
  - label: ":docker: Build containers"
    commands:
      - "tests/buildkite/build-containers.sh gpu_build_centos7"
      - "tests/buildkite/build-containers.sh gpu_dev_ver"
    key: build-containers
    agents:
      queue: linux-amd64-cpu
  # Do not start the CUDA build until the containers are available.
  - wait
  - label: ":console: Build CUDA"
    command: "tests/buildkite/build-cuda.sh"
    key: build-cuda
    agents:
      queue: linux-amd64-cpu
  # Do not start the tests until the build step has finished.
  - wait
  # The two test steps below are not separated by a wait step.
  - label: ":console: Test Python package, single GPU"
    command: "tests/buildkite/test-python-gpu.sh gpu"
    key: test-python-gpu
    agents:
      queue: linux-amd64-gpu
  - label: ":console: Test Python package, 4 GPUs"
    command: "tests/buildkite/test-python-gpu.sh mgpu"
    key: test-python-mgpu
    agents:
      queue: linux-amd64-mgpu

View File

@ -22,9 +22,18 @@ chmod +x build/testxgboost
# Allocate extra space in /dev/shm to enable NCCL # Allocate extra space in /dev/shm to enable NCCL
export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g' export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g'
command_wrapper="tests/ci_build/ci_build.sh gpu --use-gpus --build-arg "` if [[ -z "${USE_DEPS_DEV_VER-}" ]]
then
container_tag='gpu'
rapids_version=${RAPIDS_VERSION}
else
container_tag='gpu_dev_ver'
rapids_version=${DEV_RAPIDS_VERSION}
fi
command_wrapper="tests/ci_build/ci_build.sh ${container_tag} --use-gpus --build-arg "`
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "` `"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION --build-arg "` `"RAPIDS_VERSION_ARG=${rapids_version} --build-arg "`
`"NCCL_VERSION_ARG=$NCCL_VERSION" `"NCCL_VERSION_ARG=$NCCL_VERSION"
# Run specified test suite # Run specified test suite

View File

@ -4,7 +4,10 @@ set -euo pipefail
LATEST_RAPIDS_VERSION=$(gh api repos/rapidsai/cuml/releases/latest --jq '.name' | sed -e 's/^v\([[:digit:]]\+\.[[:digit:]]\+\).*/\1/') LATEST_RAPIDS_VERSION=$(gh api repos/rapidsai/cuml/releases/latest --jq '.name' | sed -e 's/^v\([[:digit:]]\+\.[[:digit:]]\+\).*/\1/')
echo "LATEST_RAPIDS_VERSION = $LATEST_RAPIDS_VERSION" echo "LATEST_RAPIDS_VERSION = $LATEST_RAPIDS_VERSION"
DEV_RAPIDS_VERSION=$(date +%Y-%m-%d -d "20${LATEST_RAPIDS_VERSION//./-}-01 + 2 month" | cut -c3-7 | tr - .)
echo "DEV_RAPIDS_VERSION = $DEV_RAPIDS_VERSION"
PARENT_PATH=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P ) PARENT_PATH=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P )
sed -i "s/^RAPIDS_VERSION=[[:digit:]]\+\.[[:digit:]]\+/RAPIDS_VERSION=${LATEST_RAPIDS_VERSION}/" $PARENT_PATH/conftest.sh sed -i "s/^RAPIDS_VERSION=[[:digit:]]\+\.[[:digit:]]\+/RAPIDS_VERSION=${LATEST_RAPIDS_VERSION}/" $PARENT_PATH/conftest.sh
sed -i "s/^DEV_RAPIDS_VERSION=[[:digit:]]\+\.[[:digit:]]\+/DEV_RAPIDS_VERSION=${DEV_RAPIDS_VERSION}/" $PARENT_PATH/conftest.sh

View File

@ -10,7 +10,7 @@ RUN \
yum update -y && \ yum update -y && \
yum install -y devtoolset-9 && \ yum install -y devtoolset-9 && \
# Python # Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-aarch64.sh && \ wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-aarch64.sh && \
bash conda.sh -b -p /opt/mambaforge bash conda.sh -b -p /opt/mambaforge
ENV PATH=/opt/mambaforge/bin:$PATH ENV PATH=/opt/mambaforge/bin:$PATH
@ -23,7 +23,7 @@ ENV GOSU_VERSION 1.10
COPY conda_env/aarch64_test.yml /scripts/ COPY conda_env/aarch64_test.yml /scripts/
RUN mamba create -n aarch64_test && \ RUN mamba create -n aarch64_test && \
mamba env update -n aarch64_test --file=/scripts/aarch64_test.yml && \ mamba env update -n aarch64_test --file=/scripts/aarch64_test.yml && \
mamba clean --all mamba clean --all --yes
# Install lightweight sudo (not bound to TTY) # Install lightweight sudo (not bound to TTY)
RUN set -ex; \ RUN set -ex; \

View File

@ -12,7 +12,7 @@ RUN \
apt-get update && \ apt-get update && \
apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libidn12 cmake ninja-build gcc-9 g++-9 openjdk-8-jdk-headless && \ apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libidn12 cmake ninja-build gcc-9 g++-9 openjdk-8-jdk-headless && \
# Python # Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \ wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
bash conda.sh -b -p /opt/mambaforge bash conda.sh -b -p /opt/mambaforge
ENV PATH=/opt/mambaforge/bin:$PATH ENV PATH=/opt/mambaforge/bin:$PATH
@ -36,7 +36,7 @@ RUN git clone -b v1.49.1 https://github.com/grpc/grpc.git \
COPY conda_env/linux_cpu_test.yml /scripts/ COPY conda_env/linux_cpu_test.yml /scripts/
RUN mamba create -n linux_cpu_test && \ RUN mamba create -n linux_cpu_test && \
mamba env update -n linux_cpu_test --file=/scripts/linux_cpu_test.yml && \ mamba env update -n linux_cpu_test --file=/scripts/linux_cpu_test.yml && \
mamba clean --all && \ mamba clean --all --yes && \
conda run --no-capture-output -n linux_cpu_test pip install buildkite-test-collector conda run --no-capture-output -n linux_cpu_test pip install buildkite-test-collector
# Install lightweight sudo (not bound to TTY) # Install lightweight sudo (not bound to TTY)

View File

@ -14,7 +14,7 @@ RUN \
apt-get update && \ apt-get update && \
apt-get install -y wget unzip bzip2 libgomp1 build-essential openjdk-8-jdk-headless && \ apt-get install -y wget unzip bzip2 libgomp1 build-essential openjdk-8-jdk-headless && \
# Python # Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \ wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
bash conda.sh -b -p /opt/mambaforge bash conda.sh -b -p /opt/mambaforge
ENV PATH=/opt/mambaforge/bin:$PATH ENV PATH=/opt/mambaforge/bin:$PATH
@ -22,14 +22,14 @@ ENV PATH=/opt/mambaforge/bin:$PATH
# Create new Conda environment with cuDF, Dask, and cuPy # Create new Conda environment with cuDF, Dask, and cuPy
RUN \ RUN \
export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \ export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \
mamba create -y -n gpu_test -c rapidsai -c nvidia -c conda-forge \ mamba create -y -n gpu_test -c rapidsai -c conda-forge -c nvidia \
python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \ python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
"nccl>=${NCCL_SHORT_VER}" \ "nccl>=${NCCL_SHORT_VER}" \
dask=2024.1.1 \ dask=2024.1.1 \
dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \ dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \ numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
"pyspark>=3.4.0" cloudpickle cuda-python && \ "pyspark>=3.4.0" cloudpickle cuda-python && \
mamba clean --all && \ mamba clean --all --yes && \
conda run --no-capture-output -n gpu_test pip install buildkite-test-collector conda run --no-capture-output -n gpu_test pip install buildkite-test-collector
ENV GOSU_VERSION 1.10 ENV GOSU_VERSION 1.10

View File

@ -13,7 +13,7 @@ RUN \
yum -y update && \ yum -y update && \
yum install -y tar unzip wget xz git which ninja-build devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ && \ yum install -y tar unzip wget xz git which ninja-build devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ && \
# Python # Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \ wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
bash conda.sh -b -p /opt/mambaforge && \ bash conda.sh -b -p /opt/mambaforge && \
/opt/mambaforge/bin/python -m pip install awscli && \ /opt/mambaforge/bin/python -m pip install awscli && \
# CMake # CMake

View File

@ -35,7 +35,7 @@ RUN \
run \ run \
# Python # Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \ wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
bash conda.sh -b -p /opt/mambaforge && \ bash conda.sh -b -p /opt/mambaforge && \
/opt/mambaforge/bin/python -m pip install auditwheel awscli && \ /opt/mambaforge/bin/python -m pip install auditwheel awscli && \
# CMake # CMake

View File

@ -0,0 +1,52 @@
# Container to test XGBoost against dev versions of dependencies
ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu22.04
# An ARG declared before FROM is not visible inside the build stage,
# so CUDA_VERSION_ARG is declared again here.
ARG CUDA_VERSION_ARG
# Should be first 4 digits of the dev version (e.g. 24.06)
ARG RAPIDS_VERSION_ARG
ARG NCCL_VERSION_ARG

# Environment
ENV DEBIAN_FRONTEND=noninteractive
# Use Bash as shell
# (kept on its own line: Dockerfile comments are only recognised at line start)
SHELL ["/bin/bash", "-c"]

# Install all basic requirements
RUN \
    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub && \
    apt-get update && \
    apt-get install -y wget unzip bzip2 libgomp1 build-essential openjdk-8-jdk-headless && \
    # Python
    wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
    bash conda.sh -b -p /opt/mambaforge

ENV PATH=/opt/mambaforge/bin:$PATH

# Create new Conda environment with dev versions of cuDF, Dask, and cuPy
# NCCL_SHORT_VER is NCCL_VERSION_ARG with everything from the first "-" removed;
# it is used as the lower bound for the nccl package.
# The RAPIDS packages are matched as "<RAPIDS_VERSION_ARG>.*" from the
# rapidsai-nightly channel.
RUN \
    export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \
    mamba create -y -n gpu_test -c rapidsai-nightly -c conda-forge -c nvidia \
        python=3.10 "cudf=$RAPIDS_VERSION_ARG.*" "rmm=$RAPIDS_VERSION_ARG.*" cudatoolkit=$CUDA_VERSION_ARG \
        "nccl>=${NCCL_SHORT_VER}" \
        dask=2024.1.1 \
        "dask-cuda=$RAPIDS_VERSION_ARG.*" "dask-cudf=$RAPIDS_VERSION_ARG.*" cupy \
        numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
        "pyspark>=3.4.0" cloudpickle cuda-python && \
    mamba clean --all --yes && \
    conda run --no-capture-output -n gpu_test pip install buildkite-test-collector

ENV GOSU_VERSION=1.10
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/

# Install lightweight sudo (not bound to TTY)
# The final "gosu nobody true" runs the downloaded binary once, so the build
# fails here if it cannot execute.
RUN set -ex; \
    wget -nv -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
    chmod +x /usr/local/bin/gosu && \
    gosu nobody true

# Default entry-point to use if running locally
# It will preserve attributes of created files
COPY entrypoint.sh /scripts/

WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]

View File

@ -9,7 +9,7 @@ RUN \
devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ \ devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ \
devtoolset-9-runtime devtoolset-9-libstdc++-devel && \ devtoolset-9-runtime devtoolset-9-libstdc++-devel && \
# Python # Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \ wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
bash conda.sh -b -p /opt/mambaforge && \ bash conda.sh -b -p /opt/mambaforge && \
# CMake # CMake
wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \ wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \

View File

@ -13,7 +13,7 @@ RUN \
apt-get update && \ apt-get update && \
apt-get install -y tar unzip wget openjdk-$JDK_VERSION-jdk libgomp1 && \ apt-get install -y tar unzip wget openjdk-$JDK_VERSION-jdk libgomp1 && \
# Python # Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \ wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
bash conda.sh -b -p /opt/mambaforge && \ bash conda.sh -b -p /opt/mambaforge && \
/opt/mambaforge/bin/pip install awscli && \ /opt/mambaforge/bin/pip install awscli && \
# Maven # Maven

View File

@ -12,7 +12,7 @@ RUN \
yum -y update && \ yum -y update && \
yum install -y tar unzip wget xz git which ninja-build java-1.8.0-openjdk-devel devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ && \ yum install -y tar unzip wget xz git which ninja-build java-1.8.0-openjdk-devel devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ && \
# Python # Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \ wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
bash conda.sh -b -p /opt/mambaforge && \ bash conda.sh -b -p /opt/mambaforge && \
# CMake # CMake
wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \ wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \