[CI] Add nightly CI job to test against dev version of deps (#10351)
* [CI] Add nightly CI job to test against dev version of deps
* Update build-containers.sh
* Add build step
* Wait for build artifact
* Try pinning dask
* Address reviewers' comments
* Fix unbound variable error
* Specify dev version exactly
* Pin dask=2024.1.1
This commit is contained in:
parent
eb6622ff7a
commit
4057f861c1
@ -20,16 +20,16 @@ case "${container}" in
|
||||
cpu)
|
||||
;;
|
||||
|
||||
gpu)
|
||||
gpu|gpu_build_centos7)
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
|
||||
;;
|
||||
|
||||
gpu_build_centos7)
|
||||
gpu_dev_ver)
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$DEV_RAPIDS_VERSION"
|
||||
;;
|
||||
|
||||
jvm_gpu_build)
|
||||
|
||||
@ -25,6 +25,7 @@ set -x
|
||||
CUDA_VERSION=11.8.0
|
||||
NCCL_VERSION=2.16.5-1
|
||||
RAPIDS_VERSION=24.04
|
||||
DEV_RAPIDS_VERSION=24.06
|
||||
SPARK_VERSION=3.4.0
|
||||
JDK_VERSION=8
|
||||
R_VERSION=4.3.2
|
||||
|
||||
37
tests/buildkite/pipeline-nightly.yml
Normal file
37
tests/buildkite/pipeline-nightly.yml
Normal file
@ -0,0 +1,37 @@
|
||||
# Nightly CI pipeline, to test against dev versions of dependencies
|
||||
|
||||
env:
|
||||
DOCKER_CACHE_ECR_ID: "492475357299"
|
||||
DOCKER_CACHE_ECR_REGION: "us-west-2"
|
||||
DISABLE_RELEASE: "1"
|
||||
# DISABLE_RELEASE (above): skip uploading artifacts to the S3 bucket
|
||||
# Also, don't build all CUDA archs; just build sm_75
|
||||
USE_DEPS_DEV_VER: "1"
|
||||
# USE_DEPS_DEV_VER (above): use dev versions of RAPIDS and other dependencies
|
||||
steps:
|
||||
#### -------- CONTAINER BUILD --------
|
||||
- label: ":docker: Build containers"
|
||||
commands:
|
||||
- "tests/buildkite/build-containers.sh gpu_build_centos7"
|
||||
- "tests/buildkite/build-containers.sh gpu_dev_ver"
|
||||
key: build-containers
|
||||
agents:
|
||||
queue: linux-amd64-cpu
|
||||
- wait
|
||||
|
||||
- label: ":console: Build CUDA"
|
||||
command: "tests/buildkite/build-cuda.sh"
|
||||
key: build-cuda
|
||||
agents:
|
||||
queue: linux-amd64-cpu
|
||||
- wait
|
||||
- label: ":console: Test Python package, single GPU"
|
||||
command: "tests/buildkite/test-python-gpu.sh gpu"
|
||||
key: test-python-gpu
|
||||
agents:
|
||||
queue: linux-amd64-gpu
|
||||
- label: ":console: Test Python package, 4 GPUs"
|
||||
command: "tests/buildkite/test-python-gpu.sh mgpu"
|
||||
key: test-python-mgpu
|
||||
agents:
|
||||
queue: linux-amd64-mgpu
|
||||
@ -22,10 +22,19 @@ chmod +x build/testxgboost
|
||||
# Allocate extra space in /dev/shm to enable NCCL
|
||||
export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g'
|
||||
|
||||
command_wrapper="tests/ci_build/ci_build.sh gpu --use-gpus --build-arg "`
|
||||
if [[ -z "${USE_DEPS_DEV_VER-}" ]]
|
||||
then
|
||||
container_tag='gpu'
|
||||
rapids_version=${RAPIDS_VERSION}
|
||||
else
|
||||
container_tag='gpu_dev_ver'
|
||||
rapids_version=${DEV_RAPIDS_VERSION}
|
||||
fi
|
||||
|
||||
command_wrapper="tests/ci_build/ci_build.sh ${container_tag} --use-gpus --build-arg "`
|
||||
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
|
||||
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION --build-arg "`
|
||||
`"NCCL_VERSION_ARG=$NCCL_VERSION"
|
||||
`"RAPIDS_VERSION_ARG=${rapids_version} --build-arg "`
|
||||
`"NCCL_VERSION_ARG=$NCCL_VERSION"
|
||||
|
||||
# Run specified test suite
|
||||
case "$suite" in
|
||||
|
||||
@ -4,7 +4,10 @@ set -euo pipefail
|
||||
|
||||
LATEST_RAPIDS_VERSION=$(gh api repos/rapidsai/cuml/releases/latest --jq '.name' | sed -e 's/^v\([[:digit:]]\+\.[[:digit:]]\+\).*/\1/')
|
||||
echo "LATEST_RAPIDS_VERSION = $LATEST_RAPIDS_VERSION"
|
||||
DEV_RAPIDS_VERSION=$(date +%Y-%m-%d -d "20${LATEST_RAPIDS_VERSION//./-}-01 + 2 month" | cut -c3-7 | tr - .)
|
||||
echo "DEV_RAPIDS_VERSION = $DEV_RAPIDS_VERSION"
|
||||
|
||||
PARENT_PATH=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P )
|
||||
|
||||
# Rewrite the RAPIDS_VERSION=<major>.<minor> line in conftest.sh to the latest released version.
# The ^ anchor keeps this expression from also matching the DEV_RAPIDS_VERSION= line.
# The path is quoted so a checkout directory containing spaces or glob characters reaches sed as a single argument.
sed -i "s/^RAPIDS_VERSION=[[:digit:]]\+\.[[:digit:]]\+/RAPIDS_VERSION=${LATEST_RAPIDS_VERSION}/" "$PARENT_PATH/conftest.sh"
|
||||
# Rewrite the DEV_RAPIDS_VERSION=<major>.<minor> line in conftest.sh to the computed dev version.
# The path is quoted so a checkout directory containing spaces or glob characters reaches sed as a single argument.
sed -i "s/^DEV_RAPIDS_VERSION=[[:digit:]]\+\.[[:digit:]]\+/DEV_RAPIDS_VERSION=${DEV_RAPIDS_VERSION}/" "$PARENT_PATH/conftest.sh"
|
||||
|
||||
@ -10,7 +10,7 @@ RUN \
|
||||
yum update -y && \
|
||||
yum install -y devtoolset-9 && \
|
||||
# Python
|
||||
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-aarch64.sh && \
|
||||
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-aarch64.sh && \
|
||||
bash conda.sh -b -p /opt/mambaforge
|
||||
|
||||
ENV PATH=/opt/mambaforge/bin:$PATH
|
||||
@ -23,7 +23,7 @@ ENV GOSU_VERSION 1.10
|
||||
COPY conda_env/aarch64_test.yml /scripts/
|
||||
RUN mamba create -n aarch64_test && \
|
||||
mamba env update -n aarch64_test --file=/scripts/aarch64_test.yml && \
|
||||
mamba clean --all
|
||||
mamba clean --all --yes
|
||||
|
||||
# Install lightweight sudo (not bound to TTY)
|
||||
RUN set -ex; \
|
||||
|
||||
@ -12,7 +12,7 @@ RUN \
|
||||
apt-get update && \
|
||||
apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libidn12 cmake ninja-build gcc-9 g++-9 openjdk-8-jdk-headless && \
|
||||
# Python
|
||||
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
|
||||
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
|
||||
bash conda.sh -b -p /opt/mambaforge
|
||||
|
||||
ENV PATH=/opt/mambaforge/bin:$PATH
|
||||
@ -36,7 +36,7 @@ RUN git clone -b v1.49.1 https://github.com/grpc/grpc.git \
|
||||
COPY conda_env/linux_cpu_test.yml /scripts/
|
||||
RUN mamba create -n linux_cpu_test && \
|
||||
mamba env update -n linux_cpu_test --file=/scripts/linux_cpu_test.yml && \
|
||||
mamba clean --all && \
|
||||
mamba clean --all --yes && \
|
||||
conda run --no-capture-output -n linux_cpu_test pip install buildkite-test-collector
|
||||
|
||||
# Install lightweight sudo (not bound to TTY)
|
||||
|
||||
@ -14,7 +14,7 @@ RUN \
|
||||
apt-get update && \
|
||||
apt-get install -y wget unzip bzip2 libgomp1 build-essential openjdk-8-jdk-headless && \
|
||||
# Python
|
||||
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
|
||||
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
|
||||
bash conda.sh -b -p /opt/mambaforge
|
||||
|
||||
ENV PATH=/opt/mambaforge/bin:$PATH
|
||||
@ -22,14 +22,14 @@ ENV PATH=/opt/mambaforge/bin:$PATH
|
||||
# Create new Conda environment with cuDF, Dask, and cuPy
|
||||
RUN \
|
||||
export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \
|
||||
mamba create -y -n gpu_test -c rapidsai -c nvidia -c conda-forge \
|
||||
mamba create -y -n gpu_test -c rapidsai -c conda-forge -c nvidia \
|
||||
python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
|
||||
"nccl>=${NCCL_SHORT_VER}" \
|
||||
dask=2024.1.1 \
|
||||
dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
|
||||
numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
|
||||
"pyspark>=3.4.0" cloudpickle cuda-python && \
|
||||
mamba clean --all && \
|
||||
mamba clean --all --yes && \
|
||||
conda run --no-capture-output -n gpu_test pip install buildkite-test-collector
|
||||
|
||||
ENV GOSU_VERSION 1.10
|
||||
|
||||
@ -13,7 +13,7 @@ RUN \
|
||||
yum -y update && \
|
||||
yum install -y tar unzip wget xz git which ninja-build devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ && \
|
||||
# Python
|
||||
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
|
||||
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
|
||||
bash conda.sh -b -p /opt/mambaforge && \
|
||||
/opt/mambaforge/bin/python -m pip install awscli && \
|
||||
# CMake
|
||||
|
||||
@ -35,7 +35,7 @@ RUN \
|
||||
|
||||
run \
|
||||
# Python
|
||||
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
|
||||
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
|
||||
bash conda.sh -b -p /opt/mambaforge && \
|
||||
/opt/mambaforge/bin/python -m pip install auditwheel awscli && \
|
||||
# CMake
|
||||
|
||||
52
tests/ci_build/Dockerfile.gpu_dev_ver
Normal file
52
tests/ci_build/Dockerfile.gpu_dev_ver
Normal file
@ -0,0 +1,52 @@
|
||||
# Container to test XGBoost against dev versions of dependencies
|
||||
|
||||
ARG CUDA_VERSION_ARG
|
||||
FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu22.04
|
||||
ARG CUDA_VERSION_ARG
|
||||
ARG RAPIDS_VERSION_ARG
|
||||
# RAPIDS_VERSION_ARG should be the first 4 digits of the RAPIDS dev version (e.g. 24.06)
|
||||
ARG NCCL_VERSION_ARG
|
||||
|
||||
# Environment
|
||||
ENV DEBIAN_FRONTEND noninteractive
|
||||
# Use Bash as the shell for subsequent RUN instructions.
# The comment is on its own line because Dockerfile syntax only treats '#' as a comment at the start of a line.
SHELL ["/bin/bash", "-c"]
|
||||
|
||||
# Install all basic requirements
|
||||
RUN \
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub && \
|
||||
apt-get update && \
|
||||
apt-get install -y wget unzip bzip2 libgomp1 build-essential openjdk-8-jdk-headless && \
|
||||
# Python
|
||||
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
|
||||
bash conda.sh -b -p /opt/mambaforge
|
||||
|
||||
ENV PATH=/opt/mambaforge/bin:$PATH
|
||||
|
||||
# Create new Conda environment with dev versions of cuDF, Dask, and cuPy
|
||||
RUN \
|
||||
export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \
|
||||
mamba create -y -n gpu_test -c rapidsai-nightly -c conda-forge -c nvidia \
|
||||
python=3.10 "cudf=$RAPIDS_VERSION_ARG.*" "rmm=$RAPIDS_VERSION_ARG.*" cudatoolkit=$CUDA_VERSION_ARG \
|
||||
"nccl>=${NCCL_SHORT_VER}" \
|
||||
dask=2024.1.1 \
|
||||
"dask-cuda=$RAPIDS_VERSION_ARG.*" "dask-cudf=$RAPIDS_VERSION_ARG.*" cupy \
|
||||
numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
|
||||
"pyspark>=3.4.0" cloudpickle cuda-python && \
|
||||
mamba clean --all --yes && \
|
||||
conda run --no-capture-output -n gpu_test pip install buildkite-test-collector
|
||||
|
||||
ENV GOSU_VERSION 1.10
|
||||
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
|
||||
|
||||
# Install lightweight sudo (not bound to TTY)
|
||||
RUN set -ex; \
|
||||
wget -nv -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
|
||||
chmod +x /usr/local/bin/gosu && \
|
||||
gosu nobody true
|
||||
|
||||
# Default entry-point to use if running locally
|
||||
# It will preserve attributes of created files
|
||||
COPY entrypoint.sh /scripts/
|
||||
|
||||
WORKDIR /workspace
|
||||
ENTRYPOINT ["/scripts/entrypoint.sh"]
|
||||
@ -9,7 +9,7 @@ RUN \
|
||||
devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ \
|
||||
devtoolset-9-runtime devtoolset-9-libstdc++-devel && \
|
||||
# Python
|
||||
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
|
||||
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
|
||||
bash conda.sh -b -p /opt/mambaforge && \
|
||||
# CMake
|
||||
wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \
|
||||
|
||||
@ -13,7 +13,7 @@ RUN \
|
||||
apt-get update && \
|
||||
apt-get install -y tar unzip wget openjdk-$JDK_VERSION-jdk libgomp1 && \
|
||||
# Python
|
||||
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
|
||||
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
|
||||
bash conda.sh -b -p /opt/mambaforge && \
|
||||
/opt/mambaforge/bin/pip install awscli && \
|
||||
# Maven
|
||||
|
||||
@ -12,7 +12,7 @@ RUN \
|
||||
yum -y update && \
|
||||
yum install -y tar unzip wget xz git which ninja-build java-1.8.0-openjdk-devel devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ && \
|
||||
# Python
|
||||
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
|
||||
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
|
||||
bash conda.sh -b -p /opt/mambaforge && \
|
||||
# CMake
|
||||
wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user