diff --git a/tests/buildkite/build-containers.sh b/tests/buildkite/build-containers.sh index 9aec33d1f..1f8c587c9 100755 --- a/tests/buildkite/build-containers.sh +++ b/tests/buildkite/build-containers.sh @@ -20,16 +20,16 @@ case "${container}" in cpu) ;; - gpu) + gpu|gpu_build_centos7) BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION" BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION" BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION" ;; - gpu_build_centos7) + gpu_dev_ver) BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION" BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION" - BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION" + BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$DEV_RAPIDS_VERSION" ;; jvm_gpu_build) diff --git a/tests/buildkite/conftest.sh b/tests/buildkite/conftest.sh index 44043910b..ea7a5a00c 100755 --- a/tests/buildkite/conftest.sh +++ b/tests/buildkite/conftest.sh @@ -25,6 +25,7 @@ set -x CUDA_VERSION=11.8.0 NCCL_VERSION=2.16.5-1 RAPIDS_VERSION=24.04 +DEV_RAPIDS_VERSION=24.06 SPARK_VERSION=3.4.0 JDK_VERSION=8 R_VERSION=4.3.2 diff --git a/tests/buildkite/pipeline-nightly.yml b/tests/buildkite/pipeline-nightly.yml new file mode 100644 index 000000000..495d1b3e4 --- /dev/null +++ b/tests/buildkite/pipeline-nightly.yml @@ -0,0 +1,37 @@ +# Nightly CI pipeline, to test against dev versions of dependencies + +env: + DOCKER_CACHE_ECR_ID: "492475357299" + DOCKER_CACHE_ECR_REGION: "us-west-2" + DISABLE_RELEASE: "1" + # Skip uploading artifacts to S3 bucket + # Also, don't build all CUDA archs; just build sm_75 + USE_DEPS_DEV_VER: "1" + # Use dev versions of RAPIDS and other dependencies +steps: + #### -------- CONTAINER BUILD -------- + - label: ":docker: Build containers" + commands: + - "tests/buildkite/build-containers.sh gpu_build_centos7" + - "tests/buildkite/build-containers.sh gpu_dev_ver" + key: build-containers + agents: + queue: linux-amd64-cpu + - wait + + - label: ":console: Build CUDA" + command: "tests/buildkite/build-cuda.sh" + key: build-cuda + agents: + queue: linux-amd64-cpu + - wait + - label: ":console: Test Python package, single GPU" + command: "tests/buildkite/test-python-gpu.sh gpu" + key: test-python-gpu + agents: + queue: linux-amd64-gpu + - label: ":console: Test Python package, 4 GPUs" + command: "tests/buildkite/test-python-gpu.sh mgpu" + key: test-python-mgpu + agents: + queue: linux-amd64-mgpu diff --git a/tests/buildkite/test-python-gpu.sh b/tests/buildkite/test-python-gpu.sh index bb61a980d..d7bd729a2 100755 --- a/tests/buildkite/test-python-gpu.sh +++ b/tests/buildkite/test-python-gpu.sh @@ -22,10 +22,19 @@ chmod +x build/testxgboost # Allocate extra space in /dev/shm to enable NCCL export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g' -command_wrapper="tests/ci_build/ci_build.sh gpu --use-gpus --build-arg "` +if [[ -z "${USE_DEPS_DEV_VER-}" ]] +then + container_tag='gpu' + rapids_version=${RAPIDS_VERSION} +else + container_tag='gpu_dev_ver' + rapids_version=${DEV_RAPIDS_VERSION} +fi + +command_wrapper="tests/ci_build/ci_build.sh ${container_tag} --use-gpus --build-arg "` `"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "` - `"RAPIDS_VERSION_ARG=$RAPIDS_VERSION --build-arg "` - `"NCCL_VERSION_ARG=$NCCL_VERSION" + `"RAPIDS_VERSION_ARG=${rapids_version} --build-arg "` + `"NCCL_VERSION_ARG=$NCCL_VERSION" # Run specified test suite case "$suite" in diff --git a/tests/buildkite/update-rapids.sh b/tests/buildkite/update-rapids.sh index f617ccd11..f6a2675bd 100755 --- a/tests/buildkite/update-rapids.sh +++ b/tests/buildkite/update-rapids.sh @@ -4,7 +4,10 @@ set -euo pipefail LATEST_RAPIDS_VERSION=$(gh api repos/rapidsai/cuml/releases/latest --jq '.name' | sed -e 's/^v\([[:digit:]]\+\.[[:digit:]]\+\).*/\1/') echo "LATEST_RAPIDS_VERSION = $LATEST_RAPIDS_VERSION" +DEV_RAPIDS_VERSION=$(date +%Y-%m-%d -d "20${LATEST_RAPIDS_VERSION//./-}-01 + 2 month" | cut -c3-7 | tr - .) +echo "DEV_RAPIDS_VERSION = $DEV_RAPIDS_VERSION" PARENT_PATH=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P ) sed -i "s/^RAPIDS_VERSION=[[:digit:]]\+\.[[:digit:]]\+/RAPIDS_VERSION=${LATEST_RAPIDS_VERSION}/" $PARENT_PATH/conftest.sh +sed -i "s/^DEV_RAPIDS_VERSION=[[:digit:]]\+\.[[:digit:]]\+/DEV_RAPIDS_VERSION=${DEV_RAPIDS_VERSION}/" $PARENT_PATH/conftest.sh diff --git a/tests/ci_build/Dockerfile.aarch64 b/tests/ci_build/Dockerfile.aarch64 index 9b06e1c83..f028e635b 100644 --- a/tests/ci_build/Dockerfile.aarch64 +++ b/tests/ci_build/Dockerfile.aarch64 @@ -10,7 +10,7 @@ RUN \ yum update -y && \ yum install -y devtoolset-9 && \ # Python - wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-aarch64.sh && \ + wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-aarch64.sh && \ bash conda.sh -b -p /opt/mambaforge ENV PATH=/opt/mambaforge/bin:$PATH @@ -23,7 +23,7 @@ ENV GOSU_VERSION 1.10 COPY conda_env/aarch64_test.yml /scripts/ RUN mamba create -n aarch64_test && \ mamba env update -n aarch64_test --file=/scripts/aarch64_test.yml && \ - mamba clean --all + mamba clean --all --yes # Install lightweight sudo (not bound to TTY) RUN set -ex; \ diff --git a/tests/ci_build/Dockerfile.cpu b/tests/ci_build/Dockerfile.cpu index fa9ea772d..6ca574f51 100644 --- a/tests/ci_build/Dockerfile.cpu +++ b/tests/ci_build/Dockerfile.cpu @@ -12,7 +12,7 @@ RUN \ apt-get update && \ apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libidn12 cmake ninja-build gcc-9 g++-9 openjdk-8-jdk-headless && \ # Python - wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \ + wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \ bash conda.sh -b -p /opt/mambaforge ENV PATH=/opt/mambaforge/bin:$PATH @@ -36,7 +36,7 @@ RUN git clone -b v1.49.1 https://github.com/grpc/grpc.git \ COPY conda_env/linux_cpu_test.yml /scripts/ RUN mamba create -n linux_cpu_test && \ mamba env update -n linux_cpu_test --file=/scripts/linux_cpu_test.yml && \ - mamba clean --all && \ + mamba clean --all --yes && \ conda run --no-capture-output -n linux_cpu_test pip install buildkite-test-collector # Install lightweight sudo (not bound to TTY) diff --git a/tests/ci_build/Dockerfile.gpu b/tests/ci_build/Dockerfile.gpu index 255dd9d71..f68ba9d6b 100644 --- a/tests/ci_build/Dockerfile.gpu +++ b/tests/ci_build/Dockerfile.gpu @@ -14,7 +14,7 @@ RUN \ apt-get update && \ apt-get install -y wget unzip bzip2 libgomp1 build-essential openjdk-8-jdk-headless && \ # Python - wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \ + wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \ bash conda.sh -b -p /opt/mambaforge ENV PATH=/opt/mambaforge/bin:$PATH @@ -22,14 +22,14 @@ ENV PATH=/opt/mambaforge/bin:$PATH # Create new Conda environment with cuDF, Dask, and cuPy RUN \ export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \ - mamba create -y -n gpu_test -c rapidsai -c nvidia -c conda-forge \ + mamba create -y -n gpu_test -c rapidsai -c conda-forge -c nvidia \ python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \ "nccl>=${NCCL_SHORT_VER}" \ dask=2024.1.1 \ dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \ numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \ "pyspark>=3.4.0" cloudpickle cuda-python && \ - mamba clean --all && \ + mamba clean --all --yes && \ conda run --no-capture-output -n gpu_test pip install buildkite-test-collector ENV GOSU_VERSION 1.10 diff --git a/tests/ci_build/Dockerfile.gpu_build_centos7 b/tests/ci_build/Dockerfile.gpu_build_centos7 index 16445de2a..993402725 100644 --- a/tests/ci_build/Dockerfile.gpu_build_centos7 +++ b/tests/ci_build/Dockerfile.gpu_build_centos7 @@ -13,7 +13,7 @@ RUN \ yum -y update && \ yum install -y tar unzip wget xz git which ninja-build devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ && \ # Python - wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \ + wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \ bash conda.sh -b -p /opt/mambaforge && \ /opt/mambaforge/bin/python -m pip install awscli && \ # CMake diff --git a/tests/ci_build/Dockerfile.gpu_build_r_centos7 b/tests/ci_build/Dockerfile.gpu_build_r_centos7 index 7c95f09b5..bfe1cf221 100644 --- a/tests/ci_build/Dockerfile.gpu_build_r_centos7 +++ b/tests/ci_build/Dockerfile.gpu_build_r_centos7 @@ -35,7 +35,7 @@ RUN \ run \ # Python - wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \ + wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \ bash conda.sh -b -p /opt/mambaforge && \ /opt/mambaforge/bin/python -m pip install auditwheel awscli && \ # CMake diff --git a/tests/ci_build/Dockerfile.gpu_dev_ver b/tests/ci_build/Dockerfile.gpu_dev_ver new file mode 100644 index 000000000..a592d4891 --- /dev/null +++ b/tests/ci_build/Dockerfile.gpu_dev_ver @@ -0,0 +1,52 @@ +# Container to test XGBoost against dev versions of dependencies + +ARG CUDA_VERSION_ARG +FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu22.04 +ARG CUDA_VERSION_ARG +ARG RAPIDS_VERSION_ARG + # Should be first 4 digits of the dev version (e.g. 24.06) +ARG NCCL_VERSION_ARG + +# Environment +ENV DEBIAN_FRONTEND noninteractive +SHELL ["/bin/bash", "-c"] # Use Bash as shell + +# Install all basic requirements +RUN \ + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub && \ + apt-get update && \ + apt-get install -y wget unzip bzip2 libgomp1 build-essential openjdk-8-jdk-headless && \ + # Python + wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \ + bash conda.sh -b -p /opt/mambaforge + +ENV PATH=/opt/mambaforge/bin:$PATH + +# Create new Conda environment with dev versions of cuDF, Dask, and cuPy +RUN \ + export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \ + mamba create -y -n gpu_test -c rapidsai-nightly -c conda-forge -c nvidia \ + python=3.10 "cudf=$RAPIDS_VERSION_ARG.*" "rmm=$RAPIDS_VERSION_ARG.*" cudatoolkit=$CUDA_VERSION_ARG \ + "nccl>=${NCCL_SHORT_VER}" \ + dask=2024.1.1 \ + "dask-cuda=$RAPIDS_VERSION_ARG.*" "dask-cudf=$RAPIDS_VERSION_ARG.*" cupy \ + numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \ + "pyspark>=3.4.0" cloudpickle cuda-python && \ + mamba clean --all --yes && \ + conda run --no-capture-output -n gpu_test pip install buildkite-test-collector + +ENV GOSU_VERSION 1.10 +ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/ + +# Install lightweight sudo (not bound to TTY) +RUN set -ex; \ + wget -nv -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \ + chmod +x /usr/local/bin/gosu && \ + gosu nobody true + +# Default entry-point to use if running locally +# It will preserve attributes of created files +COPY entrypoint.sh /scripts/ + +WORKDIR /workspace +ENTRYPOINT ["/scripts/entrypoint.sh"] diff --git a/tests/ci_build/Dockerfile.jvm b/tests/ci_build/Dockerfile.jvm index a115fd52c..4f447cbba 100644 --- a/tests/ci_build/Dockerfile.jvm +++ b/tests/ci_build/Dockerfile.jvm @@ -9,7 +9,7 @@ RUN \ devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ \ devtoolset-9-runtime devtoolset-9-libstdc++-devel && \ # Python - wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \ + wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \ bash conda.sh -b -p /opt/mambaforge && \ # CMake wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \ diff --git a/tests/ci_build/Dockerfile.jvm_cross b/tests/ci_build/Dockerfile.jvm_cross index 43988872d..9ba7b6e69 100644 --- a/tests/ci_build/Dockerfile.jvm_cross +++ b/tests/ci_build/Dockerfile.jvm_cross @@ -13,7 +13,7 @@ RUN \ apt-get update && \ apt-get install -y tar unzip wget openjdk-$JDK_VERSION-jdk libgomp1 && \ # Python - wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \ + wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \ bash conda.sh -b -p /opt/mambaforge && \ /opt/mambaforge/bin/pip install awscli && \ # Maven diff --git a/tests/ci_build/Dockerfile.jvm_gpu_build b/tests/ci_build/Dockerfile.jvm_gpu_build index cee418942..7bd49b5a9 100644 --- a/tests/ci_build/Dockerfile.jvm_gpu_build +++ b/tests/ci_build/Dockerfile.jvm_gpu_build @@ -12,7 +12,7 @@ RUN \ yum -y update && \ yum install -y tar unzip wget xz git which ninja-build java-1.8.0-openjdk-devel devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ && \ # Python - wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \ + wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \ bash conda.sh -b -p /opt/mambaforge && \ # CMake wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \