[CI] Update machine images (#9932)

This commit is contained in:
Philip Hyunsu Cho 2023-12-29 11:15:38 -08:00 committed by GitHub
parent a7226c0222
commit ef8bdaa047
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
26 changed files with 82 additions and 80 deletions

View File

@ -65,7 +65,10 @@ function(create_rlib_for_msvc)
execute_process(COMMAND ${DLLTOOL_EXE} execute_process(COMMAND ${DLLTOOL_EXE}
"--input-def" "${CMAKE_CURRENT_BINARY_DIR}/R.def" "--input-def" "${CMAKE_CURRENT_BINARY_DIR}/R.def"
"--output-lib" "${CMAKE_CURRENT_BINARY_DIR}/R.lib") "--output-lib" "${CMAKE_CURRENT_BINARY_DIR}/R.lib"
"--temp-prefix" "Rlibtemp"
COMMAND_ECHO STDOUT
COMMAND_ERROR_IS_FATAL ANY)
endfunction() endfunction()

View File

@ -59,7 +59,7 @@ For your convenience, we provide the wrapper script ``tests/ci_build/ci_build.sh
.. code-block:: bash .. code-block:: bash
tests/ci_build/ci_build.sh <CONTAINER_TYPE> <DOCKER_BINARY> --build-arg <BUILD_ARG> \ tests/ci_build/ci_build.sh <CONTAINER_TYPE> --use-gpus --build-arg <BUILD_ARG> \
<COMMAND> ... <COMMAND> ...
where: where:
@ -68,8 +68,7 @@ where:
container definition (Dockerfile) located at ``tests/ci_build/Dockerfile.<CONTAINER_TYPE>``. container definition (Dockerfile) located at ``tests/ci_build/Dockerfile.<CONTAINER_TYPE>``.
For example, setting the container type to ``gpu`` will cause the script to load the Dockerfile For example, setting the container type to ``gpu`` will cause the script to load the Dockerfile
``tests/ci_build/Dockerfile.gpu``. ``tests/ci_build/Dockerfile.gpu``.
* ``<DOCKER_BINARY>`` must be either ``docker`` or ``nvidia-docker``. Choose ``nvidia-docker`` * Specify ``--use-gpus`` to run any GPU code. This flag grants the container access to all NVIDIA GPUs on the host machine. Omit the flag if GPU access is not needed.
as long as you need to run any GPU code.
* ``<BUILD_ARG>`` is a build argument to be passed to Docker. Must be of form ``VAR=VALUE``. * ``<BUILD_ARG>`` is a build argument to be passed to Docker. Must be of form ``VAR=VALUE``.
Example: ``--build-arg CUDA_VERSION_ARG=11.0``. You can pass multiple ``--build-arg``. Example: ``--build-arg CUDA_VERSION_ARG=11.0``. You can pass multiple ``--build-arg``.
* ``<COMMAND>`` is the command to run inside the Docker container. This can be more than one argument. * ``<COMMAND>`` is the command to run inside the Docker container. This can be more than one argument.
@ -83,7 +82,7 @@ arguments to Docker. For example:
# Allocate extra space in /dev/shm to enable NCCL # Allocate extra space in /dev/shm to enable NCCL
export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g' export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g'
# Run multi-GPU test suite # Run multi-GPU test suite
tests/ci_build/ci_build.sh gpu nvidia-docker --build-arg CUDA_VERSION_ARG=11.0 \ tests/ci_build/ci_build.sh gpu --use-gpus --build-arg CUDA_VERSION_ARG=11.0 \
tests/ci_build/test_python.sh mgpu tests/ci_build/test_python.sh mgpu
To pass multiple extra arguments: To pass multiple extra arguments:

View File

@ -22,6 +22,7 @@ case "${container}" in
gpu) gpu)
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION" BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION" BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
;; ;;
@ -43,4 +44,4 @@ case "${container}" in
esac esac
# Run a no-op command. This will simply build the container and push it to the private registry # Run a no-op command. This will simply build the container and push it to the private registry
tests/ci_build/ci_build.sh ${container} docker ${BUILD_ARGS} bash tests/ci_build/ci_build.sh ${container} ${BUILD_ARGS} bash

View File

@ -8,7 +8,7 @@ echo "--- Build CPU code targeting ARM64"
source tests/buildkite/conftest.sh source tests/buildkite/conftest.sh
command_wrapper="tests/ci_build/ci_build.sh aarch64 docker" command_wrapper="tests/ci_build/ci_build.sh aarch64"
echo "--- Build libxgboost from the source" echo "--- Build libxgboost from the source"
$command_wrapper tests/ci_build/build_via_cmake.sh --conda-env=aarch64_test \ $command_wrapper tests/ci_build/build_via_cmake.sh --conda-env=aarch64_test \

View File

@ -6,7 +6,7 @@ echo "--- Build CPU code"
source tests/buildkite/conftest.sh source tests/buildkite/conftest.sh
command_wrapper="tests/ci_build/ci_build.sh cpu docker" command_wrapper="tests/ci_build/ci_build.sh cpu"
$command_wrapper rm -fv dmlc-core/include/dmlc/build_config_default.h $command_wrapper rm -fv dmlc-core/include/dmlc/build_config_default.h
# This step is not necessary, but here we include it, to ensure that # This step is not necessary, but here we include it, to ensure that

View File

@ -15,7 +15,7 @@ else
arch_flag="" arch_flag=""
fi fi
command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg "` command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 --build-arg "`
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "` `"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
`"NCCL_VERSION_ARG=$NCCL_VERSION --build-arg "` `"NCCL_VERSION_ARG=$NCCL_VERSION --build-arg "`
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION" `"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
@ -40,13 +40,13 @@ $command_wrapper python tests/ci_build/rename_whl.py python-package/dist/*.whl \
${BUILDKITE_COMMIT} ${WHEEL_TAG} ${BUILDKITE_COMMIT} ${WHEEL_TAG}
echo "--- Audit binary wheel to ensure it's compliant with manylinux2014 standard" echo "--- Audit binary wheel to ensure it's compliant with manylinux2014 standard"
tests/ci_build/ci_build.sh auditwheel_x86_64 docker auditwheel repair \ tests/ci_build/ci_build.sh auditwheel_x86_64 auditwheel repair \
--plat ${WHEEL_TAG} python-package/dist/*.whl --plat ${WHEEL_TAG} python-package/dist/*.whl
$command_wrapper python tests/ci_build/rename_whl.py wheelhouse/*.whl \ $command_wrapper python tests/ci_build/rename_whl.py wheelhouse/*.whl \
${BUILDKITE_COMMIT} ${WHEEL_TAG} ${BUILDKITE_COMMIT} ${WHEEL_TAG}
mv -v wheelhouse/*.whl python-package/dist/ mv -v wheelhouse/*.whl python-package/dist/
# Make sure that libgomp.so is vendored in the wheel # Make sure that libgomp.so is vendored in the wheel
tests/ci_build/ci_build.sh auditwheel_x86_64 docker bash -c \ tests/ci_build/ci_build.sh auditwheel_x86_64 bash -c \
"unzip -l python-package/dist/*.whl | grep libgomp || exit -1" "unzip -l python-package/dist/*.whl | grep libgomp || exit -1"
echo "--- Upload Python wheel" echo "--- Upload Python wheel"

View File

@ -15,7 +15,7 @@ else
arch_flag="" arch_flag=""
fi fi
command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg "` command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 --build-arg "`
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "` `"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
`"NCCL_VERSION_ARG=$NCCL_VERSION --build-arg "` `"NCCL_VERSION_ARG=$NCCL_VERSION --build-arg "`
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION" `"RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
@ -39,13 +39,13 @@ $command_wrapper python tests/ci_build/rename_whl.py python-package/dist/*.whl \
${BUILDKITE_COMMIT} ${WHEEL_TAG} ${BUILDKITE_COMMIT} ${WHEEL_TAG}
echo "--- Audit binary wheel to ensure it's compliant with manylinux2014 standard" echo "--- Audit binary wheel to ensure it's compliant with manylinux2014 standard"
tests/ci_build/ci_build.sh auditwheel_x86_64 docker auditwheel repair \ tests/ci_build/ci_build.sh auditwheel_x86_64 auditwheel repair \
--plat ${WHEEL_TAG} python-package/dist/*.whl --plat ${WHEEL_TAG} python-package/dist/*.whl
$command_wrapper python tests/ci_build/rename_whl.py wheelhouse/*.whl \ $command_wrapper python tests/ci_build/rename_whl.py wheelhouse/*.whl \
${BUILDKITE_COMMIT} ${WHEEL_TAG} ${BUILDKITE_COMMIT} ${WHEEL_TAG}
mv -v wheelhouse/*.whl python-package/dist/ mv -v wheelhouse/*.whl python-package/dist/
# Make sure that libgomp.so is vendored in the wheel # Make sure that libgomp.so is vendored in the wheel
tests/ci_build/ci_build.sh auditwheel_x86_64 docker bash -c \ tests/ci_build/ci_build.sh auditwheel_x86_64 bash -c \
"unzip -l python-package/dist/*.whl | grep libgomp || exit -1" "unzip -l python-package/dist/*.whl | grep libgomp || exit -1"
echo "--- Upload Python wheel" echo "--- Upload Python wheel"

View File

@ -6,7 +6,7 @@ source tests/buildkite/conftest.sh
echo "--- Build XGBoost R package with CUDA" echo "--- Build XGBoost R package with CUDA"
tests/ci_build/ci_build.sh gpu_build_r_centos7 docker \ tests/ci_build/ci_build.sh gpu_build_r_centos7 \
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \ --build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
--build-arg R_VERSION_ARG=${R_VERSION} \ --build-arg R_VERSION_ARG=${R_VERSION} \
tests/ci_build/build_r_pkg_with_cuda.sh \ tests/ci_build/build_r_pkg_with_cuda.sh \

View File

@ -5,7 +5,7 @@ set -euo pipefail
source tests/buildkite/conftest.sh source tests/buildkite/conftest.sh
echo "--- Build JVM packages doc" echo "--- Build JVM packages doc"
tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_doc.sh ${BRANCH_NAME} tests/ci_build/ci_build.sh jvm tests/ci_build/build_jvm_doc.sh ${BRANCH_NAME}
if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
then then
echo "--- Upload JVM packages doc" echo "--- Upload JVM packages doc"

View File

@ -13,7 +13,7 @@ else
arch_flag="" arch_flag=""
fi fi
tests/ci_build/ci_build.sh jvm_gpu_build nvidia-docker \ tests/ci_build/ci_build.sh jvm_gpu_build --use-gpus \
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \ --build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
--build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \ --build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \
tests/ci_build/build_jvm_packages.sh \ tests/ci_build/build_jvm_packages.sh \

View File

@ -5,13 +5,13 @@ set -euo pipefail
source tests/buildkite/conftest.sh source tests/buildkite/conftest.sh
echo "--- Build XGBoost JVM packages scala 2.12" echo "--- Build XGBoost JVM packages scala 2.12"
tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \ tests/ci_build/ci_build.sh jvm tests/ci_build/build_jvm_packages.sh \
${SPARK_VERSION} ${SPARK_VERSION}
echo "--- Build XGBoost JVM packages scala 2.13" echo "--- Build XGBoost JVM packages scala 2.13"
tests/ci_build/ci_build.sh jvm docker tests/ci_build/build_jvm_packages.sh \ tests/ci_build/ci_build.sh jvm tests/ci_build/build_jvm_packages.sh \
${SPARK_VERSION} "" "" "true" ${SPARK_VERSION} "" "" "true"
echo "--- Stash XGBoost4J JARs" echo "--- Stash XGBoost4J JARs"

View File

@ -7,7 +7,7 @@ source tests/buildkite/conftest.sh
if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
then then
echo "--- Deploy JVM packages to xgboost-maven-repo S3 repo" echo "--- Deploy JVM packages to xgboost-maven-repo S3 repo"
tests/ci_build/ci_build.sh jvm_gpu_build docker \ tests/ci_build/ci_build.sh jvm_gpu_build \
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \ --build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
--build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \ --build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \
tests/ci_build/deploy_jvm_packages.sh ${SPARK_VERSION} tests/ci_build/deploy_jvm_packages.sh ${SPARK_VERSION}

View File

@ -63,7 +63,7 @@ def format_params(args, *, stack_id, agent_iam_policy):
params["BuildkiteAgentToken"] = args.agent_token params["BuildkiteAgentToken"] = args.agent_token
params["VpcId"] = default_vpc.id params["VpcId"] = default_vpc.id
params["Subnets"] = ",".join(subnets) params["Subnets"] = ",".join(subnets)
params["ManagedPolicyARN"] = agent_iam_policy params["ManagedPolicyARNs"] = agent_iam_policy
params.update(COMMON_STACK_PARAMS) params.update(COMMON_STACK_PARAMS)
return [{"ParameterKey": k, "ParameterValue": v} for k, v in params.items()] return [{"ParameterKey": k, "ParameterValue": v} for k, v in params.items()]

View File

@ -1,34 +1,34 @@
AMI_ID = { AMI_ID = {
# Managed by XGBoost team # Managed by XGBoost team
"linux-amd64-gpu": { "linux-amd64-gpu": {
"us-west-2": "ami-094271bed4788ddb5", "us-west-2": "ami-08c3bc1dd5ec8bc5c",
}, },
"linux-amd64-mgpu": { "linux-amd64-mgpu": {
"us-west-2": "ami-094271bed4788ddb5", "us-west-2": "ami-08c3bc1dd5ec8bc5c",
}, },
"windows-gpu": { "windows-gpu": {
"us-west-2": "ami-0839681594a1d7627", "us-west-2": "ami-03c7f2156f93b22a7",
}, },
"windows-cpu": { "windows-cpu": {
"us-west-2": "ami-0839681594a1d7627", "us-west-2": "ami-03c7f2156f93b22a7",
}, },
# Managed by BuildKite # Managed by BuildKite
# from https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml # from https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml
"linux-amd64-cpu": { "linux-amd64-cpu": {
"us-west-2": "ami-00f2127550cf03658", "us-west-2": "ami-015e64acb52b3e595",
}, },
"pipeline-loader": { "pipeline-loader": {
"us-west-2": "ami-00f2127550cf03658", "us-west-2": "ami-015e64acb52b3e595",
}, },
"linux-arm64-cpu": { "linux-arm64-cpu": {
"us-west-2": "ami-0c5789068f4a2d1b5", "us-west-2": "ami-0884e9c23a2fa98d0",
}, },
} }
STACK_PARAMS = { STACK_PARAMS = {
"linux-amd64-gpu": { "linux-amd64-gpu": {
"InstanceOperatingSystem": "linux", "InstanceOperatingSystem": "linux",
"InstanceType": "g4dn.xlarge", "InstanceTypes": "g4dn.xlarge",
"AgentsPerInstance": "1", "AgentsPerInstance": "1",
"MinSize": "0", "MinSize": "0",
"MaxSize": "8", "MaxSize": "8",
@ -38,7 +38,7 @@ STACK_PARAMS = {
}, },
"linux-amd64-mgpu": { "linux-amd64-mgpu": {
"InstanceOperatingSystem": "linux", "InstanceOperatingSystem": "linux",
"InstanceType": "g4dn.12xlarge", "InstanceTypes": "g4dn.12xlarge",
"AgentsPerInstance": "1", "AgentsPerInstance": "1",
"MinSize": "0", "MinSize": "0",
"MaxSize": "1", "MaxSize": "1",
@ -48,7 +48,7 @@ STACK_PARAMS = {
}, },
"windows-gpu": { "windows-gpu": {
"InstanceOperatingSystem": "windows", "InstanceOperatingSystem": "windows",
"InstanceType": "g4dn.2xlarge", "InstanceTypes": "g4dn.2xlarge",
"AgentsPerInstance": "1", "AgentsPerInstance": "1",
"MinSize": "0", "MinSize": "0",
"MaxSize": "2", "MaxSize": "2",
@ -58,7 +58,7 @@ STACK_PARAMS = {
}, },
"windows-cpu": { "windows-cpu": {
"InstanceOperatingSystem": "windows", "InstanceOperatingSystem": "windows",
"InstanceType": "c5a.2xlarge", "InstanceTypes": "c5a.2xlarge",
"AgentsPerInstance": "1", "AgentsPerInstance": "1",
"MinSize": "0", "MinSize": "0",
"MaxSize": "2", "MaxSize": "2",
@ -68,7 +68,7 @@ STACK_PARAMS = {
}, },
"linux-amd64-cpu": { "linux-amd64-cpu": {
"InstanceOperatingSystem": "linux", "InstanceOperatingSystem": "linux",
"InstanceType": "c5a.4xlarge", "InstanceTypes": "c5a.4xlarge",
"AgentsPerInstance": "1", "AgentsPerInstance": "1",
"MinSize": "0", "MinSize": "0",
"MaxSize": "16", "MaxSize": "16",
@ -78,7 +78,7 @@ STACK_PARAMS = {
}, },
"pipeline-loader": { "pipeline-loader": {
"InstanceOperatingSystem": "linux", "InstanceOperatingSystem": "linux",
"InstanceType": "t3a.micro", "InstanceTypes": "t3a.micro",
"AgentsPerInstance": "1", "AgentsPerInstance": "1",
"MinSize": "2", "MinSize": "2",
"MaxSize": "2", "MaxSize": "2",
@ -88,7 +88,7 @@ STACK_PARAMS = {
}, },
"linux-arm64-cpu": { "linux-arm64-cpu": {
"InstanceOperatingSystem": "linux", "InstanceOperatingSystem": "linux",
"InstanceType": "c6g.4xlarge", "InstanceTypes": "c6g.4xlarge",
"AgentsPerInstance": "1", "AgentsPerInstance": "1",
"MinSize": "0", "MinSize": "0",
"MaxSize": "8", "MaxSize": "8",

View File

@ -12,15 +12,13 @@ phases:
- | - |
yum groupinstall -y "Development tools" yum groupinstall -y "Development tools"
yum install -y kernel-devel-$(uname -r) yum install -y kernel-devel-$(uname -r)
dnf install -y kernel-modules-extra
aws s3 cp --recursive s3://ec2-linux-nvidia-drivers/latest/ . aws s3 cp --recursive s3://ec2-linux-nvidia-drivers/latest/ .
chmod +x NVIDIA-Linux-x86_64*.run chmod +x NVIDIA-Linux-x86_64*.run
CC=/usr/bin/gcc10-cc ./NVIDIA-Linux-x86_64*.run --silent ./NVIDIA-Linux-x86_64*.run --silent
amazon-linux-extras install docker curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | tee /etc/yum.repos.d/nvidia-container-toolkit.repo
systemctl --now enable docker yum install -y nvidia-container-toolkit
distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \
&& curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.repo \
| sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
yum clean expire-cache yum clean expire-cache
yum install -y nvidia-docker2 nvidia-ctk runtime configure --runtime=docker
systemctl restart docker systemctl restart docker

View File

@ -15,9 +15,9 @@ phases:
choco --version choco --version
choco feature enable -n=allowGlobalConfirmation choco feature enable -n=allowGlobalConfirmation
# CMake 3.25 # CMake 3.27
Write-Host '>>> Installing CMake 3.25...' Write-Host '>>> Installing CMake 3.27...'
choco install cmake --version 3.25.2 --installargs "ADD_CMAKE_TO_PATH=System" choco install cmake --version 3.27.9 --installargs "ADD_CMAKE_TO_PATH=System"
if ($LASTEXITCODE -ne 0) { throw "Last command failed" } if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Notepad++ # Notepad++
@ -25,15 +25,14 @@ phases:
choco install notepadplusplus choco install notepadplusplus
if ($LASTEXITCODE -ne 0) { throw "Last command failed" } if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Miniconda # Mambaforge
Write-Host '>>> Installing Miniconda...' Write-Host '>>> Installing Mambaforge...'
choco install miniconda3 /RegisterPython:1 /D:C:\tools\miniconda3 choco install mambaforge /RegisterPython:1 /D:C:\tools\mambaforge
C:\tools\miniconda3\Scripts\conda.exe init --user --system C:\tools\mambaforge\Scripts\conda.exe init --user --system
if ($LASTEXITCODE -ne 0) { throw "Last command failed" } if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
. "C:\Windows\System32\WindowsPowerShell\v1.0\profile.ps1" . "C:\Windows\System32\WindowsPowerShell\v1.0\profile.ps1"
if ($LASTEXITCODE -ne 0) { throw "Last command failed" } if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
conda config --set auto_activate_base false conda config --set auto_activate_base false
conda config --prepend channels conda-forge
# Install Java 11 # Install Java 11
Write-Host '>>> Installing Java 11...' Write-Host '>>> Installing Java 11...'
@ -59,15 +58,9 @@ phases:
choco install cuda --version=11.8.0.52206 choco install cuda --version=11.8.0.52206
if ($LASTEXITCODE -ne 0) { throw "Last command failed" } if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Install Python packages
Write-Host '>>> Installing Python packages...'
conda activate
conda install -y mamba
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
# Install R # Install R
Write-Host '>>> Installing R...' Write-Host '>>> Installing R...'
choco install r.project --version=3.6.3 choco install r.project --version=4.3.2
if ($LASTEXITCODE -ne 0) { throw "Last command failed" } if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
choco install rtools --version=3.5.0.4 choco install rtools --version=4.3.5550
if ($LASTEXITCODE -ne 0) { throw "Last command failed" } if ($LASTEXITCODE -ne 0) { throw "Last command failed" }

View File

@ -6,6 +6,6 @@ echo "--- Run clang-tidy"
source tests/buildkite/conftest.sh source tests/buildkite/conftest.sh
tests/ci_build/ci_build.sh clang_tidy docker \ tests/ci_build/ci_build.sh clang_tidy \
--build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \ --build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \
python3 tests/ci_build/tidy.py --cuda-archs 75 python3 tests/ci_build/tidy.py --cuda-archs 75

View File

@ -7,7 +7,7 @@ source tests/buildkite/conftest.sh
echo "--- Run Google Tests with CUDA, using a GPU" echo "--- Run Google Tests with CUDA, using a GPU"
buildkite-agent artifact download "build/testxgboost" . --step build-cuda buildkite-agent artifact download "build/testxgboost" . --step build-cuda
chmod +x build/testxgboost chmod +x build/testxgboost
tests/ci_build/ci_build.sh gpu nvidia-docker \ tests/ci_build/ci_build.sh gpu --use-gpus \
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \ --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \ --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION \ --build-arg NCCL_VERSION_ARG=$NCCL_VERSION \
@ -17,7 +17,7 @@ echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"
rm -rfv build/ rm -rfv build/
buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
chmod +x build/testxgboost chmod +x build/testxgboost
tests/ci_build/ci_build.sh gpu nvidia-docker \ tests/ci_build/ci_build.sh gpu --use-gpus \
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \ --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \ --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION \ --build-arg NCCL_VERSION_ARG=$NCCL_VERSION \

View File

@ -10,7 +10,7 @@ export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g'
echo "--- Run Google Tests with CUDA, using multiple GPUs" echo "--- Run Google Tests with CUDA, using multiple GPUs"
buildkite-agent artifact download "build/testxgboost" . --step build-cuda buildkite-agent artifact download "build/testxgboost" . --step build-cuda
chmod +x build/testxgboost chmod +x build/testxgboost
tests/ci_build/ci_build.sh gpu nvidia-docker \ tests/ci_build/ci_build.sh gpu --use-gpus \
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \ --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \ --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
--build-arg NCCL_VERSION_ARG=$NCCL_VERSION \ --build-arg NCCL_VERSION_ARG=$NCCL_VERSION \

View File

@ -9,5 +9,5 @@ buildkite-agent artifact download "jvm-packages/xgboost4j/target/*.jar" . --step
buildkite-agent artifact download "jvm-packages/xgboost4j-spark/target/*.jar" . --step build-jvm-packages buildkite-agent artifact download "jvm-packages/xgboost4j-spark/target/*.jar" . --step build-jvm-packages
buildkite-agent artifact download "jvm-packages/xgboost4j-example/target/*.jar" . --step build-jvm-packages buildkite-agent artifact download "jvm-packages/xgboost4j-example/target/*.jar" . --step build-jvm-packages
export CI_DOCKER_EXTRA_PARAMS_INIT='-e RUN_INTEGRATION_TEST=1' export CI_DOCKER_EXTRA_PARAMS_INIT='-e RUN_INTEGRATION_TEST=1'
tests/ci_build/ci_build.sh jvm_cross docker --build-arg JDK_VERSION=${JDK_VERSION} \ tests/ci_build/ci_build.sh jvm_cross --build-arg JDK_VERSION=${JDK_VERSION} \
--build-arg SPARK_VERSION=${SPARK_VERSION} tests/ci_build/test_jvm_cross.sh --build-arg SPARK_VERSION=${SPARK_VERSION} tests/ci_build/test_jvm_cross.sh

View File

@ -8,4 +8,4 @@ echo "--- Test Python CPU ARM64"
buildkite-agent artifact download "python-package/dist/*.whl" . --step build-cpu-arm64 buildkite-agent artifact download "python-package/dist/*.whl" . --step build-cpu-arm64
buildkite-agent artifact download "xgboost" . --step build-cpu-arm64 buildkite-agent artifact download "xgboost" . --step build-cpu-arm64
chmod +x ./xgboost chmod +x ./xgboost
tests/ci_build/ci_build.sh aarch64 docker tests/ci_build/test_python.sh cpu-arm64 tests/ci_build/ci_build.sh aarch64 tests/ci_build/test_python.sh cpu-arm64

View File

@ -13,4 +13,4 @@ chmod +x ./xgboost
export BUILDKITE_ANALYTICS_TOKEN=$(get_aws_secret buildkite/test_analytics/cpu) export BUILDKITE_ANALYTICS_TOKEN=$(get_aws_secret buildkite/test_analytics/cpu)
set_buildkite_env_vars_in_container set_buildkite_env_vars_in_container
tests/ci_build/ci_build.sh cpu docker tests/ci_build/test_python.sh cpu tests/ci_build/ci_build.sh cpu tests/ci_build/test_python.sh cpu

View File

@ -22,7 +22,7 @@ chmod +x build/testxgboost
# Allocate extra space in /dev/shm to enable NCCL # Allocate extra space in /dev/shm to enable NCCL
export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g' export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g'
command_wrapper="tests/ci_build/ci_build.sh gpu nvidia-docker --build-arg "` command_wrapper="tests/ci_build/ci_build.sh gpu --use-gpus --build-arg "`
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "` `"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION --build-arg "` `"RAPIDS_VERSION_ARG=$RAPIDS_VERSION --build-arg "`
`"NCCL_VERSION_ARG=$NCCL_VERSION" `"NCCL_VERSION_ARG=$NCCL_VERSION"

View File

@ -18,7 +18,7 @@ mv xgboost/ xgboost_rpack/
mkdir build mkdir build
cd build cd build
cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON -DR_LIB=ON -DLIBR_HOME="c:\\Program Files\\R\\R-3.6.3" cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON -DR_LIB=ON -DLIBR_HOME="c:\\Program Files\\R\\R-4.3.2" -DCMAKE_PREFIX_PATH="C:\\rtools43\\x86_64-w64-mingw32.static.posix\\bin"
cmake --build . --config Release --parallel cmake --build . --config Release --parallel
cd .. cd ..
@ -32,5 +32,5 @@ cp -v lib/xgboost.dll xgboost_rpack/src/
echo 'all:' > xgboost_rpack/src/Makefile echo 'all:' > xgboost_rpack/src/Makefile
echo 'all:' > xgboost_rpack/src/Makefile.win echo 'all:' > xgboost_rpack/src/Makefile.win
mv xgboost_rpack/ xgboost/ mv xgboost_rpack/ xgboost/
/c/Rtools/bin/tar -cvf xgboost_r_gpu_win64_${commit_hash}.tar xgboost/ /c/Rtools43/usr/bin/tar -cvf xgboost_r_gpu_win64_${commit_hash}.tar xgboost/
/c/Rtools/bin/gzip -9c xgboost_r_gpu_win64_${commit_hash}.tar > xgboost_r_gpu_win64_${commit_hash}.tar.gz /c/Rtools43/usr/bin/gzip -9c xgboost_r_gpu_win64_${commit_hash}.tar > xgboost_r_gpu_win64_${commit_hash}.tar.gz

View File

@ -2,14 +2,14 @@
# #
# Execute command within a docker container # Execute command within a docker container
# #
# Usage: ci_build.sh <CONTAINER_TYPE> <DOCKER_BINARY> # Usage: ci_build.sh <CONTAINER_TYPE> [--use-gpus]
# [--dockerfile <DOCKERFILE_PATH>] [-it] # [--dockerfile <DOCKERFILE_PATH>] [-it]
# [--build-arg <BUILD_ARG>] <COMMAND> # [--build-arg <BUILD_ARG>] <COMMAND>
# #
# CONTAINER_TYPE: Type of the docker container used to run the build: e.g., # CONTAINER_TYPE: Type of the docker container used to run the build: e.g.,
# (cpu | gpu) # (cpu | gpu)
# #
# DOCKER_BINARY: Command to invoke docker, e.g. (docker | nvidia-docker). # --use-gpus: Whether to grant the container access to NVIDIA GPUs.
# #
# DOCKERFILE_PATH: (Optional) Path to the Dockerfile used for docker build. If # DOCKERFILE_PATH: (Optional) Path to the Dockerfile used for docker build. If
# this optional value is not supplied (via the --dockerfile # this optional value is not supplied (via the --dockerfile
@ -29,9 +29,12 @@ shift 1
DOCKERFILE_PATH="${SCRIPT_DIR}/Dockerfile.${CONTAINER_TYPE}" DOCKERFILE_PATH="${SCRIPT_DIR}/Dockerfile.${CONTAINER_TYPE}"
DOCKER_CONTEXT_PATH="${SCRIPT_DIR}" DOCKER_CONTEXT_PATH="${SCRIPT_DIR}"
# Get docker binary command (should be either docker or nvidia-docker) GPU_FLAG=''
DOCKER_BINARY="$1" if [[ "$1" == "--use-gpus" ]]; then
shift 1 echo "Using NVIDIA GPUs"
GPU_FLAG='--gpus all'
shift 1
fi
if [[ "$1" == "--dockerfile" ]]; then if [[ "$1" == "--dockerfile" ]]; then
DOCKERFILE_PATH="$2" DOCKERFILE_PATH="$2"
@ -144,21 +147,21 @@ then
DOCKER_CACHE_REPO="${DOCKER_CACHE_ECR_ID}.dkr.ecr.${DOCKER_CACHE_ECR_REGION}.amazonaws.com" DOCKER_CACHE_REPO="${DOCKER_CACHE_ECR_ID}.dkr.ecr.${DOCKER_CACHE_ECR_REGION}.amazonaws.com"
echo "Using AWS ECR; repo URL = ${DOCKER_CACHE_REPO}" echo "Using AWS ECR; repo URL = ${DOCKER_CACHE_REPO}"
# Login for Docker registry # Login for Docker registry
echo "\$(aws ecr get-login --no-include-email --region ${DOCKER_CACHE_ECR_REGION} --registry-ids ${DOCKER_CACHE_ECR_ID})" echo "aws ecr get-login-password --region ${DOCKER_CACHE_ECR_REGION} | docker login --username AWS --password-stdin ${DOCKER_CACHE_REPO}"
$(aws ecr get-login --no-include-email --region ${DOCKER_CACHE_ECR_REGION} --registry-ids ${DOCKER_CACHE_ECR_ID}) aws ecr get-login-password --region ${DOCKER_CACHE_ECR_REGION} | docker login --username AWS --password-stdin ${DOCKER_CACHE_REPO}
# Pull pre-built container from Docker build cache, # Pull pre-built container from Docker build cache,
# if one exists for the particular branch or pull request # if one exists for the particular branch or pull request
DOCKER_TAG="${BRANCH_NAME//\//-}" # Slashes are not allow in Docker tag DOCKER_TAG="${BRANCH_NAME//\//-}" # Slashes are not allow in Docker tag
echo "docker pull --quiet ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${DOCKER_TAG}" echo "docker pull --quiet ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${DOCKER_TAG}"
if time docker pull --quiet "${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${DOCKER_TAG}" if time docker pull --quiet "${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${DOCKER_TAG}"
then then
CACHE_FROM_CMD="--cache-from ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${DOCKER_TAG}" CACHE_FROM_CMD="--cache-from ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:${DOCKER_TAG} --build-arg BUILDKIT_INLINE_CACHE=1"
else else
# If the build cache is empty for the particular branch or pull request, # If the build cache is empty for the particular branch or pull request,
# use the build cache associated with the master branch # use the build cache associated with the master branch
echo "docker pull --quiet ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:master" echo "docker pull --quiet ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:master"
docker pull --quiet "${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:master" || true docker pull --quiet "${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:master" || true
CACHE_FROM_CMD="--cache-from ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:master" CACHE_FROM_CMD="--cache-from ${DOCKER_CACHE_REPO}/${DOCKER_IMG_NAME}:master --build-arg BUILDKIT_INLINE_CACHE=1"
fi fi
else else
CACHE_FROM_CMD='' CACHE_FROM_CMD=''
@ -166,11 +169,15 @@ fi
echo "docker build \ echo "docker build \
${CI_DOCKER_BUILD_ARG} \ ${CI_DOCKER_BUILD_ARG} \
--progress=plain \
--ulimit nofile=1024000:1024000 \
-t ${DOCKER_IMG_NAME} \ -t ${DOCKER_IMG_NAME} \
-f ${DOCKERFILE_PATH} ${DOCKER_CONTEXT_PATH} \ -f ${DOCKERFILE_PATH} ${DOCKER_CONTEXT_PATH} \
${CACHE_FROM_CMD}" ${CACHE_FROM_CMD}"
docker build \ docker build \
${CI_DOCKER_BUILD_ARG} \ ${CI_DOCKER_BUILD_ARG} \
--progress=plain \
--ulimit nofile=1024000:1024000 \
-t "${DOCKER_IMG_NAME}" \ -t "${DOCKER_IMG_NAME}" \
-f "${DOCKERFILE_PATH}" "${DOCKER_CONTEXT_PATH}" \ -f "${DOCKERFILE_PATH}" "${DOCKER_CONTEXT_PATH}" \
${CACHE_FROM_CMD} ${CACHE_FROM_CMD}
@ -231,7 +238,8 @@ echo "Running '${COMMAND[*]}' inside ${DOCKER_IMG_NAME}..."
# and share the PID namespace (--pid=host) so the process inside does not have # and share the PID namespace (--pid=host) so the process inside does not have
# pid 1 and SIGKILL is propagated to the process inside (jenkins can kill it). # pid 1 and SIGKILL is propagated to the process inside (jenkins can kill it).
set -x set -x
${DOCKER_BINARY} run --rm --pid=host \ docker run --rm --pid=host \
${GPU_FLAG} \
-v "${WORKSPACE}":/workspace \ -v "${WORKSPACE}":/workspace \
-w /workspace \ -w /workspace \
${USER_IDS} \ ${USER_IDS} \

View File

@ -165,7 +165,7 @@ TEST(SegmentedUnique, Regression) {
} }
} }
TEST(Allocator, OOM) { TEST(Allocator, DISABLED_OOM) {
auto size = dh::AvailableMemory(0) * 4; auto size = dh::AvailableMemory(0) * 4;
ASSERT_THROW({dh::caching_device_vector<char> vec(size);}, dmlc::Error); ASSERT_THROW({dh::caching_device_vector<char> vec(size);}, dmlc::Error);
ASSERT_THROW({dh::device_vector<char> vec(size);}, dmlc::Error); ASSERT_THROW({dh::device_vector<char> vec(size);}, dmlc::Error);