xgboost/tests/buildkite/test-python-gpu.sh
Jiaming Yuan 0715ab3c10
Use dlopen to load NCCL. (#9796)
This PR adds optional support for loading NCCL with `dlopen` as an alternative to compile-time linking. This addresses the size-bloat issue with the PyPI binary release.
- Add CMake option to load `nccl` at runtime.
- Add an NCCL stub.

After this, `nccl` will be fetched from PyPI when XGBoost is installed with pip, whether by a user or through `pyproject.toml`. Those who want to link NCCL at compile time can continue to do so without any change.

At the moment, this is Linux only since we only support MNMG on Linux.
2023-11-22 19:27:31 +08:00

51 lines
1.3 KiB
Bash
Executable File

#!/bin/bash
#
# Buildkite step: run one of the XGBoost Python GPU test suites through the
# CI docker wrapper.
#
# Usage: test-python-gpu.sh {gpu|mgpu} [extra args to pass to pytest]
#
# Must be run from the repository root (all paths below are relative).
# CUDA_VERSION, RAPIDS_VERSION and NCCL_VERSION must be set by the time the
# wrapper command is built; `set -u` aborts the script otherwise.
# NOTE(review): they are presumably provided by conftest.sh or the pipeline
# environment -- not visible from this file.

set -euo pipefail

# Validate the suite before doing any work so that a missing or misspelled
# suite fails immediately instead of after the artifact downloads.
suite=${1:-}
case "$suite" in
  gpu)
    banner="--- Test XGBoost Python package, single GPU"
    ;;
  mgpu)
    banner="--- Test XGBoost Python package, 4 GPUs"
    ;;
  *)
    echo "Usage: $0 {gpu|mgpu} [extra args to pass to pytest]" >&2
    exit 1
    ;;
esac
# Drop the suite name; "$@" now holds only the extra arguments.
shift 1

# NOTE(review): get_aws_secret and set_buildkite_env_vars_in_container are
# not defined in this file; presumably conftest.sh provides them.
source tests/buildkite/conftest.sh

echo "--- Fetch build artifacts"
buildkite-agent artifact download "python-package/dist/*.whl" . --step build-cuda
buildkite-agent artifact download "build/testxgboost" . --step build-cuda
chmod +x build/testxgboost

# Allocate extra space in /dev/shm to enable NCCL
export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g'

# Build the command as an array so every argument stays a single word no
# matter what the version variables contain. Extra arguments are forwarded to
# test_python.sh, as the usage text promises; previously they were parsed and
# then dropped.
# NOTE(review): assumes test_python.sh hands trailing arguments to pytest --
# confirm.
test_command=(
  tests/ci_build/ci_build.sh gpu nvidia-docker
  --build-arg "CUDA_VERSION_ARG=$CUDA_VERSION"
  --build-arg "RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
  --build-arg "NCCL_VERSION_ARG=$NCCL_VERSION"
  tests/ci_build/test_python.sh "$suite" "$@"
)

# The secret path ends in the suite name (gpu or mgpu).
# `export VAR=$(...)` discards the exit status of the substitution, so a
# failed lookup does not trip `set -e`; the token is simply whatever the
# helper printed. Kept in this form so the conditions under which this step
# fails are unchanged.
# shellcheck disable=SC2155
export BUILDKITE_ANALYTICS_TOKEN=$(get_aws_secret "buildkite/test_analytics/$suite")
set_buildkite_env_vars_in_container

# Run specified test suite
echo "$banner"
"${test_command[@]}"