Fix quantile tests running on multi-gpus (#8775)

* Fix quantile tests running on multi-gpus

* Run some gtests with multiple GPUs

* Fix mgpu test naming

* Instruct NCCL to print extra logs

* Allocate extra space in /dev/shm to enable NCCL

* Use GTEST_SKIP to skip mgpu tests

---------

Co-authored-by: Hyunsu Philip Cho <chohyu01@cs.washington.edu>
This commit is contained in:
Rong Ou
2023-02-12 17:00:26 -08:00
committed by GitHub
parent 225b3158f6
commit ed91e775ec
3 changed files with 50 additions and 32 deletions

View File

@@ -36,6 +36,11 @@ steps:
queue: linux-amd64-mgpu
- wait
#### -------- TEST --------
# Runs tests/buildkite/test-cpp-mgpu.sh on an agent from the linux-amd64-mgpu queue.
- label: ":console: Run Google Tests"
command: "tests/buildkite/test-cpp-mgpu.sh"
key: test-cpp-mgpu
agents:
queue: linux-amd64-mgpu
- label: ":console: Test Python package, 4 GPUs"
command: "tests/buildkite/test-python-gpu.sh mgpu"
key: test-python-mgpu

View File

@@ -0,0 +1,16 @@
#!/bin/bash
# Buildkite step script: run the testxgboost Google Test binary, restricted to
# tests whose names match *MGPU*.
#
# Downloads the build/testxgboost artifact from the build-cuda step and runs it
# through tests/ci_build/ci_build.sh.
#
# Requires CUDA_VERSION and RAPIDS_VERSION to be set (the script aborts under
# `set -u` otherwise); presumably they are defined by
# tests/buildkite/conftest.sh -- TODO confirm.

set -euo pipefail

source tests/buildkite/conftest.sh

# Allocate extra space in /dev/shm to enable NCCL
# NOTE(review): presumably ci_build.sh forwards this variable to the container
# runtime -- confirm in tests/ci_build/ci_build.sh.
export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g'

echo "--- Run Google Tests with CUDA, using multiple GPUs"
buildkite-agent artifact download "build/testxgboost" . --step build-cuda
chmod +x build/testxgboost

# Quote the variable expansions so each value is passed as exactly one word,
# and quote the gtest filter so the shell never pathname-expands *MGPU*
# against files in the working directory.
tests/ci_build/ci_build.sh gpu nvidia-docker \
  --build-arg "CUDA_VERSION_ARG=${CUDA_VERSION}" \
  --build-arg "RAPIDS_VERSION_ARG=${RAPIDS_VERSION}" \
  build/testxgboost '--gtest_filter=*MGPU*'