Fix quantile tests running on multi-gpus (#8775)
* Fix quantile tests running on multi-gpus
* Run some gtests with multiple GPUs
* fix mgpu test naming
* Instruct NCCL to print extra logs
* Allocate extra space in /dev/shm to enable NCCL
* use gtest_skip to skip mgpu tests

---------

Co-authored-by: Hyunsu Philip Cho <chohyu01@cs.washington.edu>
This commit is contained in:
@@ -36,6 +36,11 @@ steps:
|
||||
queue: linux-amd64-mgpu
|
||||
- wait
|
||||
#### -------- TEST --------
|
||||
- label: ":console: Run Google Tests"
|
||||
command: "tests/buildkite/test-cpp-mgpu.sh"
|
||||
key: test-cpp-mgpu
|
||||
agents:
|
||||
queue: linux-amd64-mgpu
|
||||
- label: ":console: Test Python package, 4 GPUs"
|
||||
command: "tests/buildkite/test-python-gpu.sh mgpu"
|
||||
key: test-python-mgpu
|
||||
|
||||
16
tests/buildkite/test-cpp-mgpu.sh
Executable file
16
tests/buildkite/test-cpp-mgpu.sh
Executable file
@@ -0,0 +1,16 @@
|
||||
#!/bin/bash
#
# Buildkite step script: run the Google Tests whose names match *MGPU*
# (the multi-GPU tests) through the CI docker wrapper.
#
# Reads CUDA_VERSION and RAPIDS_VERSION; presumably both are defined by
# tests/buildkite/conftest.sh -- confirm against that file. Under `set -u`
# the script aborts if either is unset.

set -euo pipefail

source tests/buildkite/conftest.sh

# Allocate extra space in /dev/shm to enable NCCL
export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g'

echo "--- Run Google Tests with CUDA, using multiple GPUs"

# Fetch the test binary from the build-cuda step and make it executable.
buildkite-agent artifact download "build/testxgboost" . --step build-cuda
chmod +x build/testxgboost

# Expansions are quoted so the values cannot be word-split, and the filter
# pattern is quoted so the shell never globs it against files in the working
# directory; gtest must receive the literal pattern *MGPU*.
tests/ci_build/ci_build.sh gpu nvidia-docker \
  --build-arg "CUDA_VERSION_ARG=${CUDA_VERSION}" \
  --build-arg "RAPIDS_VERSION_ARG=${RAPIDS_VERSION}" \
  build/testxgboost --gtest_filter='*MGPU*'
|
||||
Reference in New Issue
Block a user