RMM integration plugin (#5873)
* [CI] Add RMM as an optional dependency * Replace caching allocator with pool allocator from RMM * Revert "Replace caching allocator with pool allocator from RMM" This reverts commit e15845d4e72e890c2babe31a988b26503a7d9038. * Use rmm::mr::get_default_resource() * Try setting default resource (doesn't work yet) * Allocate pool_mr in the heap * Prevent leaking pool_mr handle * Separate EXPECT_DEATH() in separate test suite suffixed DeathTest * Turn off death tests for RMM * Address reviewer's feedback * Prevent leaking of cuda_mr * Fix Jenkinsfile syntax * Remove unnecessary function in Jenkinsfile * [CI] Install NCCL into RMM container * Run Python tests * Try building with RMM, CUDA 10.0 * Do not use RMM for CUDA 10.0 target * Actually test for test_rmm flag * Fix TestPythonGPU * Use CNMeM allocator, since pool allocator doesn't yet support multiGPU * Use 10.0 container to build RMM-enabled XGBoost * Revert "Use 10.0 container to build RMM-enabled XGBoost" This reverts commit 789021fa31112e25b683aef39fff375403060141. * Fix Jenkinsfile * [CI] Assign larger /dev/shm to NCCL * Use 10.2 artifact to run multi-GPU Python tests * Add CUDA 10.0 -> 11.0 cross-version test; remove CUDA 10.0 target * Rename Conda env rmm_test -> gpu_test * Use env var to opt into CNMeM pool for C++ tests * Use identical CUDA version for RMM builds and tests * Use Pytest fixtures to enable RMM pool in Python tests * Move RMM to plugin/CMakeLists.txt; use PLUGIN_RMM * Use per-device MR; use command arg in gtest * Set CMake prefix path to use Conda env * Use 0.15 nightly version of RMM * Remove unnecessary header * Fix a unit test when cudf is missing * Add RMM demos * Remove print() * Use HostDeviceVector in GPU predictor * Simplify pytest setup; use LocalCUDACluster fixture * Address reviewers' comments Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
committed by
GitHub
parent
c3ea3b7e37
commit
9adb812a0a
45
tests/python-gpu/conftest.py
Normal file
45
tests/python-gpu/conftest.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import sys
|
||||
import pytest
|
||||
import logging
|
||||
|
||||
sys.path.append("tests/python")
|
||||
import testing as tm # noqa
|
||||
|
||||
def has_rmm():
    """Report whether the ``rmm`` package can be imported.

    Returns
    -------
    bool
        ``True`` when ``import rmm`` succeeds, ``False`` when it raises
        ``ImportError``.
    """
    try:
        import rmm  # noqa: F401  (imported only to probe availability)
    except ImportError:
        return False
    else:
        return True
|
||||
|
||||
@pytest.fixture(scope='session', autouse=True)
def setup_rmm_pool(request, pytestconfig):
    """Switch RMM to its pool allocator when ``--use-rmm-pool`` is given.

    Session-scoped and ``autouse``, so it is applied without tests having to
    request it. Without the flag the fixture does nothing.

    Raises
    ------
    ImportError
        If ``--use-rmm-pool`` was passed but the RMM package is not
        importable.
    """
    # Guard clause: nothing to set up unless the flag was passed.
    if not pytestconfig.getoption('--use-rmm-pool'):
        return
    if not has_rmm():
        raise ImportError('The --use-rmm-pool option requires the RMM package')

    # Imported lazily so plain test runs need neither package.
    import rmm
    from dask_cuda.utils import get_n_gpus

    # Device ids 0 .. get_n_gpus() - 1.
    gpu_ids = list(range(get_n_gpus()))
    rmm.reinitialize(
        pool_allocator=True,
        initial_pool_size=1024*1024*1024,
        devices=gpu_ids,
    )
|
||||
|
||||
@pytest.fixture(scope='function')
def local_cuda_cluster(request, pytestconfig):
    """Yield a ``LocalCUDACluster`` and close it when the test is done.

    Keyword arguments for the cluster are taken from ``request.param`` when
    the fixture is parametrized. With ``--use-rmm-pool`` the RMM state is
    reset via ``rmm.reinitialize()`` and the cluster is created with
    ``rmm_pool_size='2GB'``.

    Raises
    ------
    ImportError
        If ``--use-rmm-pool`` was passed but RMM is not importable, or if
        ``dask_cuda`` is not available.
    """
    kwargs = {}
    if hasattr(request, 'param'):
        kwargs.update(request.param)

    use_rmm_pool = pytestconfig.getoption('--use-rmm-pool')

    # Validate every optional dependency before touching any global state, so
    # a missing package yields the explicit message below rather than a bare
    # ModuleNotFoundError raised part-way through RMM setup.
    if use_rmm_pool and not has_rmm():
        raise ImportError('The --use-rmm-pool option requires the RMM package')
    if tm.no_dask_cuda()['condition']:
        raise ImportError('The local_cuda_cluster fixture requires dask_cuda package')

    if use_rmm_pool:
        import rmm
        rmm.reinitialize()
        kwargs['rmm_pool_size'] = '2GB'

    from dask_cuda import LocalCUDACluster
    cluster = LocalCUDACluster(**kwargs)
    try:
        yield cluster
    finally:
        # Close the cluster even if the generator is closed early.
        cluster.close()
|
||||
|
||||
def pytest_addoption(parser):
    """Register the ``--use-rmm-pool`` command-line flag.

    The flag is a boolean switch that defaults to ``False``.
    """
    parser.addoption(
        '--use-rmm-pool',
        action='store_true',
        default=False,
        help='Use RMM pool',
    )
|
||||
Reference in New Issue
Block a user