RMM integration plugin (#5873)

* [CI] Add RMM as an optional dependency * Replace caching allocator with pool allocator from RMM * Revert "Replace caching allocator with pool allocator from RMM" This reverts commit e15845d4e72e890c2babe31a988b26503a7d9038. * Use rmm::mr::get_default_resource() * Try setting default resource (doesn't work yet) * Allocate pool_mr in the heap * Prevent leaking pool_mr handle * Separate EXPECT_DEATH() in separate test suite suffixed DeathTest * Turn off death tests for RMM * Address reviewer's feedback * Prevent leaking of cuda_mr * Fix Jenkinsfile syntax * Remove unnecessary function in Jenkinsfile * [CI] Install NCCL into RMM container * Run Python tests * Try building with RMM, CUDA 10.0 * Do not use RMM for CUDA 10.0 target * Actually test for test_rmm flag * Fix TestPythonGPU * Use CNMeM allocator, since pool allocator doesn't yet support multiGPU * Use 10.0 container to build RMM-enabled XGBoost * Revert "Use 10.0 container to build RMM-enabled XGBoost" This reverts commit 789021fa31112e25b683aef39fff375403060141. * Fix Jenkinsfile * [CI] Assign larger /dev/shm to NCCL * Use 10.2 artifact to run multi-GPU Python tests * Add CUDA 10.0 -> 11.0 cross-version test; remove CUDA 10.0 target * Rename Conda env rmm_test -> gpu_test * Use env var to opt into CNMeM pool for C++ tests * Use identical CUDA version for RMM builds and tests * Use Pytest fixtures to enable RMM pool in Python tests * Move RMM to plugin/CMakeLists.txt; use PLUGIN_RMM * Use per-device MR; use command arg in gtest * Set CMake prefix path to use Conda env * Use 0.15 nightly version of RMM * Remove unnecessary header * Fix a unit test when cudf is missing * Add RMM demos * Remove print() * Use HostDeviceVector in GPU predictor * Simplify pytest setup; use LocalCUDACluster fixture * Address reviewers' comments Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
2020-08-12 01:26:02 -07:00
parent c3ea3b7e37
commit 9adb812a0a
26 changed files with 508 additions and 140 deletions
--- a/57
+++ b/57
@@ -73,7 +73,7 @@ pipeline {
            'build-gpu-cuda10.0': { BuildCUDA(cuda_version: '10.0') },
            // The build-gpu-* builds below use Ubuntu image
            'build-gpu-cuda10.1': { BuildCUDA(cuda_version: '10.1') },
-            'build-gpu-cuda10.2': { BuildCUDA(cuda_version: '10.2') },
+            'build-gpu-cuda10.2': { BuildCUDA(cuda_version: '10.2', build_rmm: true) },
            'build-gpu-cuda11.0': { BuildCUDA(cuda_version: '11.0') },
            'build-jvm-packages-gpu-cuda10.0': { BuildJVMPackagesWithCUDA(spark_version: '3.0.0', cuda_version: '10.0') },
            'build-jvm-packages': { BuildJVMPackages(spark_version: '3.0.0') },
@@ -89,11 +89,12 @@ pipeline {
        script {
          parallel ([
            'test-python-cpu': { TestPythonCPU() },
-            'test-python-gpu-cuda10.2': { TestPythonGPU(host_cuda_version: '10.2') },
+            // artifact_cuda_version doesn't apply to RMM tests; RMM tests will always match CUDA version between artifact and host env
+            'test-python-gpu-cuda10.2': { TestPythonGPU(artifact_cuda_version: '10.0', host_cuda_version: '10.2', test_rmm: true) },
            'test-python-gpu-cuda11.0-cross': { TestPythonGPU(artifact_cuda_version: '10.0', host_cuda_version: '11.0') },
            'test-python-gpu-cuda11.0': { TestPythonGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0') },
-            'test-python-mgpu-cuda10.2': { TestPythonGPU(artifact_cuda_version: '10.2', host_cuda_version: '10.2', multi_gpu: true) },
-            'test-cpp-gpu-cuda10.2': { TestCppGPU(artifact_cuda_version: '10.2', host_cuda_version: '10.2') },
+            'test-python-mgpu-cuda10.2': { TestPythonGPU(artifact_cuda_version: '10.0', host_cuda_version: '10.2', multi_gpu: true, test_rmm: true) },
+            'test-cpp-gpu-cuda10.2': { TestCppGPU(artifact_cuda_version: '10.2', host_cuda_version: '10.2', test_rmm: true) },
            'test-cpp-gpu-cuda11.0': { TestCppGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0') },
            'test-jvm-jdk8-cuda10.0': { CrossTestJVMwithJDKGPU(artifact_cuda_version: '10.0', host_cuda_version: '10.0') },
            'test-jvm-jdk8': { CrossTestJVMwithJDK(jdk_version: '8', spark_version: '3.0.0') },
@@ -280,6 +281,22 @@ def BuildCUDA(args) {
    }
    echo 'Stashing C++ test executable (testxgboost)...'
    stash name: "xgboost_cpp_tests_cuda${args.cuda_version}", includes: 'build/testxgboost'
+    if (args.build_rmm) {
+      echo "Build with CUDA ${args.cuda_version} and RMM"
+      container_type = "rmm"
+      docker_binary = "docker"
+      docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
+      sh """
+      rm -rf build/
+      ${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/build_via_cmake.sh --conda-env=gpu_test -DUSE_CUDA=ON -DUSE_NCCL=ON -DPLUGIN_RMM=ON ${arch_flag}
+      ${dockerRun} ${container_type} ${docker_binary} ${docker_args} bash -c "cd python-package && rm -rf dist/* && python setup.py bdist_wheel --universal"
+      ${dockerRun} ${container_type} ${docker_binary} ${docker_args} python tests/ci_build/rename_whl.py python-package/dist/*.whl ${commit_id} manylinux2010_x86_64
+      """
+      echo 'Stashing Python wheel...'
+      stash name: "xgboost_whl_rmm_cuda${args.cuda_version}", includes: 'python-package/dist/*.whl'
+      echo 'Stashing C++ test executable (testxgboost)...'
+      stash name: "xgboost_cpp_tests_rmm_cuda${args.cuda_version}", includes: 'build/testxgboost'
+    }
    deleteDir()
  }
 }
@@ -366,18 +383,15 @@ def TestPythonGPU(args) {
    def container_type = "gpu"
    def docker_binary = "nvidia-docker"
    def docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}"
-    if (args.multi_gpu) {
-      echo "Using multiple GPUs"
-      // Allocate extra space in /dev/shm to enable NCCL
-      def docker_extra_params = "CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g'"
-      sh """
-      ${docker_extra_params} ${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/test_python.sh mgpu
-      """
-    } else {
-      echo "Using a single GPU"
-      sh """
-      ${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/test_python.sh gpu
-      """
+    def mgpu_indicator = (args.multi_gpu) ? 'mgpu' : 'gpu'
+    // Allocate extra space in /dev/shm to enable NCCL
+    def docker_extra_params = (args.multi_gpu) ? "CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g'" : ''
+    sh "${docker_extra_params} ${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/test_python.sh ${mgpu_indicator}"
+    if (args.test_rmm) {
+      sh "rm -rfv build/ python-package/dist/"
+      unstash name: "xgboost_whl_rmm_cuda${args.host_cuda_version}"
+      unstash name: "xgboost_cpp_tests_rmm_cuda${args.host_cuda_version}"
+      sh "${docker_extra_params} ${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/test_python.sh ${mgpu_indicator} --use-rmm-pool"
    }
    deleteDir()
  }
@@ -408,6 +422,17 @@ def TestCppGPU(args) {
    def docker_binary = "nvidia-docker"
    def docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}"
    sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} build/testxgboost"
+    if (args.test_rmm) {
+      sh "rm -rfv build/"
+      unstash name: "xgboost_cpp_tests_rmm_cuda${args.host_cuda_version}"
+      echo "Test C++, CUDA ${args.host_cuda_version} with RMM"
+      container_type = "rmm"
+      docker_binary = "nvidia-docker"
+      docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}"
+      sh """
+      ${dockerRun} ${container_type} ${docker_binary} ${docker_args} bash -c "source activate gpu_test && build/testxgboost --use-rmm-pool --gtest_filter=-*DeathTest.*"
+      """
+    }
    deleteDir()
  }
 }