RMM integration plugin (#5873)

* [CI] Add RMM as an optional dependency

* Replace caching allocator with pool allocator from RMM

* Revert "Replace caching allocator with pool allocator from RMM"

This reverts commit e15845d4e72e890c2babe31a988b26503a7d9038.

* Use rmm::mr::get_default_resource()

* Try setting default resource (doesn't work yet)

* Allocate pool_mr in the heap

* Prevent leaking pool_mr handle

* Separate EXPECT_DEATH() in separate test suite suffixed DeathTest

* Turn off death tests for RMM

* Address reviewer's feedback

* Prevent leaking of cuda_mr

* Fix Jenkinsfile syntax

* Remove unnecessary function in Jenkinsfile

* [CI] Install NCCL into RMM container

* Run Python tests

* Try building with RMM, CUDA 10.0

* Do not use RMM for CUDA 10.0 target

* Actually test for test_rmm flag

* Fix TestPythonGPU

* Use CNMeM allocator, since pool allocator doesn't yet support multiGPU

* Use 10.0 container to build RMM-enabled XGBoost

* Revert "Use 10.0 container to build RMM-enabled XGBoost"

This reverts commit 789021fa31112e25b683aef39fff375403060141.

* Fix Jenkinsfile

* [CI] Assign larger /dev/shm to NCCL

* Use 10.2 artifact to run multi-GPU Python tests

* Add CUDA 10.0 -> 11.0 cross-version test; remove CUDA 10.0 target

* Rename Conda env rmm_test -> gpu_test

* Use env var to opt into CNMeM pool for C++ tests

* Use identical CUDA version for RMM builds and tests

* Use Pytest fixtures to enable RMM pool in Python tests

* Move RMM to plugin/CMakeLists.txt; use PLUGIN_RMM

* Use per-device MR; use command arg in gtest

* Set CMake prefix path to use Conda env

* Use 0.15 nightly version of RMM

* Remove unnecessary header

* Fix a unit test when cudf is missing

* Add RMM demos

* Remove print()

* Use HostDeviceVector in GPU predictor

* Simplify pytest setup; use LocalCUDACluster fixture

* Address reviewers' comments

Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
Philip Hyunsu Cho
2020-08-12 01:26:02 -07:00
committed by GitHub
parent c3ea3b7e37
commit 9adb812a0a
26 changed files with 508 additions and 140 deletions

View File

@@ -97,11 +97,6 @@ TEST(Span, FromPtrLen) {
}
}
{
auto lazy = [=]() {Span<float const, 16> tmp (arr, 5);};
EXPECT_DEATH(lazy(), "\\[xgboost\\] Condition .* failed.\n");
}
// dynamic extent
{
Span<float, 16> s (arr, 16);
@@ -122,6 +117,15 @@ TEST(Span, FromPtrLen) {
}
}
// Death test split out from TEST(Span, FromPtrLen): building a
// Span<float const, 16> from a pointer and a count of 5 is expected to
// terminate the process with the "[xgboost] Condition ... failed." message.
// NOTE(review): presumably the abort comes from the count not matching the
// static extent of 16 -- confirm against the Span constructor.
TEST(SpanDeathTest, FromPtrLen) {
  float data[16];
  InitializeRange(data, data+16);

  // Capture by copy so the closure is self-contained when EXPECT_DEATH
  // evaluates it.
  auto construct_with_bad_count = [=]() {
    Span<float const, 16> mismatched (data, 5);
  };
  EXPECT_DEATH(construct_with_bad_count(), "\\[xgboost\\] Condition .* failed.\n");
}
TEST(Span, FromFirstLast) {
float arr[16];
InitializeRange(arr, arr+16);
@@ -285,7 +289,13 @@ TEST(Span, ElementAccess) {
ASSERT_EQ(i, arr[j]);
++j;
}
}
TEST(SpanDeathTest, ElementAccess) {
float arr[16];
InitializeRange(arr, arr + 16);
Span<float> s (arr);
EXPECT_DEATH(s[16], "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s[-1], "\\[xgboost\\] Condition .* failed.\n");
@@ -312,7 +322,9 @@ TEST(Span, FrontBack) {
ASSERT_EQ(s.front(), 0);
ASSERT_EQ(s.back(), 3);
}
}
TEST(SpanDeathTest, FrontBack) {
{
Span<float, 0> s;
EXPECT_DEATH(s.front(), "\\[xgboost\\] Condition .* failed.\n");
@@ -340,10 +352,6 @@ TEST(Span, FirstLast) {
for (size_t i = 0; i < first.size(); ++i) {
ASSERT_EQ(first[i], arr[i]);
}
auto constexpr kOne = static_cast<Span<float, 4>::index_type>(-1);
EXPECT_DEATH(s.first<kOne>(), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.first<17>(), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.first<32>(), "\\[xgboost\\] Condition .* failed.\n");
}
{
@@ -359,10 +367,6 @@ TEST(Span, FirstLast) {
for (size_t i = 0; i < last.size(); ++i) {
ASSERT_EQ(last[i], arr[i+12]);
}
auto constexpr kOne = static_cast<Span<float, 4>::index_type>(-1);
EXPECT_DEATH(s.last<kOne>(), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.last<17>(), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.last<32>(), "\\[xgboost\\] Condition .* failed.\n");
}
// dynamic extent
@@ -379,10 +383,6 @@ TEST(Span, FirstLast) {
ASSERT_EQ(first[i], s[i]);
}
EXPECT_DEATH(s.first(-1), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.first(17), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.first(32), "\\[xgboost\\] Condition .* failed.\n");
delete [] arr;
}
@@ -399,6 +399,50 @@ TEST(Span, FirstLast) {
ASSERT_EQ(s[12 + i], last[i]);
}
delete [] arr;
}
}
TEST(SpanDeathTest, FirstLast) {
// static extent
{
float arr[16];
InitializeRange(arr, arr + 16);
Span<float> s (arr);
auto constexpr kOne = static_cast<Span<float, 4>::index_type>(-1);
EXPECT_DEATH(s.first<kOne>(), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.first<17>(), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.first<32>(), "\\[xgboost\\] Condition .* failed.\n");
}
{
float arr[16];
InitializeRange(arr, arr + 16);
Span<float> s (arr);
auto constexpr kOne = static_cast<Span<float, 4>::index_type>(-1);
EXPECT_DEATH(s.last<kOne>(), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.last<17>(), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.last<32>(), "\\[xgboost\\] Condition .* failed.\n");
}
// dynamic extent
{
float *arr = new float[16];
InitializeRange(arr, arr + 16);
Span<float> s (arr, 16);
EXPECT_DEATH(s.first(-1), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.first(17), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.first(32), "\\[xgboost\\] Condition .* failed.\n");
delete [] arr;
}
{
float *arr = new float[16];
InitializeRange(arr, arr + 16);
Span<float> s (arr, 16);
EXPECT_DEATH(s.last(-1), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.last(17), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.last(32), "\\[xgboost\\] Condition .* failed.\n");
@@ -420,7 +464,11 @@ TEST(Span, Subspan) {
auto s4 = s1.subspan(2, dynamic_extent);
ASSERT_EQ(s1.data() + 2, s4.data());
ASSERT_EQ(s4.size(), s1.size() - 2);
}
TEST(SpanDeathTest, Subspan) {
int arr[16] {0};
Span<int> s1 (arr);
EXPECT_DEATH(s1.subspan(-1, 0), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s1.subspan(17, 0), "\\[xgboost\\] Condition .* failed.\n");

View File

@@ -221,7 +221,7 @@ struct TestElementAccess {
}
};
TEST(GPUSpan, ElementAccess) {
TEST(GPUSpanDeathTest, ElementAccess) {
dh::safe_cuda(cudaSetDevice(0));
auto test_element_access = []() {
thrust::host_vector<float> h_vec (16);

View File

@@ -59,7 +59,7 @@ TEST(Transform, DeclareUnifiedTest(Basic)) {
}
#if !defined(__CUDACC__)
TEST(Transform, Exception) {
TEST(TransformDeathTest, Exception) {
size_t const kSize {16};
std::vector<bst_float> h_in(kSize);
const HostDeviceVector<bst_float> in_vec{h_in, -1};