RMM integration plugin (#5873)
* [CI] Add RMM as an optional dependency * Replace caching allocator with pool allocator from RMM * Revert "Replace caching allocator with pool allocator from RMM" This reverts commit e15845d4e72e890c2babe31a988b26503a7d9038. * Use rmm::mr::get_default_resource() * Try setting default resource (doesn't work yet) * Allocate pool_mr in the heap * Prevent leaking pool_mr handle * Separate EXPECT_DEATH() in separate test suite suffixed DeathTest * Turn off death tests for RMM * Address reviewer's feedback * Prevent leaking of cuda_mr * Fix Jenkinsfile syntax * Remove unnecessary function in Jenkinsfile * [CI] Install NCCL into RMM container * Run Python tests * Try building with RMM, CUDA 10.0 * Do not use RMM for CUDA 10.0 target * Actually test for test_rmm flag * Fix TestPythonGPU * Use CNMeM allocator, since pool allocator doesn't yet support multiGPU * Use 10.0 container to build RMM-enabled XGBoost * Revert "Use 10.0 container to build RMM-enabled XGBoost" This reverts commit 789021fa31112e25b683aef39fff375403060141. * Fix Jenkinsfile * [CI] Assign larger /dev/shm to NCCL * Use 10.2 artifact to run multi-GPU Python tests * Add CUDA 10.0 -> 11.0 cross-version test; remove CUDA 10.0 target * Rename Conda env rmm_test -> gpu_test * Use env var to opt into CNMeM pool for C++ tests * Use identical CUDA version for RMM builds and tests * Use Pytest fixtures to enable RMM pool in Python tests * Move RMM to plugin/CMakeLists.txt; use PLUGIN_RMM * Use per-device MR; use command arg in gtest * Set CMake prefix path to use Conda env * Use 0.15 nightly version of RMM * Remove unnecessary header * Fix a unit test when cudf is missing * Add RMM demos * Remove print() * Use HostDeviceVector in GPU predictor * Simplify pytest setup; use LocalCUDACluster fixture * Address reviewers' comments Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
committed by
GitHub
parent
c3ea3b7e37
commit
9adb812a0a
@@ -37,6 +37,8 @@ if (USE_CUDA)
|
||||
$<$<COMPILE_LANGUAGE:CUDA>:${GEN_CODE}>)
|
||||
target_compile_definitions(testxgboost
|
||||
PRIVATE -DXGBOOST_USE_CUDA=1)
|
||||
find_package(CUDA)
|
||||
target_include_directories(testxgboost PRIVATE ${CUDA_INCLUDE_DIRS})
|
||||
set_target_properties(testxgboost PROPERTIES
|
||||
CUDA_SEPARABLE_COMPILATION OFF)
|
||||
|
||||
|
||||
@@ -97,11 +97,6 @@ TEST(Span, FromPtrLen) {
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
auto lazy = [=]() {Span<float const, 16> tmp (arr, 5);};
|
||||
EXPECT_DEATH(lazy(), "\\[xgboost\\] Condition .* failed.\n");
|
||||
}
|
||||
|
||||
// dynamic extent
|
||||
{
|
||||
Span<float, 16> s (arr, 16);
|
||||
@@ -122,6 +117,15 @@ TEST(Span, FromPtrLen) {
|
||||
}
|
||||
}
|
||||
|
||||
// Death test: constructing a Span with a static extent of 16 from a pointer
// and a length of 5 is expected to kill the process with an error message
// matching "[xgboost] Condition ... failed.".
TEST(SpanDeathTest, FromPtrLen) {
|
||||
float arr[16];
|
||||
InitializeRange(arr, arr+16);
|
||||
{
|
||||
// The construction is wrapped in a lambda so it only runs when EXPECT_DEATH
// invokes lazy().
auto lazy = [=]() {Span<float const, 16> tmp (arr, 5);};
|
||||
EXPECT_DEATH(lazy(), "\\[xgboost\\] Condition .* failed.\n");
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Span, FromFirstLast) {
|
||||
float arr[16];
|
||||
InitializeRange(arr, arr+16);
|
||||
@@ -285,7 +289,13 @@ TEST(Span, ElementAccess) {
|
||||
ASSERT_EQ(i, arr[j]);
|
||||
++j;
|
||||
}
|
||||
}
|
||||
|
||||
TEST(SpanDeathTest, ElementAccess) {
|
||||
float arr[16];
|
||||
InitializeRange(arr, arr + 16);
|
||||
|
||||
Span<float> s (arr);
|
||||
EXPECT_DEATH(s[16], "\\[xgboost\\] Condition .* failed.\n");
|
||||
EXPECT_DEATH(s[-1], "\\[xgboost\\] Condition .* failed.\n");
|
||||
|
||||
@@ -312,7 +322,9 @@ TEST(Span, FrontBack) {
|
||||
ASSERT_EQ(s.front(), 0);
|
||||
ASSERT_EQ(s.back(), 3);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(SpanDeathTest, FrontBack) {
|
||||
{
|
||||
Span<float, 0> s;
|
||||
EXPECT_DEATH(s.front(), "\\[xgboost\\] Condition .* failed.\n");
|
||||
@@ -340,10 +352,6 @@ TEST(Span, FirstLast) {
|
||||
for (size_t i = 0; i < first.size(); ++i) {
|
||||
ASSERT_EQ(first[i], arr[i]);
|
||||
}
|
||||
auto constexpr kOne = static_cast<Span<float, 4>::index_type>(-1);
|
||||
EXPECT_DEATH(s.first<kOne>(), "\\[xgboost\\] Condition .* failed.\n");
|
||||
EXPECT_DEATH(s.first<17>(), "\\[xgboost\\] Condition .* failed.\n");
|
||||
EXPECT_DEATH(s.first<32>(), "\\[xgboost\\] Condition .* failed.\n");
|
||||
}
|
||||
|
||||
{
|
||||
@@ -359,10 +367,6 @@ TEST(Span, FirstLast) {
|
||||
for (size_t i = 0; i < last.size(); ++i) {
|
||||
ASSERT_EQ(last[i], arr[i+12]);
|
||||
}
|
||||
auto constexpr kOne = static_cast<Span<float, 4>::index_type>(-1);
|
||||
EXPECT_DEATH(s.last<kOne>(), "\\[xgboost\\] Condition .* failed.\n");
|
||||
EXPECT_DEATH(s.last<17>(), "\\[xgboost\\] Condition .* failed.\n");
|
||||
EXPECT_DEATH(s.last<32>(), "\\[xgboost\\] Condition .* failed.\n");
|
||||
}
|
||||
|
||||
// dynamic extent
|
||||
@@ -379,10 +383,6 @@ TEST(Span, FirstLast) {
|
||||
ASSERT_EQ(first[i], s[i]);
|
||||
}
|
||||
|
||||
EXPECT_DEATH(s.first(-1), "\\[xgboost\\] Condition .* failed.\n");
|
||||
EXPECT_DEATH(s.first(17), "\\[xgboost\\] Condition .* failed.\n");
|
||||
EXPECT_DEATH(s.first(32), "\\[xgboost\\] Condition .* failed.\n");
|
||||
|
||||
delete [] arr;
|
||||
}
|
||||
|
||||
@@ -399,6 +399,50 @@ TEST(Span, FirstLast) {
|
||||
ASSERT_EQ(s[12 + i], last[i]);
|
||||
}
|
||||
|
||||
delete [] arr;
|
||||
}
|
||||
}
|
||||
|
||||
TEST(SpanDeathTest, FirstLast) {
|
||||
// static extent
|
||||
{
|
||||
float arr[16];
|
||||
InitializeRange(arr, arr + 16);
|
||||
|
||||
Span<float> s (arr);
|
||||
auto constexpr kOne = static_cast<Span<float, 4>::index_type>(-1);
|
||||
EXPECT_DEATH(s.first<kOne>(), "\\[xgboost\\] Condition .* failed.\n");
|
||||
EXPECT_DEATH(s.first<17>(), "\\[xgboost\\] Condition .* failed.\n");
|
||||
EXPECT_DEATH(s.first<32>(), "\\[xgboost\\] Condition .* failed.\n");
|
||||
}
|
||||
|
||||
{
|
||||
float arr[16];
|
||||
InitializeRange(arr, arr + 16);
|
||||
|
||||
Span<float> s (arr);
|
||||
auto constexpr kOne = static_cast<Span<float, 4>::index_type>(-1);
|
||||
EXPECT_DEATH(s.last<kOne>(), "\\[xgboost\\] Condition .* failed.\n");
|
||||
EXPECT_DEATH(s.last<17>(), "\\[xgboost\\] Condition .* failed.\n");
|
||||
EXPECT_DEATH(s.last<32>(), "\\[xgboost\\] Condition .* failed.\n");
|
||||
}
|
||||
|
||||
// dynamic extent
|
||||
{
|
||||
float *arr = new float[16];
|
||||
InitializeRange(arr, arr + 16);
|
||||
Span<float> s (arr, 16);
|
||||
EXPECT_DEATH(s.first(-1), "\\[xgboost\\] Condition .* failed.\n");
|
||||
EXPECT_DEATH(s.first(17), "\\[xgboost\\] Condition .* failed.\n");
|
||||
EXPECT_DEATH(s.first(32), "\\[xgboost\\] Condition .* failed.\n");
|
||||
|
||||
delete [] arr;
|
||||
}
|
||||
|
||||
{
|
||||
float *arr = new float[16];
|
||||
InitializeRange(arr, arr + 16);
|
||||
Span<float> s (arr, 16);
|
||||
EXPECT_DEATH(s.last(-1), "\\[xgboost\\] Condition .* failed.\n");
|
||||
EXPECT_DEATH(s.last(17), "\\[xgboost\\] Condition .* failed.\n");
|
||||
EXPECT_DEATH(s.last(32), "\\[xgboost\\] Condition .* failed.\n");
|
||||
@@ -420,7 +464,11 @@ TEST(Span, Subspan) {
|
||||
auto s4 = s1.subspan(2, dynamic_extent);
|
||||
ASSERT_EQ(s1.data() + 2, s4.data());
|
||||
ASSERT_EQ(s4.size(), s1.size() - 2);
|
||||
}
|
||||
|
||||
TEST(SpanDeathTest, Subspan) {
|
||||
int arr[16] {0};
|
||||
Span<int> s1 (arr);
|
||||
EXPECT_DEATH(s1.subspan(-1, 0), "\\[xgboost\\] Condition .* failed.\n");
|
||||
EXPECT_DEATH(s1.subspan(17, 0), "\\[xgboost\\] Condition .* failed.\n");
|
||||
|
||||
|
||||
@@ -221,7 +221,7 @@ struct TestElementAccess {
|
||||
}
|
||||
};
|
||||
|
||||
TEST(GPUSpan, ElementAccess) {
|
||||
TEST(GPUSpanDeathTest, ElementAccess) {
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
auto test_element_access = []() {
|
||||
thrust::host_vector<float> h_vec (16);
|
||||
|
||||
@@ -59,7 +59,7 @@ TEST(Transform, DeclareUnifiedTest(Basic)) {
|
||||
}
|
||||
|
||||
#if !defined(__CUDACC__)
|
||||
TEST(Transform, Exception) {
|
||||
TEST(TransformDeathTest, Exception) {
|
||||
size_t const kSize {16};
|
||||
std::vector<bst_float> h_in(kSize);
|
||||
const HostDeviceVector<bst_float> in_vec{h_in, -1};
|
||||
|
||||
@@ -20,6 +20,15 @@
|
||||
#include "../../src/gbm/gbtree_model.h"
|
||||
#include "xgboost/predictor.h"
|
||||
|
||||
#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
||||
#include <memory>
|
||||
#include <numeric>
|
||||
#include <vector>
|
||||
#include "rmm/mr/device/per_device_resource.hpp"
|
||||
#include "rmm/mr/device/cuda_memory_resource.hpp"
|
||||
#include "rmm/mr/device/pool_memory_resource.hpp"
|
||||
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
||||
|
||||
bool FileExists(const std::string& filename) {
|
||||
struct stat st;
|
||||
return stat(filename.c_str(), &st) == 0;
|
||||
@@ -478,4 +487,57 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(
|
||||
return gbm;
|
||||
}
|
||||
|
||||
#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
||||
|
||||
using CUDAMemoryResource = rmm::mr::cuda_memory_resource;
|
||||
using PoolMemoryResource = rmm::mr::pool_memory_resource<CUDAMemoryResource>;
|
||||
class RMMAllocator {
|
||||
public:
|
||||
std::vector<std::unique_ptr<CUDAMemoryResource>> cuda_mr;
|
||||
std::vector<std::unique_ptr<PoolMemoryResource>> pool_mr;
|
||||
int n_gpu;
|
||||
RMMAllocator() : n_gpu(common::AllVisibleGPUs()) {
|
||||
int current_device;
|
||||
CHECK_EQ(cudaGetDevice(¤t_device), cudaSuccess);
|
||||
for (int i = 0; i < n_gpu; ++i) {
|
||||
CHECK_EQ(cudaSetDevice(i), cudaSuccess);
|
||||
cuda_mr.push_back(std::make_unique<CUDAMemoryResource>());
|
||||
pool_mr.push_back(std::make_unique<PoolMemoryResource>(cuda_mr[i].get()));
|
||||
}
|
||||
CHECK_EQ(cudaSetDevice(current_device), cudaSuccess);
|
||||
}
|
||||
~RMMAllocator() = default;
|
||||
};
|
||||
|
||||
// Deleter function handed to RMMAllocatorPtr: releases the allocator with
// `delete`.
void DeleteRMMResource(RMMAllocator* r) {
|
||||
delete r;
|
||||
}
|
||||
|
||||
// Scans the command line (skipping argv[0]) for "--use-rmm-pool".
// When the flag is present, an RMMAllocator is created and its pool resource
// for device i is registered as the RMM per-device resource of device i; the
// returned handle owns that allocator. When the flag is absent, a null handle
// is returned and nothing is registered.
RMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv) {
  const std::string pool_flag("--use-rmm-pool");
  bool pool_requested = false;
  for (int pos = 1; pos < argc && !pool_requested; ++pos) {
    pool_requested = (pool_flag == argv[pos]);
  }

  if (pool_requested) {
    LOG(INFO) << "Using RMM memory pool";
    RMMAllocatorPtr allocator(new RMMAllocator(), DeleteRMMResource);
    for (int device = 0; device < allocator->n_gpu; ++device) {
      rmm::mr::set_per_device_resource(rmm::cuda_device_id(device),
                                       allocator->pool_mr[device].get());
    }
    return allocator;
  }
  return RMMAllocatorPtr(nullptr, DeleteRMMResource);
}
|
||||
#else // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
||||
// Empty stand-in used in this #else branch (XGBOOST_USE_RMM not enabled).
class RMMAllocator {};
|
||||
|
||||
// No-op deleter for builds without RMM; the pointer is left untouched.
void DeleteRMMResource(RMMAllocator* r) {}
|
||||
|
||||
// Variant for builds without RMM: the arguments are ignored and a null handle
// is returned.
RMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv) {
|
||||
return RMMAllocatorPtr(nullptr, DeleteRMMResource);
|
||||
}
|
||||
#endif // !defined(XGBOOST_USE_RMM) || XGBOOST_USE_RMM != 1
|
||||
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include <fstream>
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
@@ -352,5 +353,9 @@ inline int Next(DataIterHandle self) {
|
||||
return static_cast<CudaArrayIterForTest*>(self)->Next();
|
||||
}
|
||||
|
||||
// Forward declaration; the class body is defined elsewhere.
class RMMAllocator;
|
||||
// Owning handle to an RMMAllocator whose deleter is a plain function pointer.
using RMMAllocatorPtr = std::unique_ptr<RMMAllocator, void(*)(RMMAllocator*)>;
|
||||
// Takes the program's argc/argv and returns an RMMAllocatorPtr.
RMMAllocatorPtr SetUpRMMResourceForCppTests(int argc, char** argv);
|
||||
|
||||
} // namespace xgboost
|
||||
#endif
|
||||
|
||||
@@ -3,13 +3,17 @@
|
||||
#include <xgboost/base.h>
|
||||
#include <xgboost/logging.h>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "helpers.h"
|
||||
|
||||
// Entry point of the C++ test binary: configures the console logger,
// initialises googletest, sets up the RMM resource, then runs every test.
int main(int argc, char ** argv) {
|
||||
// Logger verbosity is set to "2" before anything else runs.
xgboost::Args args {{"verbosity", "2"}};
|
||||
xgboost::ConsoleLogger::Configure(args);
|
||||
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
testing::FLAGS_gtest_death_test_style = "threadsafe";
|
||||
// Held in a local so the handle stays alive until RUN_ALL_TESTS() returns.
auto rmm_alloc = xgboost::SetUpRMMResourceForCppTests(argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
|
||||
@@ -119,7 +119,7 @@ void TestIncorrectRow() {
|
||||
});
|
||||
}
|
||||
|
||||
TEST(RowPartitioner, IncorrectRow) {
|
||||
// Death test: TestIncorrectRow() is expected to terminate the process. The
// ".*" pattern accepts any error output.
TEST(RowPartitionerDeathTest, IncorrectRow) {
|
||||
ASSERT_DEATH({ TestIncorrectRow(); },".*");
|
||||
}
|
||||
} // namespace tree
|
||||
|
||||
Reference in New Issue
Block a user