* [CI] Add RMM as an optional dependency
* Replace caching allocator with pool allocator from RMM
* Revert "Replace caching allocator with pool allocator from RMM"
  This reverts commit e15845d4e72e890c2babe31a988b26503a7d9038.
* Use rmm::mr::get_default_resource()
* Try setting default resource (doesn't work yet)
* Allocate pool_mr on the heap
* Prevent leaking the pool_mr handle
* Separate EXPECT_DEATH() into a separate test suite suffixed DeathTest
* Turn off death tests for RMM
* Address reviewer's feedback
* Prevent leaking of cuda_mr
* Fix Jenkinsfile syntax
* Remove unnecessary function in Jenkinsfile
* [CI] Install NCCL into RMM container
* Run Python tests
* Try building with RMM, CUDA 10.0
* Do not use RMM for CUDA 10.0 target
* Actually test for the test_rmm flag
* Fix TestPythonGPU
* Use CNMeM allocator, since the pool allocator doesn't yet support multi-GPU
* Use 10.0 container to build RMM-enabled XGBoost
* Revert "Use 10.0 container to build RMM-enabled XGBoost"
  This reverts commit 789021fa31112e25b683aef39fff375403060141.
* Fix Jenkinsfile
* [CI] Assign larger /dev/shm to NCCL
* Use 10.2 artifact to run multi-GPU Python tests
* Add CUDA 10.0 -> 11.0 cross-version test; remove CUDA 10.0 target
* Rename Conda env rmm_test -> gpu_test
* Use env var to opt into CNMeM pool for C++ tests
* Use identical CUDA version for RMM builds and tests
* Use Pytest fixtures to enable RMM pool in Python tests
* Move RMM to plugin/CMakeLists.txt; use PLUGIN_RMM
* Use per-device MR; use command arg in gtest
* Set CMake prefix path to use Conda env
* Use 0.15 nightly version of RMM
* Remove unnecessary header
* Fix a unit test when cudf is missing
* Add RMM demos
* Remove print()
* Use HostDeviceVector in GPU predictor
* Simplify pytest setup; use LocalCUDACluster fixture
* Address reviewers' comments

Co-authored-by: Hyunsu Cho <chohyu01@cs.wasshington.edu>
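The commit log above mentions several C++-side RMM details: allocating pool_mr on the heap, preventing leaks of cuda_mr and pool_mr, and switching to per-device memory resources. The snippet below is a minimal sketch of that pattern, not the actual XGBoost plugin code; it assumes the RMM 0.15-era cuda_memory_resource, pool_memory_resource, and set_per_device_resource APIs, and the RMMPoolGuard name is made up for illustration.

// Illustrative sketch only -- not the XGBoost plugin code.  Assumes RMM 0.15-era
// headers and APIs (pool_memory_resource, set_per_device_resource); RMMPoolGuard
// is a hypothetical name.
#include <cuda_runtime_api.h>

#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

#include <cstddef>
#include <memory>
#include <vector>

using CudaMR = rmm::mr::cuda_memory_resource;
using PoolMR = rmm::mr::pool_memory_resource<CudaMR>;

// Creates one pool per visible device and registers it as that device's default
// resource.  Owning the resources with unique_ptr keeps cuda_mr/pool_mr from
// leaking, which the commit log calls out explicitly.
class RMMPoolGuard {
 public:
  RMMPoolGuard() {
    int n_devices = 0;
    cudaGetDeviceCount(&n_devices);
    for (int ordinal = 0; ordinal < n_devices; ++ordinal) {
      cudaSetDevice(ordinal);
      upstreams_.emplace_back(std::make_unique<CudaMR>());
      pools_.emplace_back(std::make_unique<PoolMR>(
          upstreams_.back().get(), std::size_t{1} << 28 /* 256 MiB initial pool */));
      rmm::mr::set_per_device_resource(rmm::cuda_device_id{ordinal},
                                       pools_.back().get());
    }
  }
  // In real code the per-device resources should be reset before the pools are
  // destroyed; omitted here to keep the sketch short.

 private:
  std::vector<std::unique_ptr<CudaMR>> upstreams_;
  std::vector<std::unique_ptr<PoolMR>> pools_;
};

The commit log also mentions falling back to the CNMeM allocator for multi-GPU runs; under the same assumptions that would mean substituting rmm::mr::cnmem_memory_resource for the pool resource above.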
76 lines · 1.9 KiB · C++
#include <gtest/gtest.h>
#include <xgboost/base.h>
#include <xgboost/span.h>
#include <xgboost/host_device_vector.h>

#include <vector>

#include "../../../src/common/transform.h"
#include "../helpers.h"

// Device ordinal handed to Transform: first GPU (0) in CUDA builds, CPU (-1) otherwise.
#if defined(__CUDACC__)
#define TRANSFORM_GPU 0
#else
#define TRANSFORM_GPU -1
#endif

// Fill [_begin, _end) with the sequence 0, 1, 2, ...
template <typename Iter>
void InitializeRange(Iter _begin, Iter _end) {
  float j = 0;
  for (Iter i = _begin; i != _end; ++i, ++j) {
    *i = j;
  }
}

namespace xgboost {
namespace common {

// Functor that copies the input span into the output span element-wise.
template <typename T>
struct TestTransformRange {
  void XGBOOST_DEVICE operator()(size_t _idx,
                                 Span<bst_float> _out, Span<const bst_float> _in) {
    _out[_idx] = _in[_idx];
  }
};

TEST(Transform, DeclareUnifiedTest(Basic)) {
  const size_t size {256};
  std::vector<bst_float> h_in(size);
  std::vector<bst_float> h_out(size);
  InitializeRange(h_in.begin(), h_in.end());
  std::vector<bst_float> h_sol(size);
  InitializeRange(h_sol.begin(), h_sol.end());

  const HostDeviceVector<bst_float> in_vec{h_in, TRANSFORM_GPU};
  HostDeviceVector<bst_float> out_vec{h_out, TRANSFORM_GPU};
  out_vec.Fill(0);

  Transform<>::Init(TestTransformRange<bst_float>{},
                    Range{0, static_cast<Range::DifferenceType>(size)},
                    TRANSFORM_GPU)
      .Eval(&out_vec, &in_vec);
  std::vector<bst_float> res = out_vec.HostVector();

  ASSERT_TRUE(std::equal(h_sol.begin(), h_sol.end(), res.begin()));
}

#if !defined(__CUDACC__)
// Out-of-bounds span access must abort the process; only exercised in the CPU build.
TEST(TransformDeathTest, Exception) {
  size_t const kSize {16};
  std::vector<bst_float> h_in(kSize);
  const HostDeviceVector<bst_float> in_vec{h_in, -1};
  EXPECT_DEATH({
    Transform<>::Init([](size_t idx, common::Span<float const> _in) { _in[idx + 1]; },
                      Range(0, static_cast<Range::DifferenceType>(kSize)), -1)
        .Eval(&in_vec);
  }, "");
}
#endif

}  // namespace common
}  // namespace xgboost