Purge device_helpers.cuh (#5534)
* Simplifications with caching_device_vector
* Purge device helpers
This commit is contained in:
@@ -27,64 +27,13 @@ void CreateTestData(xgboost::bst_uint num_rows, int max_row_size,
|
||||
}
|
||||
}
|
||||
|
||||
void TestLbs() {
|
||||
srand(17);
|
||||
dh::CubMemory temp_memory;
|
||||
|
||||
std::vector<int> test_rows = {4, 100, 1000};
|
||||
std::vector<int> test_max_row_sizes = {4, 100, 1300};
|
||||
|
||||
for (auto num_rows : test_rows) {
|
||||
for (auto max_row_size : test_max_row_sizes) {
|
||||
thrust::host_vector<int> h_row_ptr;
|
||||
thrust::host_vector<xgboost::bst_uint> h_rows;
|
||||
CreateTestData(num_rows, max_row_size, &h_row_ptr, &h_rows);
|
||||
thrust::device_vector<size_t> row_ptr = h_row_ptr;
|
||||
thrust::device_vector<int> output_row(h_rows.size());
|
||||
auto d_output_row = output_row.data();
|
||||
|
||||
dh::TransformLbs(0, &temp_memory, h_rows.size(), dh::Raw(row_ptr),
|
||||
row_ptr.size() - 1, false,
|
||||
[=] __device__(size_t idx, size_t ridx) {
|
||||
d_output_row[idx] = ridx;
|
||||
});
|
||||
|
||||
dh::safe_cuda(cudaDeviceSynchronize());
|
||||
ASSERT_TRUE(h_rows == output_row);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(CubLBS, Test) {
|
||||
TestLbs();
|
||||
}
|
||||
|
||||
TEST(SumReduce, Test) {
|
||||
thrust::device_vector<float> data(100, 1.0f);
|
||||
dh::CubMemory temp;
|
||||
auto sum = dh::SumReduction(&temp, dh::Raw(data), data.size());
|
||||
auto sum = dh::SumReduction(&temp, data.data().get(), data.size());
|
||||
ASSERT_NEAR(sum, 100.0f, 1e-5);
|
||||
}
|
||||
|
||||
void TestAllocator() {
|
||||
int n = 10;
|
||||
Span<float> a;
|
||||
Span<int> b;
|
||||
Span<size_t> c;
|
||||
dh::BulkAllocator ba;
|
||||
ba.Allocate(0, &a, n, &b, n, &c, n);
|
||||
|
||||
// Should be no illegal memory accesses
|
||||
dh::LaunchN(0, n, [=] __device__(size_t idx) { c[idx] = a[idx] + b[idx]; });
|
||||
|
||||
dh::safe_cuda(cudaDeviceSynchronize());
|
||||
}
|
||||
|
||||
// Define the test in a function so we can use device lambda
|
||||
TEST(BulkAllocator, Test) {
|
||||
TestAllocator();
|
||||
}
|
||||
|
||||
template <typename T, typename Comp = thrust::less<T>>
|
||||
void TestUpperBoundImpl(const std::vector<T> &vec, T val_to_find,
|
||||
const Comp &comp = Comp()) {
|
||||
|
||||
@@ -23,7 +23,7 @@ Json GenerateDenseColumn(std::string const& typestr, size_t kRows,
|
||||
d_data.resize(kRows);
|
||||
thrust::sequence(thrust::device, d_data.begin(), d_data.end(), 0.0f, 2.0f);
|
||||
|
||||
auto p_d_data = dh::Raw(d_data);
|
||||
auto p_d_data = d_data.data().get();
|
||||
|
||||
std::vector<Json> j_data {
|
||||
Json(Integer(reinterpret_cast<Integer::Int>(p_d_data))),
|
||||
@@ -49,7 +49,7 @@ Json GenerateSparseColumn(std::string const& typestr, size_t kRows,
|
||||
d_data[i] = i * 2.0;
|
||||
}
|
||||
|
||||
auto p_d_data = dh::Raw(d_data);
|
||||
auto p_d_data = d_data.data().get();
|
||||
|
||||
std::vector<Json> j_data {
|
||||
Json(Integer(reinterpret_cast<Integer::Int>(p_d_data))),
|
||||
|
||||
@@ -25,7 +25,7 @@ std::string PrepareData(std::string typestr, thrust::device_vector<T>* out, cons
|
||||
column["version"] = Integer(static_cast<Integer::Int>(1));
|
||||
column["typestr"] = String(typestr);
|
||||
|
||||
auto p_d_data = dh::Raw(d_data);
|
||||
auto p_d_data = d_data.data().get();
|
||||
std::vector<Json> j_data {
|
||||
Json(Integer(reinterpret_cast<Integer::Int>(p_d_data))),
|
||||
Json(Boolean(false))};
|
||||
|
||||
@@ -24,37 +24,33 @@ namespace tree {
|
||||
|
||||
TEST(GpuHist, DeviceHistogram) {
|
||||
// Ensures that node allocates correctly after reaching `kStopGrowingSize`.
|
||||
dh::SaveCudaContext{
|
||||
[&]() {
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
constexpr size_t kNBins = 128;
|
||||
constexpr size_t kNNodes = 4;
|
||||
constexpr size_t kStopGrowing = kNNodes * kNBins * 2u;
|
||||
DeviceHistogram<GradientPairPrecise, kStopGrowing> histogram;
|
||||
histogram.Init(0, kNBins);
|
||||
for (size_t i = 0; i < kNNodes; ++i) {
|
||||
histogram.AllocateHistogram(i);
|
||||
}
|
||||
histogram.Reset();
|
||||
ASSERT_EQ(histogram.Data().size(), kStopGrowing);
|
||||
dh::safe_cuda(cudaSetDevice(0));
|
||||
constexpr size_t kNBins = 128;
|
||||
constexpr size_t kNNodes = 4;
|
||||
constexpr size_t kStopGrowing = kNNodes * kNBins * 2u;
|
||||
DeviceHistogram<GradientPairPrecise, kStopGrowing> histogram;
|
||||
histogram.Init(0, kNBins);
|
||||
for (size_t i = 0; i < kNNodes; ++i) {
|
||||
histogram.AllocateHistogram(i);
|
||||
}
|
||||
histogram.Reset();
|
||||
ASSERT_EQ(histogram.Data().size(), kStopGrowing);
|
||||
|
||||
// Use allocated memory but do not erase nidx_map.
|
||||
for (size_t i = 0; i < kNNodes; ++i) {
|
||||
histogram.AllocateHistogram(i);
|
||||
}
|
||||
for (size_t i = 0; i < kNNodes; ++i) {
|
||||
ASSERT_TRUE(histogram.HistogramExists(i));
|
||||
}
|
||||
// Use allocated memory but do not erase nidx_map.
|
||||
for (size_t i = 0; i < kNNodes; ++i) {
|
||||
histogram.AllocateHistogram(i);
|
||||
}
|
||||
for (size_t i = 0; i < kNNodes; ++i) {
|
||||
ASSERT_TRUE(histogram.HistogramExists(i));
|
||||
}
|
||||
|
||||
// Erase existing nidx_map.
|
||||
for (size_t i = kNNodes; i < kNNodes * 2; ++i) {
|
||||
histogram.AllocateHistogram(i);
|
||||
}
|
||||
for (size_t i = 0; i < kNNodes; ++i) {
|
||||
ASSERT_FALSE(histogram.HistogramExists(i));
|
||||
}
|
||||
}
|
||||
};
|
||||
// Erase existing nidx_map.
|
||||
for (size_t i = kNNodes; i < kNNodes * 2; ++i) {
|
||||
histogram.AllocateHistogram(i);
|
||||
}
|
||||
for (size_t i = 0; i < kNNodes; ++i) {
|
||||
ASSERT_FALSE(histogram.HistogramExists(i));
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<GradientPairPrecise> GetHostHistGpair() {
|
||||
@@ -187,16 +183,14 @@ TEST(GpuHist, EvaluateSplits) {
|
||||
GPUHistMakerDevice<GradientPairPrecise>
|
||||
maker(0, page.get(), kNRows, param, kNCols, kNCols, true, batch_param);
|
||||
// Initialize GPUHistMakerDevice::node_sum_gradients
|
||||
maker.node_sum_gradients = {{6.4f, 12.8f}};
|
||||
maker.host_node_sum_gradients = {{6.4f, 12.8f}};
|
||||
|
||||
// Initialize GPUHistMakerDevice::cut
|
||||
auto cmat = GetHostCutMatrix();
|
||||
|
||||
// Copy cut matrix to device.
|
||||
page->Cuts() = cmat;
|
||||
maker.ba.Allocate(0, &(maker.monotone_constraints), kNCols);
|
||||
dh::CopyVectorToDeviceSpan(maker.monotone_constraints,
|
||||
param.monotone_constraints);
|
||||
maker.monotone_constraints = param.monotone_constraints;
|
||||
|
||||
// Initialize GPUHistMakerDevice::hist
|
||||
maker.hist.Init(0, (max_bins - 1) * kNCols);
|
||||
|
||||
Reference in New Issue
Block a user