Purge device_helpers.cuh (#5534)

* Simplifications with caching_device_vector

* Purge device helpers
This commit is contained in:
Rory Mitchell
2020-04-15 21:51:56 +12:00
committed by GitHub
parent a2f54963b6
commit ca4e05660e
9 changed files with 182 additions and 733 deletions

View File

@@ -27,64 +27,13 @@ void CreateTestData(xgboost::bst_uint num_rows, int max_row_size,
}
}
// Runs dh::TransformLbs for every combination of the row counts and maximum
// row sizes listed below. The device lambda records, for each item index, the
// row index it was called with; the recorded values must equal h_rows.
void TestLbs() {
// Seed the C random number generator with a fixed value.
srand(17);
dh::CubMemory temp_memory;
std::vector<int> test_rows = {4, 100, 1000};
std::vector<int> test_max_row_sizes = {4, 100, 1300};
for (auto num_rows : test_rows) {
for (auto max_row_size : test_max_row_sizes) {
thrust::host_vector<int> h_row_ptr;
thrust::host_vector<xgboost::bst_uint> h_rows;
// Presumably fills h_row_ptr and h_rows (both passed by pointer) -- the
// body of CreateTestData is not fully visible here; confirm.
CreateTestData(num_rows, max_row_size, &h_row_ptr, &h_rows);
// Copy the row pointers to the device; the element type changes from int
// (host vector) to size_t (device vector).
thrust::device_vector<size_t> row_ptr = h_row_ptr;
// One output slot per entry of h_rows.
thrust::device_vector<int> output_row(h_rows.size());
auto d_output_row = output_row.data();
dh::TransformLbs(0, &temp_memory, h_rows.size(), dh::Raw(row_ptr),
row_ptr.size() - 1, false,
[=] __device__(size_t idx, size_t ridx) {
d_output_row[idx] = ridx;
});
// Wait for outstanding device work before comparing results.
dh::safe_cuda(cudaDeviceSynchronize());
ASSERT_TRUE(h_rows == output_row);
}
}
}
// Test entry point; the whole test body lives in TestLbs().
TEST(CubLBS, Test) {
TestLbs();
}
// Checks that dh::SumReduction over 100 device floats, each 1.0f, returns a
// value within 1e-5 of 100.
TEST(SumReduce, Test) {
thrust::device_vector<float> data(100, 1.0f);
dh::CubMemory temp;
// NOTE(review): `sum` is declared twice in this scope. The two lines differ
// only in how the raw device pointer is obtained (dh::Raw(data) versus
// data.data().get()). This looks like the before/after pair of a diff whose
// +/- markers were lost -- confirm which line is live in the real file.
auto sum = dh::SumReduction(&temp, dh::Raw(data), data.size());
auto sum = dh::SumReduction(&temp, data.data().get(), data.size());
ASSERT_NEAR(sum, 100.0f, 1e-5);
}
// Requests three spans of n elements each (float, int, size_t) from a single
// dh::BulkAllocator::Allocate call, then launches n device-lambda invocations
// that read a[idx] and b[idx] and write c[idx].
void TestAllocator() {
int n = 10;
Span<float> a;
Span<int> b;
Span<size_t> c;
dh::BulkAllocator ba;
// The leading 0 is presumably the device ordinal -- confirm against
// BulkAllocator::Allocate.
ba.Allocate(0, &a, n, &b, n, &c, n);
// Should be no illegal memory accesses
// NOTE(review): a and b are read below without being written in this
// function; whether Allocate initialises them is not visible here.
dh::LaunchN(0, n, [=] __device__(size_t idx) { c[idx] = a[idx] + b[idx]; });
// Wait for the launched work so that any device-side fault surfaces here.
dh::safe_cuda(cudaDeviceSynchronize());
}
// The test body is defined in a separate function (TestAllocator) so that it
// can use a device lambda; this TEST only forwards to it.
TEST(BulkAllocator, Test) {
TestAllocator();
}
template <typename T, typename Comp = thrust::less<T>>
void TestUpperBoundImpl(const std::vector<T> &vec, T val_to_find,
const Comp &comp = Comp()) {

View File

@@ -23,7 +23,7 @@ Json GenerateDenseColumn(std::string const& typestr, size_t kRows,
d_data.resize(kRows);
thrust::sequence(thrust::device, d_data.begin(), d_data.end(), 0.0f, 2.0f);
auto p_d_data = dh::Raw(d_data);
auto p_d_data = d_data.data().get();
std::vector<Json> j_data {
Json(Integer(reinterpret_cast<Integer::Int>(p_d_data))),
@@ -49,7 +49,7 @@ Json GenerateSparseColumn(std::string const& typestr, size_t kRows,
d_data[i] = i * 2.0;
}
auto p_d_data = dh::Raw(d_data);
auto p_d_data = d_data.data().get();
std::vector<Json> j_data {
Json(Integer(reinterpret_cast<Integer::Int>(p_d_data))),

View File

@@ -25,7 +25,7 @@ std::string PrepareData(std::string typestr, thrust::device_vector<T>* out, cons
column["version"] = Integer(static_cast<Integer::Int>(1));
column["typestr"] = String(typestr);
auto p_d_data = dh::Raw(d_data);
auto p_d_data = d_data.data().get();
std::vector<Json> j_data {
Json(Integer(reinterpret_cast<Integer::Int>(p_d_data))),
Json(Boolean(false))};

View File

@@ -24,37 +24,33 @@ namespace tree {
// Exercises DeviceHistogram<GradientPairPrecise, kStopGrowing> with
// kStopGrowing = kNNodes * kNBins * 2:
//   1. allocate histograms for nodes [0, kNNodes), call Reset(), and expect
//      Data().size() == kStopGrowing;
//   2. allocate the same nodes again and expect HistogramExists() for each;
//   3. allocate nodes [kNNodes, 2 * kNNodes) and expect HistogramExists() to be
//      false for every node in [0, kNNodes).
// NOTE(review): each group of statements below appears twice -- once inside a
// `dh::SaveCudaContext{ [&]() { ... } };` wrapper and once directly at function
// scope. This looks like the removed and added sides of a diff whose +/-
// markers and indentation were lost; confirm which version is live.
TEST(GpuHist, DeviceHistogram) {
// Ensures that node allocates correctly after reaching `kStopGrowingSize`.
dh::SaveCudaContext{
[&]() {
dh::safe_cuda(cudaSetDevice(0));
constexpr size_t kNBins = 128;
constexpr size_t kNNodes = 4;
constexpr size_t kStopGrowing = kNNodes * kNBins * 2u;
DeviceHistogram<GradientPairPrecise, kStopGrowing> histogram;
histogram.Init(0, kNBins);
for (size_t i = 0; i < kNNodes; ++i) {
histogram.AllocateHistogram(i);
}
histogram.Reset();
ASSERT_EQ(histogram.Data().size(), kStopGrowing);
dh::safe_cuda(cudaSetDevice(0));
constexpr size_t kNBins = 128;
constexpr size_t kNNodes = 4;
constexpr size_t kStopGrowing = kNNodes * kNBins * 2u;
DeviceHistogram<GradientPairPrecise, kStopGrowing> histogram;
histogram.Init(0, kNBins);
for (size_t i = 0; i < kNNodes; ++i) {
histogram.AllocateHistogram(i);
}
histogram.Reset();
ASSERT_EQ(histogram.Data().size(), kStopGrowing);
// Use allocated memory but do not erase nidx_map.
for (size_t i = 0; i < kNNodes; ++i) {
histogram.AllocateHistogram(i);
}
for (size_t i = 0; i < kNNodes; ++i) {
ASSERT_TRUE(histogram.HistogramExists(i));
}
// Use allocated memory but do not erase nidx_map.
for (size_t i = 0; i < kNNodes; ++i) {
histogram.AllocateHistogram(i);
}
for (size_t i = 0; i < kNNodes; ++i) {
ASSERT_TRUE(histogram.HistogramExists(i));
}
// Erase existing nidx_map.
for (size_t i = kNNodes; i < kNNodes * 2; ++i) {
histogram.AllocateHistogram(i);
}
for (size_t i = 0; i < kNNodes; ++i) {
ASSERT_FALSE(histogram.HistogramExists(i));
}
}
};
// Erase existing nidx_map.
for (size_t i = kNNodes; i < kNNodes * 2; ++i) {
histogram.AllocateHistogram(i);
}
for (size_t i = 0; i < kNNodes; ++i) {
ASSERT_FALSE(histogram.HistogramExists(i));
}
}
std::vector<GradientPairPrecise> GetHostHistGpair() {
@@ -187,16 +183,14 @@ TEST(GpuHist, EvaluateSplits) {
GPUHistMakerDevice<GradientPairPrecise>
maker(0, page.get(), kNRows, param, kNCols, kNCols, true, batch_param);
// Initialize GPUHistMakerDevice::node_sum_gradients
maker.node_sum_gradients = {{6.4f, 12.8f}};
maker.host_node_sum_gradients = {{6.4f, 12.8f}};
// Initialize GPUHistMakerDevice::cut
auto cmat = GetHostCutMatrix();
// Copy cut matrix to device.
page->Cuts() = cmat;
maker.ba.Allocate(0, &(maker.monotone_constraints), kNCols);
dh::CopyVectorToDeviceSpan(maker.monotone_constraints,
param.monotone_constraints);
maker.monotone_constraints = param.monotone_constraints;
// Initialize GPUHistMakerDevice::hist
maker.hist.Init(0, (max_bins - 1) * kNCols);