Purge device_helpers.cuh (#5534)

* Simplifications with caching_device_vector
* Purge device helpers
2020-04-15 21:51:56 +12:00
parent a2f54963b6
commit ca4e05660e
9 changed files with 182 additions and 733 deletions
--- a/tests/cpp/common/test_device_helpers.cu
+++ b/tests/cpp/common/test_device_helpers.cu
@@ -27,64 +27,13 @@ void CreateTestData(xgboost::bst_uint num_rows, int max_row_size,
  }
 }

-void TestLbs() {
-  srand(17);
-  dh::CubMemory temp_memory;
-
-  std::vector<int> test_rows = {4, 100, 1000};
-  std::vector<int> test_max_row_sizes = {4, 100, 1300};
-
-  for (auto num_rows : test_rows) {
-    for (auto max_row_size : test_max_row_sizes) {
-      thrust::host_vector<int> h_row_ptr;
-      thrust::host_vector<xgboost::bst_uint> h_rows;
-      CreateTestData(num_rows, max_row_size, &h_row_ptr, &h_rows);
-      thrust::device_vector<size_t> row_ptr = h_row_ptr;
-      thrust::device_vector<int> output_row(h_rows.size());
-      auto d_output_row = output_row.data();
-
-      dh::TransformLbs(0, &temp_memory, h_rows.size(), dh::Raw(row_ptr),
-                       row_ptr.size() - 1, false,
-                       [=] __device__(size_t idx, size_t ridx) {
-                         d_output_row[idx] = ridx;
-                       });
-
-      dh::safe_cuda(cudaDeviceSynchronize());
-      ASSERT_TRUE(h_rows == output_row);
-    }
-  }
-}
-
-TEST(CubLBS, Test) {
-  TestLbs();
-}
-
 TEST(SumReduce, Test) {
  thrust::device_vector<float> data(100, 1.0f);
  dh::CubMemory temp;
-  auto sum = dh::SumReduction(&temp, dh::Raw(data), data.size());
+  auto sum = dh::SumReduction(&temp, data.data().get(), data.size());
  ASSERT_NEAR(sum, 100.0f, 1e-5);
 }

-void TestAllocator() {
-  int n = 10;
-  Span<float> a;
-  Span<int> b;
-  Span<size_t> c;
-  dh::BulkAllocator ba;
-  ba.Allocate(0, &a, n, &b, n, &c, n);
-
-  // Should be no illegal memory accesses
-  dh::LaunchN(0, n, [=] __device__(size_t idx) { c[idx] = a[idx] + b[idx]; });
-
-  dh::safe_cuda(cudaDeviceSynchronize());
-}
-
-// Define the test in a function so we can use device lambda
-TEST(BulkAllocator, Test) {
-  TestAllocator();
-}
-
 template <typename T, typename Comp = thrust::less<T>>
 void TestUpperBoundImpl(const std::vector<T> &vec, T val_to_find,
                        const Comp &comp = Comp()) {
--- a/tests/cpp/data/test_array_interface.h
+++ b/tests/cpp/data/test_array_interface.h
@@ -23,7 +23,7 @@ Json GenerateDenseColumn(std::string const& typestr, size_t kRows,
  d_data.resize(kRows);
  thrust::sequence(thrust::device, d_data.begin(), d_data.end(), 0.0f, 2.0f);

-  auto p_d_data = dh::Raw(d_data);
+  auto p_d_data = d_data.data().get();

  std::vector<Json> j_data {
    Json(Integer(reinterpret_cast<Integer::Int>(p_d_data))),
@@ -49,7 +49,7 @@ Json GenerateSparseColumn(std::string const& typestr, size_t kRows,
    d_data[i] = i * 2.0;
  }

-  auto p_d_data = dh::Raw(d_data);
+  auto p_d_data = d_data.data().get();

  std::vector<Json> j_data {
    Json(Integer(reinterpret_cast<Integer::Int>(p_d_data))),
--- a/tests/cpp/data/test_metainfo.cu
+++ b/tests/cpp/data/test_metainfo.cu
@@ -25,7 +25,7 @@ std::string PrepareData(std::string typestr, thrust::device_vector<T>* out, cons
  column["version"] = Integer(static_cast<Integer::Int>(1));
  column["typestr"] = String(typestr);

-  auto p_d_data = dh::Raw(d_data);
+  auto p_d_data = d_data.data().get();
  std::vector<Json> j_data {
        Json(Integer(reinterpret_cast<Integer::Int>(p_d_data))),
        Json(Boolean(false))};
--- a/tests/cpp/tree/test_gpu_hist.cu
+++ b/tests/cpp/tree/test_gpu_hist.cu
@@ -24,37 +24,33 @@ namespace tree {

 TEST(GpuHist, DeviceHistogram) {
  // Ensures that node allocates correctly after reaching `kStopGrowingSize`.
-  dh::SaveCudaContext{
-    [&]() {
-      dh::safe_cuda(cudaSetDevice(0));
-      constexpr size_t kNBins = 128;
-      constexpr size_t kNNodes = 4;
-      constexpr size_t kStopGrowing = kNNodes * kNBins * 2u;
-      DeviceHistogram<GradientPairPrecise, kStopGrowing> histogram;
-      histogram.Init(0, kNBins);
-      for (size_t i = 0; i < kNNodes; ++i) {
-        histogram.AllocateHistogram(i);
-      }
-      histogram.Reset();
-      ASSERT_EQ(histogram.Data().size(), kStopGrowing);
+  dh::safe_cuda(cudaSetDevice(0));
+  constexpr size_t kNBins = 128;
+  constexpr size_t kNNodes = 4;
+  constexpr size_t kStopGrowing = kNNodes * kNBins * 2u;
+  DeviceHistogram<GradientPairPrecise, kStopGrowing> histogram;
+  histogram.Init(0, kNBins);
+  for (size_t i = 0; i < kNNodes; ++i) {
+    histogram.AllocateHistogram(i);
+  }
+  histogram.Reset();
+  ASSERT_EQ(histogram.Data().size(), kStopGrowing);

-      // Use allocated memory but do not erase nidx_map.
-      for (size_t i = 0; i < kNNodes; ++i) {
-        histogram.AllocateHistogram(i);
-      }
-      for (size_t i = 0; i < kNNodes; ++i) {
-        ASSERT_TRUE(histogram.HistogramExists(i));
-      }
+  // Use allocated memory but do not erase nidx_map.
+  for (size_t i = 0; i < kNNodes; ++i) {
+    histogram.AllocateHistogram(i);
+  }
+  for (size_t i = 0; i < kNNodes; ++i) {
+    ASSERT_TRUE(histogram.HistogramExists(i));
+  }

-      // Erase existing nidx_map.
-      for (size_t i = kNNodes; i < kNNodes * 2; ++i) {
-        histogram.AllocateHistogram(i);
-      }
-      for (size_t i = 0; i < kNNodes; ++i) {
-        ASSERT_FALSE(histogram.HistogramExists(i));
-      }
-    }
-  };
+  // Erase existing nidx_map.
+  for (size_t i = kNNodes; i < kNNodes * 2; ++i) {
+    histogram.AllocateHistogram(i);
+  }
+  for (size_t i = 0; i < kNNodes; ++i) {
+    ASSERT_FALSE(histogram.HistogramExists(i));
+  }
 }

 std::vector<GradientPairPrecise> GetHostHistGpair() {
@@ -187,16 +183,14 @@ TEST(GpuHist, EvaluateSplits) {
  GPUHistMakerDevice<GradientPairPrecise>
      maker(0, page.get(), kNRows, param, kNCols, kNCols, true, batch_param);
  // Initialize GPUHistMakerDevice::node_sum_gradients
-  maker.node_sum_gradients = {{6.4f, 12.8f}};
+  maker.host_node_sum_gradients = {{6.4f, 12.8f}};

  // Initialize GPUHistMakerDevice::cut
  auto cmat = GetHostCutMatrix();

  // Copy cut matrix to device.
  page->Cuts() = cmat;
-  maker.ba.Allocate(0, &(maker.monotone_constraints), kNCols);
-  dh::CopyVectorToDeviceSpan(maker.monotone_constraints,
-                             param.monotone_constraints);
+  maker.monotone_constraints = param.monotone_constraints;

  // Initialize GPUHistMakerDevice::hist
  maker.hist.Init(0, (max_bins - 1) * kNCols);