further cleanup of single-process multi-GPU code (#4810)

* use subspan in gpu predictor instead of copying
* Revise `HostDeviceVector`
Author: Rong Ou
Date: 2019-08-30 02:27:23 -07:00
Committed by: Jiaming Yuan
Parent: 0184eb5d02
Commit: 733ed24dd9
12 changed files with 289 additions and 593 deletions
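For context on the first bullet: common::Span (from xgboost/span.h) provides subspan(), which returns a non-owning view over a slice of an existing device buffer, so the GPU predictor can hand a kernel a window of rows without allocating and filling a temporary. Below is a minimal sketch of the pattern, assuming xgboost's common::Span; the helper name and variables are hypothetical, not the predictor's actual code:

    #include <cstddef>
    #include <xgboost/span.h>

    // Hypothetical helper: view the predictions for rows [begin, begin + n).
    // subspan() only adjusts the view's pointer and length; no
    // device-to-device copy of the underlying memory takes place.
    inline xgboost::common::Span<float> PredictionSlice(
        xgboost::common::Span<float> d_predictions,  // full device buffer
        std::size_t begin, std::size_t n) {
      return d_predictions.subspan(begin, n);
    }

The test hunks below show the same single-GPU simplification from the caller's side: DeviceShard's constructor drops two arguments, and the histogram makers hold a single shard_ in place of the old shards_ vector.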


@@ -113,7 +113,7 @@ TEST(GpuHist, BuildGidxDense) {
{"max_leaves", "0"},
};
param.Init(args);
DeviceShard<GradientPairPrecise> shard(0, 0, 0, kNRows, param, kNCols, kNCols);
DeviceShard<GradientPairPrecise> shard(0, kNRows, param, kNCols, kNCols);
BuildGidx(&shard, kNRows, kNCols);
std::vector<common::CompressedByteT> h_gidx_buffer(shard.gidx_buffer.size());
@@ -154,8 +154,7 @@ TEST(GpuHist, BuildGidxSparse) {
   };
   param.Init(args);
-  DeviceShard<GradientPairPrecise> shard(0, 0, 0, kNRows, param, kNCols,
-                                         kNCols);
+  DeviceShard<GradientPairPrecise> shard(0, kNRows, param, kNCols, kNCols);
   BuildGidx(&shard, kNRows, kNCols, 0.9f);
   std::vector<common::CompressedByteT> h_gidx_buffer(shard.gidx_buffer.size());
@@ -200,8 +199,7 @@ void TestBuildHist(bool use_shared_memory_histograms) {
{"max_leaves", "0"},
};
param.Init(args);
DeviceShard<GradientSumT> shard(0, 0, 0, kNRows, param, kNCols,
kNCols);
DeviceShard<GradientSumT> shard(0, kNRows, param, kNCols, kNCols);
BuildGidx(&shard, kNRows, kNCols);
xgboost::SimpleLCG gen;
@@ -303,8 +301,7 @@ TEST(GpuHist, EvaluateSplits) {
   // Initialize DeviceShard
   std::unique_ptr<DeviceShard<GradientPairPrecise>> shard{
-    new DeviceShard<GradientPairPrecise>(0, 0, 0, kNRows, param, kNCols,
-                                         kNCols)};
+    new DeviceShard<GradientPairPrecise>(0, kNRows, param, kNCols, kNCols)};
   // Initialize DeviceShard::node_sum_gradients
   shard->node_sum_gradients = {{6.4f, 12.8f}};
@@ -391,24 +388,20 @@ void TestHistogramIndexImpl() {
   hist_maker_ext.Configure(training_params, &generic_param);
   hist_maker_ext.InitDataOnce(hist_maker_ext_dmat.get());
 
-  ASSERT_EQ(hist_maker.shards_.size(), hist_maker_ext.shards_.size());
-  // Extract the device shards from the histogram makers and from that its compressed
+  // Extract the device shard from the histogram makers and from that its compressed
   // histogram index
-  for (size_t i = 0; i < hist_maker.shards_.size(); ++i) {
-    const auto &dev_shard = hist_maker.shards_[i];
-    std::vector<common::CompressedByteT> h_gidx_buffer(dev_shard->gidx_buffer.size());
-    dh::CopyDeviceSpanToVector(&h_gidx_buffer, dev_shard->gidx_buffer);
+  const auto &dev_shard = hist_maker.shard_;
+  std::vector<common::CompressedByteT> h_gidx_buffer(dev_shard->gidx_buffer.size());
+  dh::CopyDeviceSpanToVector(&h_gidx_buffer, dev_shard->gidx_buffer);
 
-    const auto &dev_shard_ext = hist_maker_ext.shards_[i];
-    std::vector<common::CompressedByteT> h_gidx_buffer_ext(dev_shard_ext->gidx_buffer.size());
-    dh::CopyDeviceSpanToVector(&h_gidx_buffer_ext, dev_shard_ext->gidx_buffer);
+  const auto &dev_shard_ext = hist_maker_ext.shard_;
+  std::vector<common::CompressedByteT> h_gidx_buffer_ext(dev_shard_ext->gidx_buffer.size());
+  dh::CopyDeviceSpanToVector(&h_gidx_buffer_ext, dev_shard_ext->gidx_buffer);
 
-    ASSERT_EQ(dev_shard->n_bins, dev_shard_ext->n_bins);
-    ASSERT_EQ(dev_shard->gidx_buffer.size(), dev_shard_ext->gidx_buffer.size());
+  ASSERT_EQ(dev_shard->n_bins, dev_shard_ext->n_bins);
+  ASSERT_EQ(dev_shard->gidx_buffer.size(), dev_shard_ext->gidx_buffer.size());
 
-    ASSERT_EQ(h_gidx_buffer, h_gidx_buffer_ext);
-  }
+  ASSERT_EQ(h_gidx_buffer, h_gidx_buffer_ext);
 }
 
 TEST(GpuHist, TestHistogramIndex) {