-Add experimental GPU algorithm for lossguided mode (#2755)

- Improved GPU algorithm unit tests
- Removed some thrust code to improve compile times
2017-10-01 00:18:35 +13:00
parent 69c3b78a29
commit 4cb2f7598b
14 changed files with 1291 additions and 593 deletions
--- a/tests/cpp/common/test_device_helpers.cu
+++ b/tests/cpp/common/test_device_helpers.cu
@@ -41,7 +41,7 @@ void SpeedTest() {
      [=] __device__(size_t idx, size_t ridx) { d_output_row[idx] = ridx; });

  dh::safe_cuda(cudaDeviceSynchronize());
-  double time = t.elapsedSeconds();
+  double time = t.ElapsedSeconds();
  const int mb_size = 1048576;
  size_t size = (sizeof(int) * h_rows.size()) / mb_size;
  printf("size: %llumb, time: %fs, bandwidth: %fmb/s\n", size, time,
--- a/tests/cpp/tree/test_gpu_hist_experimental.cu
+++ b/tests/cpp/tree/test_gpu_hist_experimental.cu
@@ -0,0 +1,72 @@
+
+/*!
+ * Copyright 2017 XGBoost contributors
+ */
+#include <thrust/device_vector.h>
+#include <xgboost/base.h>
+#include "../helpers.h"
+#include "gtest/gtest.h"
+
+#include "../../../src/tree/updater_gpu_hist_experimental.cu"
+#include "../../../src/gbm/gbtree_model.h"
+
+namespace xgboost {
+namespace tree {
+TEST(gpu_hist_experimental, TestSparseShard) {
+  // Checks DeviceShard's compressed bin-index buffer against the host-side
+  // GHistIndexMatrix when the shard's row_stride is below the column count.
+  const int rows = 100, columns = 80, max_bins = 4;
+  auto dmat = CreateDMatrix(rows, columns, 0.9);  // 0.9: presumably sparsity
+  common::HistCutMatrix hmat;
+  common::GHistIndexMatrix gmat;
+  hmat.Init(dmat.get(), max_bins);
+  gmat.cut = &hmat;
+  gmat.Init(dmat.get());
+  DeviceShard shard(0, 0, gmat, 0, rows, hmat.row_ptr.back(), TrainParam());
+
+  ASSERT_LT(shard.row_stride, columns);
+
+  auto host_gidx_buffer = shard.gidx_buffer.as_vector();
+
+  common::CompressedIterator<uint32_t> gidx(host_gidx_buffer.data(),
+                                            hmat.row_ptr.back() + 1);
+  // Each row occupies row_stride slots: its entries, then null padding.
+  for (int i = 0; i < rows; i++) {
+    int row_offset = 0;
+    for (size_t j = gmat.row_ptr[i]; j < gmat.row_ptr[i + 1]; j++) {
+      ASSERT_EQ(gidx[i * shard.row_stride + row_offset], gmat.index[j]);
+      row_offset++;
+    }
+
+    for (; row_offset < shard.row_stride; row_offset++) {
+      ASSERT_EQ(gidx[i * shard.row_stride + row_offset], shard.null_gidx_value);
+    }
+  }
+}
+
+TEST(gpu_hist_experimental, TestDenseShard) {
+  // Checks that, when row_stride equals the column count, the shard's
+  // compressed bin-index buffer matches gmat.index position for position.
+  const int rows = 100, columns = 80, max_bins = 4;
+  auto dmat = CreateDMatrix(rows, columns, 0);  // 0: presumably no sparsity
+  common::HistCutMatrix hmat;
+  common::GHistIndexMatrix gmat;
+  hmat.Init(dmat.get(), max_bins);
+  gmat.cut = &hmat;
+  gmat.Init(dmat.get());
+  DeviceShard shard(0, 0, gmat, 0, rows, hmat.row_ptr.back(), TrainParam());
+
+  ASSERT_EQ(shard.row_stride, columns);
+
+  auto host_gidx_buffer = shard.gidx_buffer.as_vector();
+
+  common::CompressedIterator<uint32_t> gidx(host_gidx_buffer.data(),
+                                            hmat.row_ptr.back() + 1);
+
+  for (size_t i = 0; i < gmat.index.size(); i++) {
+    ASSERT_EQ(gidx[i], gmat.index[i]);
+  }
+}
+
+}  // namespace tree
+}  // namespace xgboost