Improved gpu_hist_experimental algorithm (#2866)

- Implement column sampling and row subsampling for gpu_hist_experimental

- Optimised multi-GPU implementation for gpu_hist_experimental

- Make NCCL optional

- Add Volta architecture flag

- Optimise RegLossObj

- Add timing utilities for debug verbose mode

- Bump required CUDA version to 8.0
This commit is contained in:
Rory Mitchell
2017-11-11 13:58:40 +13:00
committed by GitHub
parent 16c63f30d0
commit 40c6e2f0c8
14 changed files with 855 additions and 473 deletions

View File

@@ -7,8 +7,8 @@
#include "../helpers.h"
#include "gtest/gtest.h"
#include "../../../src/tree/updater_gpu_hist_experimental.cu"
#include "../../../src/gbm/gbtree_model.h"
#include "../../../src/tree/updater_gpu_hist_experimental.cu"
namespace xgboost {
namespace tree {
@@ -22,7 +22,9 @@ TEST(gpu_hist_experimental, TestSparseShard) {
hmat.Init(dmat.get(), max_bins);
gmat.cut = &hmat;
gmat.Init(dmat.get());
DeviceShard shard(0, 0, gmat, 0, rows, hmat.row_ptr.back(), TrainParam());
ncclComm_t comm;
DeviceShard shard(0, 0, gmat, 0, rows, hmat.row_ptr.back(),
TrainParam());
ASSERT_LT(shard.row_stride, columns);
@@ -54,7 +56,9 @@ TEST(gpu_hist_experimental, TestDenseShard) {
hmat.Init(dmat.get(), max_bins);
gmat.cut = &hmat;
gmat.Init(dmat.get());
DeviceShard shard(0, 0, gmat, 0, rows, hmat.row_ptr.back(), TrainParam());
ncclComm_t comm;
DeviceShard shard(0, 0, gmat, 0, rows, hmat.row_ptr.back(),
TrainParam());
ASSERT_EQ(shard.row_stride, columns);