Improved gpu_hist_experimental algorithm (#2866)

- Implement column sampling and row subsampling for gpu_hist_experimental

- Optimised multi-GPU implementation for gpu_hist_experimental

- Make NCCL optional

- Add Volta architecture flag

- Optimise RegLossObj

- Add timing utilities for debug verbose mode

- Bump required CUDA version to 8.0
This commit is contained in:
Rory Mitchell
2017-11-11 13:58:40 +13:00
committed by GitHub
parent 16c63f30d0
commit 40c6e2f0c8
14 changed files with 855 additions and 473 deletions

View File

@@ -7,8 +7,8 @@
#include "../helpers.h"
#include "gtest/gtest.h"
#include "../../../src/tree/updater_gpu_hist_experimental.cu"
#include "../../../src/gbm/gbtree_model.h"
#include "../../../src/tree/updater_gpu_hist_experimental.cu"
namespace xgboost {
namespace tree {
@@ -22,7 +22,9 @@ TEST(gpu_hist_experimental, TestSparseShard) {
hmat.Init(dmat.get(), max_bins);
gmat.cut = &hmat;
gmat.Init(dmat.get());
DeviceShard shard(0, 0, gmat, 0, rows, hmat.row_ptr.back(), TrainParam());
ncclComm_t comm;
DeviceShard shard(0, 0, gmat, 0, rows, hmat.row_ptr.back(),
TrainParam());
ASSERT_LT(shard.row_stride, columns);
@@ -54,7 +56,9 @@ TEST(gpu_hist_experimental, TestDenseShard) {
hmat.Init(dmat.get(), max_bins);
gmat.cut = &hmat;
gmat.Init(dmat.get());
DeviceShard shard(0, 0, gmat, 0, rows, hmat.row_ptr.back(), TrainParam());
ncclComm_t comm;
DeviceShard shard(0, 0, gmat, 0, rows, hmat.row_ptr.back(),
TrainParam());
ASSERT_EQ(shard.row_stride, columns);