Improved gpu_hist_experimental algorithm (#2866)

- Implement column sampling and row subsampling for gpu_hist_experimental

 - Optimised multi-GPU implementation for gpu_hist_experimental

 - Make NCCL optional

 - Add Volta architecture flag

 - Optimise RegLossObj

 - Add timing utilities for debug verbose mode

 - Bump required CUDA version to 8.0
This commit is contained in:
Rory Mitchell
2017-11-11 13:58:40 +13:00
committed by GitHub
parent 16c63f30d0
commit 40c6e2f0c8
14 changed files with 855 additions and 473 deletions

View File

@@ -6,6 +6,7 @@
#include <xgboost/base.h>
#include "../../../src/common/device_helpers.cuh"
#include "gtest/gtest.h"
#include "../../../src/common/timer.h"
void CreateTestData(xgboost::bst_uint num_rows, int max_row_size,
thrust::host_vector<int> *row_ptr,
@@ -35,7 +36,7 @@ void SpeedTest() {
thrust::device_vector<int> output_row(h_rows.size());
auto d_output_row = output_row.data();
dh::Timer t;
xgboost::common::Timer t;
dh::TransformLbs(
0, &temp_memory, h_rows.size(), dh::raw(row_ptr), row_ptr.size() - 1, false,
[=] __device__(size_t idx, size_t ridx) { d_output_row[idx] = ridx; });

View File

@@ -7,8 +7,8 @@
#include "../helpers.h"
#include "gtest/gtest.h"
#include "../../../src/tree/updater_gpu_hist_experimental.cu"
#include "../../../src/gbm/gbtree_model.h"
#include "../../../src/tree/updater_gpu_hist_experimental.cu"
namespace xgboost {
namespace tree {
@@ -22,7 +22,9 @@ TEST(gpu_hist_experimental, TestSparseShard) {
hmat.Init(dmat.get(), max_bins);
gmat.cut = &hmat;
gmat.Init(dmat.get());
DeviceShard shard(0, 0, gmat, 0, rows, hmat.row_ptr.back(), TrainParam());
ncclComm_t comm;
DeviceShard shard(0, 0, gmat, 0, rows, hmat.row_ptr.back(),
TrainParam());
ASSERT_LT(shard.row_stride, columns);
@@ -54,7 +56,9 @@ TEST(gpu_hist_experimental, TestDenseShard) {
hmat.Init(dmat.get(), max_bins);
gmat.cut = &hmat;
gmat.Init(dmat.get());
DeviceShard shard(0, 0, gmat, 0, rows, hmat.row_ptr.back(), TrainParam());
ncclComm_t comm;
DeviceShard shard(0, 0, gmat, 0, rows, hmat.row_ptr.back(),
TrainParam());
ASSERT_EQ(shard.row_stride, columns);