Improved gpu_hist_experimental algorithm (#2866)

- Implement colsampling, subsampling for gpu_hist_experimental

 - Optimised multi-GPU implementation for gpu_hist_experimental

 - Make nccl optional

 - Add Volta architecture flag

 - Optimise RegLossObj

 - Add timing utilities for debug verbose mode

 - Bump required cuda version to 8.0
This commit is contained in:
Rory Mitchell
2017-11-11 13:58:40 +13:00
committed by GitHub
parent 16c63f30d0
commit 40c6e2f0c8
14 changed files with 855 additions and 473 deletions

View File

@@ -6,6 +6,7 @@
#include <xgboost/base.h>
#include "../../../src/common/device_helpers.cuh"
#include "gtest/gtest.h"
#include "../../../src/common/timer.h"
void CreateTestData(xgboost::bst_uint num_rows, int max_row_size,
thrust::host_vector<int> *row_ptr,
@@ -35,7 +36,7 @@ void SpeedTest() {
thrust::device_vector<int> output_row(h_rows.size());
auto d_output_row = output_row.data();
dh::Timer t;
xgboost::common::Timer t;
dh::TransformLbs(
0, &temp_memory, h_rows.size(), dh::raw(row_ptr), row_ptr.size() - 1, false,
[=] __device__(size_t idx, size_t ridx) { d_output_row[idx] = ridx; });