Improved gpu_hist_experimental algorithm (#2866)
- Implement colsampling, subsampling for gpu_hist_experimental
- Optimised multi-GPU implementation for gpu_hist_experimental
- Make NCCL optional
- Add Volta architecture flag
- Optimise RegLossObj
- Add timing utilities for debug verbose mode
- Bump required CUDA version to 8.0
This commit is contained in:
@@ -11,20 +11,36 @@ rng = np.random.RandomState(1994)
|
||||
|
||||
|
||||
def run_benchmark(args):
|
||||
print("Generating dataset: {} rows * {} columns".format(args.rows, args.columns))
|
||||
print("{}/{} test/train split".format(args.test_size, 1.0 - args.test_size))
|
||||
tmp = time.time()
|
||||
X, y = make_classification(args.rows, n_features=args.columns, random_state=7)
|
||||
if args.sparsity < 1.0:
|
||||
X = np.array([[np.nan if rng.uniform(0, 1) < args.sparsity else x for x in x_row] for x_row in X])
|
||||
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=args.test_size, random_state=7)
|
||||
print ("Generate Time: %s seconds" % (str(time.time() - tmp)))
|
||||
tmp = time.time()
|
||||
print ("DMatrix Start")
|
||||
dtrain = xgb.DMatrix(X_train, y_train, nthread=-1)
|
||||
dtest = xgb.DMatrix(X_test, y_test, nthread=-1)
|
||||
print ("DMatrix Time: %s seconds" % (str(time.time() - tmp)))
|
||||
try:
|
||||
dtest = xgb.DMatrix('dtest.dm')
|
||||
dtrain = xgb.DMatrix('dtrain.dm')
|
||||
|
||||
if not (dtest.num_col() == args.columns \
|
||||
and dtrain.num_col() == args.columns):
|
||||
raise ValueError("Wrong cols")
|
||||
if not (dtest.num_row() == args.rows * args.test_size \
|
||||
and dtrain.num_row() == args.rows * (1-args.test_size)):
|
||||
raise ValueError("Wrong rows")
|
||||
except:
|
||||
|
||||
print("Generating dataset: {} rows * {} columns".format(args.rows, args.columns))
|
||||
print("{}/{} test/train split".format(args.test_size, 1.0 - args.test_size))
|
||||
tmp = time.time()
|
||||
X, y = make_classification(args.rows, n_features=args.columns, n_redundant=0, n_informative=args.columns, n_repeated=0, random_state=7)
|
||||
if args.sparsity < 1.0:
|
||||
X = np.array([[np.nan if rng.uniform(0, 1) < args.sparsity else x for x in x_row] for x_row in X])
|
||||
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=args.test_size, random_state=7)
|
||||
print ("Generate Time: %s seconds" % (str(time.time() - tmp)))
|
||||
tmp = time.time()
|
||||
print ("DMatrix Start")
|
||||
dtrain = xgb.DMatrix(X_train, y_train)
|
||||
dtest = xgb.DMatrix(X_test, y_test, nthread=-1)
|
||||
print ("DMatrix Time: %s seconds" % (str(time.time() - tmp)))
|
||||
|
||||
dtest.save_binary('dtest.dm')
|
||||
dtrain.save_binary('dtrain.dm')
|
||||
|
||||
param = {'objective': 'binary:logistic'}
|
||||
if args.params is not '':
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include <xgboost/base.h>
|
||||
#include "../../../src/common/device_helpers.cuh"
|
||||
#include "gtest/gtest.h"
|
||||
#include "../../../src/common/timer.h"
|
||||
|
||||
void CreateTestData(xgboost::bst_uint num_rows, int max_row_size,
|
||||
thrust::host_vector<int> *row_ptr,
|
||||
@@ -35,7 +36,7 @@ void SpeedTest() {
|
||||
thrust::device_vector<int> output_row(h_rows.size());
|
||||
auto d_output_row = output_row.data();
|
||||
|
||||
dh::Timer t;
|
||||
xgboost::common::Timer t;
|
||||
dh::TransformLbs(
|
||||
0, &temp_memory, h_rows.size(), dh::raw(row_ptr), row_ptr.size() - 1, false,
|
||||
[=] __device__(size_t idx, size_t ridx) { d_output_row[idx] = ridx; });
|
||||
|
||||
@@ -7,8 +7,8 @@
|
||||
#include "../helpers.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include "../../../src/tree/updater_gpu_hist_experimental.cu"
|
||||
#include "../../../src/gbm/gbtree_model.h"
|
||||
#include "../../../src/tree/updater_gpu_hist_experimental.cu"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
@@ -22,7 +22,9 @@ TEST(gpu_hist_experimental, TestSparseShard) {
|
||||
hmat.Init(dmat.get(), max_bins);
|
||||
gmat.cut = &hmat;
|
||||
gmat.Init(dmat.get());
|
||||
DeviceShard shard(0, 0, gmat, 0, rows, hmat.row_ptr.back(), TrainParam());
|
||||
ncclComm_t comm;
|
||||
DeviceShard shard(0, 0, gmat, 0, rows, hmat.row_ptr.back(),
|
||||
TrainParam());
|
||||
|
||||
ASSERT_LT(shard.row_stride, columns);
|
||||
|
||||
@@ -54,7 +56,9 @@ TEST(gpu_hist_experimental, TestDenseShard) {
|
||||
hmat.Init(dmat.get(), max_bins);
|
||||
gmat.cut = &hmat;
|
||||
gmat.Init(dmat.get());
|
||||
DeviceShard shard(0, 0, gmat, 0, rows, hmat.row_ptr.back(), TrainParam());
|
||||
ncclComm_t comm;
|
||||
DeviceShard shard(0, 0, gmat, 0, rows, hmat.row_ptr.back(),
|
||||
TrainParam());
|
||||
|
||||
ASSERT_EQ(shard.row_stride, columns);
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ import numpy as np
|
||||
import unittest
|
||||
from nose.plugins.attrib import attr
|
||||
from sklearn.datasets import load_digits, load_boston, load_breast_cancer, make_regression
|
||||
import itertools as it
|
||||
|
||||
rng = np.random.RandomState(1994)
|
||||
|
||||
@@ -15,8 +16,9 @@ rng = np.random.RandomState(1994)
|
||||
def non_increasing(L, tolerance):
    """Return True if ``L`` never rises by ``tolerance`` or more between neighbours.

    Each consecutive pair ``(x, y)`` of ``L`` is checked with
    ``(y - x) < tolerance``; the result is True only when every pair passes.
    A sequence with fewer than two elements has no pairs and yields True.

    Args:
        L: Sliceable sequence of numbers (``L[1:]`` is taken).
        tolerance: Upper bound (exclusive) on the allowed increase from one
            element to the next.

    Returns:
        bool: True when every step satisfies ``(y - x) < tolerance``.
    """
    # zip(L, L[1:]) pairs each element with its successor.
    return all((y - x) < tolerance for x, y in zip(L, L[1:]))
|
||||
|
||||
#Check result is always decreasing and final accuracy is within tolerance
|
||||
def assert_accuracy(res, tree_method, comparison_tree_method, tolerance):
|
||||
|
||||
# Check result is always decreasing and final accuracy is within tolerance
|
||||
def assert_accuracy(res, tree_method, comparison_tree_method, tolerance, param):
|
||||
assert non_increasing(res[tree_method], tolerance)
|
||||
assert np.allclose(res[tree_method][-1], res[comparison_tree_method][-1], 1e-3, 1e-2)
|
||||
|
||||
@@ -26,13 +28,14 @@ def train_boston(param_in, comparison_tree_method):
|
||||
dtrain = xgb.DMatrix(data.data, label=data.target)
|
||||
param = {}
|
||||
param.update(param_in)
|
||||
param['max_depth'] = 2
|
||||
res_tmp = {}
|
||||
res = {}
|
||||
num_rounds = 10
|
||||
xgb.train(param, dtrain, num_rounds, [(dtrain, 'train')], evals_result=res_tmp)
|
||||
bst = xgb.train(param, dtrain, num_rounds, [(dtrain, 'train')], evals_result=res_tmp)
|
||||
res[param['tree_method']] = res_tmp['train']['rmse']
|
||||
param["tree_method"] = comparison_tree_method
|
||||
xgb.train(param, dtrain, num_rounds, [(dtrain, 'train')], evals_result=res_tmp)
|
||||
bst = xgb.train(param, dtrain, num_rounds, [(dtrain, 'train')], evals_result=res_tmp)
|
||||
res[comparison_tree_method] = res_tmp['train']['rmse']
|
||||
|
||||
return res
|
||||
@@ -92,17 +95,24 @@ def train_sparse(param_in, comparison_tree_method):
|
||||
return res
|
||||
|
||||
|
||||
# Enumerates all permutations of variable parameters
|
||||
def assert_updater_accuracy(tree_method, comparison_tree_method, variable_param, tolerance):
|
||||
param = {'tree_method': tree_method}
|
||||
for k, set in variable_param.items():
|
||||
for val in set:
|
||||
param_tmp = param.copy()
|
||||
param_tmp[k] = val
|
||||
print(param_tmp, file=sys.stderr)
|
||||
assert_accuracy(train_boston(param_tmp, comparison_tree_method), tree_method, comparison_tree_method, tolerance)
|
||||
assert_accuracy(train_digits(param_tmp, comparison_tree_method), tree_method, comparison_tree_method, tolerance)
|
||||
assert_accuracy(train_cancer(param_tmp, comparison_tree_method), tree_method, comparison_tree_method, tolerance)
|
||||
assert_accuracy(train_sparse(param_tmp, comparison_tree_method), tree_method, comparison_tree_method, tolerance)
|
||||
param = {'tree_method': tree_method }
|
||||
names = sorted(variable_param)
|
||||
combinations = it.product(*(variable_param[Name] for Name in names))
|
||||
|
||||
for set in combinations:
|
||||
print(names, file=sys.stderr)
|
||||
print(set, file=sys.stderr)
|
||||
param_tmp = param.copy()
|
||||
for i, name in enumerate(names):
|
||||
param_tmp[name] = set[i]
|
||||
|
||||
print(param_tmp, file=sys.stderr)
|
||||
assert_accuracy(train_boston(param_tmp, comparison_tree_method), tree_method, comparison_tree_method, tolerance, param_tmp)
|
||||
assert_accuracy(train_digits(param_tmp, comparison_tree_method), tree_method, comparison_tree_method, tolerance, param_tmp)
|
||||
assert_accuracy(train_cancer(param_tmp, comparison_tree_method), tree_method, comparison_tree_method, tolerance, param_tmp)
|
||||
assert_accuracy(train_sparse(param_tmp, comparison_tree_method), tree_method, comparison_tree_method, tolerance, param_tmp)
|
||||
|
||||
|
||||
@attr('gpu')
|
||||
@@ -116,5 +126,5 @@ class TestGPU(unittest.TestCase):
|
||||
assert_updater_accuracy('gpu_exact', 'exact', variable_param, 0.02)
|
||||
|
||||
def test_gpu_hist_experimental(self):
|
||||
variable_param = {'max_depth': [2, 6], 'max_leaves': [255, 4], 'max_bin': [2, 16, 1024]}
|
||||
variable_param = {'n_gpus': [1, -1], 'max_depth': [2, 6], 'max_leaves': [255, 4], 'max_bin': [2, 16, 1024]}
|
||||
assert_updater_accuracy('gpu_hist_experimental', 'hist', variable_param, 0.01)
|
||||
|
||||
Reference in New Issue
Block a user