- Add experimental GPU algorithm for lossguided mode (#2755)
- Improved GPU algorithm unit tests
- Removed some thrust code to improve compile times
This commit is contained in:
@@ -5,57 +5,45 @@ import numpy as np
|
||||
from sklearn.datasets import make_classification
|
||||
from sklearn.model_selection import train_test_split
|
||||
import time
|
||||
import ast
|
||||
|
||||
rng = np.random.RandomState(1994)
|
||||
|
||||
|
||||
def run_benchmark(args, gpu_algorithm, cpu_algorithm):
|
||||
def run_benchmark(args):
|
||||
print("Generating dataset: {} rows * {} columns".format(args.rows, args.columns))
|
||||
print("{}/{} test/train split".format(args.test_size, 1.0 - args.test_size))
|
||||
tmp = time.time()
|
||||
X, y = make_classification(args.rows, n_features=args.columns, random_state=7)
|
||||
if args.sparsity < 1.0:
|
||||
X = np.array([[np.nan if rng.uniform(0, 1) < args.sparsity else x for x in x_row] for x_row in X])
|
||||
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=args.test_size, random_state=7)
|
||||
print ("Generate Time: %s seconds" % (str(time.time() - tmp)))
|
||||
tmp = time.time()
|
||||
print ("DMatrix Start")
|
||||
# omp way
|
||||
dtrain = xgb.DMatrix(X_train, y_train, nthread=-1)
|
||||
dtest = xgb.DMatrix(X_test, y_test, nthread=-1)
|
||||
print ("DMatrix Time: %s seconds" % (str(time.time() - tmp)))
|
||||
|
||||
param = {'objective': 'binary:logistic',
|
||||
'max_depth': 6,
|
||||
'silent': 0,
|
||||
'n_gpus': 1,
|
||||
'gpu_id': 0,
|
||||
'eval_metric': 'error',
|
||||
'debug_verbose': 0,
|
||||
}
|
||||
param = {'objective': 'binary:logistic'}
|
||||
if args.params is not '':
|
||||
param.update(ast.literal_eval(args.params))
|
||||
|
||||
param['tree_method'] = gpu_algorithm
|
||||
param['tree_method'] = args.tree_method
|
||||
print("Training with '%s'" % param['tree_method'])
|
||||
tmp = time.time()
|
||||
xgb.train(param, dtrain, args.iterations, evals=[(dtest, "test")])
|
||||
print ("Train Time: %s seconds" % (str(time.time() - tmp)))
|
||||
|
||||
param['silent'] = 1
|
||||
param['tree_method'] = cpu_algorithm
|
||||
print("Training with '%s'" % param['tree_method'])
|
||||
tmp = time.time()
|
||||
xgb.train(param, dtrain, args.iterations, evals=[(dtest, "test")])
|
||||
print ("Time: %s seconds" % (str(time.time() - tmp)))
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--algorithm', choices=['all', 'gpu_exact', 'gpu_hist'], default='all')
|
||||
parser.add_argument('--tree_method', default='gpu_hist')
|
||||
parser.add_argument('--sparsity', type=float, default=0.0)
|
||||
parser.add_argument('--rows', type=int, default=1000000)
|
||||
parser.add_argument('--columns', type=int, default=50)
|
||||
parser.add_argument('--iterations', type=int, default=500)
|
||||
parser.add_argument('--test_size', type=float, default=0.25)
|
||||
parser.add_argument('--params', default='', help='Provide additional parameters as a Python dict string, e.g. --params \"{\'max_depth\':2}\"')
|
||||
args = parser.parse_args()
|
||||
|
||||
if 'gpu_hist' in args.algorithm:
|
||||
run_benchmark(args, args.algorithm, 'hist')
|
||||
elif 'gpu_exact' in args.algorithm:
|
||||
run_benchmark(args, args.algorithm, 'exact')
|
||||
elif 'all' in args.algorithm:
|
||||
run_benchmark(args, 'gpu_exact', 'exact')
|
||||
run_benchmark(args, 'gpu_hist', 'hist')
|
||||
run_benchmark(args)
|
||||
|
||||
@@ -41,7 +41,7 @@ void SpeedTest() {
|
||||
[=] __device__(size_t idx, size_t ridx) { d_output_row[idx] = ridx; });
|
||||
|
||||
dh::safe_cuda(cudaDeviceSynchronize());
|
||||
double time = t.elapsedSeconds();
|
||||
double time = t.ElapsedSeconds();
|
||||
const int mb_size = 1048576;
|
||||
size_t size = (sizeof(int) * h_rows.size()) / mb_size;
|
||||
printf("size: %llumb, time: %fs, bandwidth: %fmb/s\n", size, time,
|
||||
|
||||
72
tests/cpp/tree/test_gpu_hist_experimental.cu
Normal file
72
tests/cpp/tree/test_gpu_hist_experimental.cu
Normal file
@@ -0,0 +1,72 @@
|
||||
|
||||
/*!
|
||||
* Copyright 2017 XGBoost contributors
|
||||
*/
|
||||
#include <thrust/device_vector.h>
|
||||
#include <xgboost/base.h>
|
||||
#include "../helpers.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include "../../../src/tree/updater_gpu_hist_experimental.cu"
|
||||
#include "../../../src/gbm/gbtree_model.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
// Checks the row layout of DeviceShard's compressed index buffer for a
// matrix with missing entries: each row occupies `row_stride` slots, the
// leading slots hold that row's entries from `gmat.index`, and the
// remaining slots hold `shard.null_gidx_value`.
TEST(gpu_hist_experimental, TestSparseShard) {
  int rows = 100;
  int columns = 80;
  int max_bins = 4;
  // NOTE(review): the third argument is presumably the sparsity fraction
  // (0.9 here, 0 in the dense test) -- confirm against helpers.h.
  auto dmat = CreateDMatrix(rows, columns, 0.9);

  common::HistCutMatrix hmat;
  hmat.Init(dmat.get(), max_bins);

  common::GHistIndexMatrix gmat;
  gmat.cut = &hmat;
  gmat.Init(dmat.get());

  DeviceShard shard(0, 0, gmat, 0, rows, hmat.row_ptr.back(), TrainParam());

  // With missing values, the per-row stride must be below the column count.
  ASSERT_LT(shard.row_stride, columns);

  // Read the buffer through a host-side vector (as_vector() presumably
  // copies it back from the device -- confirm).
  auto host_gidx_buffer = shard.gidx_buffer.as_vector();
  common::CompressedIterator<uint32_t> gidx(host_gidx_buffer.data(),
                                            hmat.row_ptr.back() + 1);

  for (int row = 0; row < rows; ++row) {
    int slot = 0;
    // Leading slots: the row's entries, in the order stored in gmat.index.
    for (int entry = gmat.row_ptr[row]; entry < gmat.row_ptr[row + 1];
         ++entry, ++slot) {
      ASSERT_EQ(gidx[row * shard.row_stride + slot], gmat.index[entry]);
    }
    // Trailing slots: padding up to row_stride, filled with the null value.
    while (slot < shard.row_stride) {
      ASSERT_EQ(gidx[row * shard.row_stride + slot], shard.null_gidx_value);
      ++slot;
    }
  }
}
|
||||
|
||||
// Checks DeviceShard's compressed index buffer for a matrix created with a
// zero third argument to CreateDMatrix: the row stride must equal the column
// count and the buffer must reproduce `gmat.index` element for element.
TEST(gpu_hist_experimental, TestDenseShard) {
  int rows = 100;
  int columns = 80;
  int max_bins = 4;
  // NOTE(review): the third argument is presumably the sparsity fraction
  // (0 here, 0.9 in the sparse test) -- confirm against helpers.h.
  auto dmat = CreateDMatrix(rows, columns, 0);
  common::HistCutMatrix hmat;
  common::GHistIndexMatrix gmat;
  hmat.Init(dmat.get(), max_bins);
  gmat.cut = &hmat;
  gmat.Init(dmat.get());
  DeviceShard shard(0, 0, gmat, 0, rows, hmat.row_ptr.back(), TrainParam());

  // Every row uses exactly `columns` slots.
  ASSERT_EQ(shard.row_stride, columns);

  // Read the buffer through a host-side vector (as_vector() presumably
  // copies it back from the device -- confirm).
  auto host_gidx_buffer = shard.gidx_buffer.as_vector();

  common::CompressedIterator<uint32_t> gidx(host_gidx_buffer.data(),
                                            hmat.row_ptr.back() + 1);

  // size_t counter: `gmat.index.size()` is unsigned, so an `int` counter
  // would trigger a signed/unsigned comparison.
  for (size_t i = 0; i < gmat.index.size(); i++) {
    ASSERT_EQ(gidx[i], gmat.index[i]);
  }
}
|
||||
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
@@ -7,316 +7,114 @@ import xgboost as xgb
|
||||
import numpy as np
|
||||
import unittest
|
||||
from nose.plugins.attrib import attr
|
||||
from sklearn.datasets import load_digits, load_boston, load_breast_cancer, make_regression
|
||||
|
||||
rng = np.random.RandomState(1994)
|
||||
|
||||
dpath = 'demo/data/'
|
||||
|
||||
def non_increasing(L, tolerance):
    """Return True if no element of ``L`` rises by ``tolerance`` or more.

    Each element is compared with its immediate predecessor; the check
    passes only when ``current - previous < tolerance`` for every adjacent
    pair. Sequences with fewer than two elements always pass.
    """
    for previous, current in zip(L, L[1:]):
        # Negated "<" (not ">=") so that NaN differences fail the check.
        if not (current - previous < tolerance):
            return False
    return True
|
||||
|
||||
def assert_accuracy(res, tree_method, comparison_tree_method, tolerance):
    """Assert that a metric history is acceptable relative to a reference.

    ``res`` maps tree-method names to per-round metric lists. Two checks
    are made:

    * the history for ``tree_method`` never rises by ``tolerance`` or more
      between consecutive rounds (see ``non_increasing``);
    * its final value is close to the final value for
      ``comparison_tree_method`` (``rtol=1e-3``, ``atol=1e-2``).
    """
    assert non_increasing(res[tree_method], tolerance)
    final_value = res[tree_method][-1]
    reference_value = res[comparison_tree_method][-1]
    assert np.allclose(final_value, reference_value, rtol=1e-3, atol=1e-2)
|
||||
|
||||
|
||||
def eprint(*args, **kwargs):
    """Print ``args`` twice: first to ``sys.stderr``, then to ``sys.stdout``.

    Any extra keyword arguments are forwarded unchanged to both ``print``
    calls.
    """
    for stream in (sys.stderr, sys.stdout):
        print(*args, file=stream, **kwargs)
|
||||
def train_boston(param_in, comparison_tree_method):
    """Train on the Boston data with two tree methods and record train RMSE.

    A copy of ``param_in`` is trained for 10 rounds using its own
    ``tree_method``, then again with ``tree_method`` replaced by
    ``comparison_tree_method``. The training set is also the only
    evaluation set.

    Returns a dict mapping each tree-method name to the per-round ``rmse``
    history reported for the ``train`` evaluation set.
    """
    boston = load_boston()
    dtrain = xgb.DMatrix(boston.data, label=boston.target)
    watchlist = [(dtrain, 'train')]
    num_rounds = 10

    param = dict(param_in)  # work on a copy; the caller's dict is untouched
    history = {}
    res = {}

    # First run: the tree method supplied by the caller.
    xgb.train(param, dtrain, num_rounds, watchlist, evals_result=history)
    res[param['tree_method']] = history['train']['rmse']

    # Second run: identical parameters, reference tree method.
    param["tree_method"] = comparison_tree_method
    xgb.train(param, dtrain, num_rounds, watchlist, evals_result=history)
    res[comparison_tree_method] = history['train']['rmse']

    return res
|
||||
|
||||
|
||||
def train_digits(param_in, comparison_tree_method):
    """Train on the digits data with two tree methods and record train merror.

    Starts from ``multi:softmax`` with 10 classes, overlays ``param_in``,
    and trains for 10 rounds using the resulting ``tree_method``; then
    trains again with ``tree_method`` replaced by
    ``comparison_tree_method``. The training set is also the only
    evaluation set.

    Returns a dict mapping each tree-method name to the per-round
    ``merror`` history reported for the ``train`` evaluation set.
    """
    digits = load_digits()
    dtrain = xgb.DMatrix(digits.data, label=digits.target)
    watchlist = [(dtrain, 'train')]
    num_rounds = 10

    param = {'objective': 'multi:softmax', 'num_class': 10}
    param.update(param_in)  # caller-supplied values override the defaults
    history = {}
    res = {}

    # First run: the tree method supplied by the caller.
    xgb.train(param, dtrain, num_rounds, watchlist, evals_result=history)
    res[param['tree_method']] = history['train']['merror']

    # Second run: identical parameters, reference tree method.
    param["tree_method"] = comparison_tree_method
    xgb.train(param, dtrain, num_rounds, watchlist, evals_result=history)
    res[comparison_tree_method] = history['train']['merror']

    return res
|
||||
|
||||
|
||||
def train_cancer(param_in, comparison_tree_method):
    """Train on the breast-cancer data with two tree methods; record error.

    Starts from the ``binary:logistic`` objective, overlays ``param_in``,
    and trains for 10 rounds using the resulting ``tree_method``; then
    trains again with ``tree_method`` replaced by
    ``comparison_tree_method``. The training set is also the only
    evaluation set.

    Returns a dict mapping each tree-method name to the per-round ``error``
    history reported for the ``train`` evaluation set.
    """
    cancer = load_breast_cancer()
    dtrain = xgb.DMatrix(cancer.data, label=cancer.target)
    watchlist = [(dtrain, 'train')]
    num_rounds = 10

    param = {'objective': 'binary:logistic'}
    param.update(param_in)  # caller-supplied values override the default
    history = {}
    res = {}

    # First run: the tree method supplied by the caller.
    xgb.train(param, dtrain, num_rounds, watchlist, evals_result=history)
    res[param['tree_method']] = history['train']['error']

    # Second run: identical parameters, reference tree method.
    param["tree_method"] = comparison_tree_method
    xgb.train(param, dtrain, num_rounds, watchlist, evals_result=history)
    res[comparison_tree_method] = history['train']['error']

    return res
|
||||
|
||||
|
||||
def train_sparse(param_in, comparison_tree_method):
    """Train on synthetic regression data with missing values; record RMSE.

    Generates 5000 samples with ``make_regression`` and then replaces each
    feature value with NaN with probability 0.75, drawing from the
    module-level ``rng``. A copy of ``param_in`` is trained for 10 rounds
    using its own ``tree_method``, then again with ``tree_method`` replaced
    by ``comparison_tree_method``. The training set is also the only
    evaluation set.

    Returns a dict mapping each tree-method name to the per-round ``rmse``
    history reported for the ``train`` evaluation set.
    """
    n = 5000
    sparsity = 0.75
    X, y = make_regression(n, random_state=rng)

    def _maybe_drop(value):
        # One rng draw per cell, visited row by row, left to right.
        return np.nan if rng.uniform(0, 1) < sparsity else value

    X = np.array([[_maybe_drop(value) for value in row] for row in X])
    dtrain = xgb.DMatrix(X, label=y)
    watchlist = [(dtrain, 'train')]
    num_rounds = 10

    param = dict(param_in)  # work on a copy; the caller's dict is untouched
    history = {}
    res = {}

    # First run: the tree method supplied by the caller.
    xgb.train(param, dtrain, num_rounds, watchlist, evals_result=history)
    res[param['tree_method']] = history['train']['rmse']

    # Second run: identical parameters, reference tree method.
    param["tree_method"] = comparison_tree_method
    xgb.train(param, dtrain, num_rounds, watchlist, evals_result=history)
    res[comparison_tree_method] = history['train']['rmse']

    return res
|
||||
|
||||
|
||||
def assert_updater_accuracy(tree_method, comparison_tree_method, variable_param, tolerance):
    """Compare ``tree_method`` against a reference over a parameter sweep.

    ``variable_param`` maps a parameter name to the values to try. One
    parameter is varied at a time: for every (name, value) pair a parameter
    dict containing only ``tree_method`` and that single pair is built,
    printed to stderr, and passed to each of the four dataset trainers. The
    results of every trainer must satisfy ``assert_accuracy`` with the
    given ``tolerance``.

    Raises ``AssertionError`` (from ``assert_accuracy``) on the first
    combination that fails.
    """
    base_param = {'tree_method': tree_method}
    trainers = (train_boston, train_digits, train_cancer, train_sparse)
    # `values` rather than `set`: avoid shadowing the builtin.
    for name, values in variable_param.items():
        for val in values:
            param_tmp = base_param.copy()
            param_tmp[name] = val
            # Show the active configuration so a failure can be attributed.
            print(param_tmp, file=sys.stderr)
            for train in trainers:
                assert_accuracy(train(param_tmp, comparison_tree_method),
                                tree_method, comparison_tree_method, tolerance)
|
||||
|
||||
|
||||
@attr('gpu')
|
||||
class TestGPU(unittest.TestCase):
|
||||
def test_grow_gpu(self):
|
||||
from sklearn.datasets import load_digits
|
||||
try:
|
||||
from sklearn.model_selection import train_test_split
|
||||
except:
|
||||
from sklearn.cross_validation import train_test_split
|
||||
def test_gpu_hist(self):
    # Sweep depth, bin count and GPU count for 'gpu_hist', using 'hist'
    # as the reference, with a tolerance of 0.02.
    variable_param = {
        'max_depth': [2, 6, 11],
        'max_bin': [2, 16, 1024],
        'n_gpus': [1, -1],
    }
    assert_updater_accuracy(tree_method='gpu_hist',
                            comparison_tree_method='hist',
                            variable_param=variable_param,
                            tolerance=0.02)
|
||||
|
||||
ag_dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||
ag_dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
|
||||
def test_gpu_exact(self):
    # Sweep tree depth for 'gpu_exact', using 'exact' as the reference,
    # with a tolerance of 0.02.
    variable_param = {
        'max_depth': [2, 6, 15],
    }
    assert_updater_accuracy(tree_method='gpu_exact',
                            comparison_tree_method='exact',
                            variable_param=variable_param,
                            tolerance=0.02)
|
||||
|
||||
ag_param = {'max_depth': 2,
|
||||
'tree_method': 'exact',
|
||||
'nthread': 0,
|
||||
'eta': 1,
|
||||
'silent': 1,
|
||||
'debug_verbose': 0,
|
||||
'objective': 'binary:logistic',
|
||||
'eval_metric': 'auc'}
|
||||
ag_param2 = {'max_depth': 2,
|
||||
'tree_method': 'gpu_exact',
|
||||
'nthread': 0,
|
||||
'eta': 1,
|
||||
'silent': 1,
|
||||
'debug_verbose': 0,
|
||||
'objective': 'binary:logistic',
|
||||
'eval_metric': 'auc'}
|
||||
ag_res = {}
|
||||
ag_res2 = {}
|
||||
|
||||
num_rounds = 10
|
||||
xgb.train(ag_param, ag_dtrain, num_rounds, [(ag_dtrain, 'train'), (ag_dtest, 'test')],
|
||||
evals_result=ag_res)
|
||||
xgb.train(ag_param2, ag_dtrain, num_rounds, [(ag_dtrain, 'train'), (ag_dtest, 'test')],
|
||||
evals_result=ag_res2)
|
||||
assert ag_res['train']['auc'] == ag_res2['train']['auc']
|
||||
assert ag_res['test']['auc'] == ag_res2['test']['auc']
|
||||
|
||||
digits = load_digits(2)
|
||||
X = digits['data']
|
||||
y = digits['target']
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
|
||||
dtrain = xgb.DMatrix(X_train, y_train)
|
||||
dtest = xgb.DMatrix(X_test, y_test)
|
||||
|
||||
param = {'objective': 'binary:logistic',
|
||||
'nthread': 0,
|
||||
'tree_method': 'gpu_exact',
|
||||
'max_depth': 3,
|
||||
'debug_verbose': 0,
|
||||
'eval_metric': 'auc'}
|
||||
res = {}
|
||||
xgb.train(param, dtrain, num_rounds, [(dtrain, 'train'), (dtest, 'test')],
|
||||
evals_result=res)
|
||||
assert self.non_decreasing(res['train']['auc'])
|
||||
assert self.non_decreasing(res['test']['auc'])
|
||||
|
||||
# fail-safe test for dense data
|
||||
from sklearn.datasets import load_svmlight_file
|
||||
X2, y2 = load_svmlight_file(dpath + 'agaricus.txt.train')
|
||||
X2 = X2.toarray()
|
||||
dtrain2 = xgb.DMatrix(X2, label=y2)
|
||||
|
||||
param = {'objective': 'binary:logistic',
|
||||
'nthread': 0,
|
||||
'tree_method': 'gpu_exact',
|
||||
'max_depth': 2,
|
||||
'debug_verbose': 0,
|
||||
'eval_metric': 'auc'}
|
||||
res = {}
|
||||
xgb.train(param, dtrain2, num_rounds, [(dtrain2, 'train')], evals_result=res)
|
||||
|
||||
assert self.non_decreasing(res['train']['auc'])
|
||||
assert res['train']['auc'][0] >= 0.85
|
||||
|
||||
for j in range(X2.shape[1]):
|
||||
for i in rng.choice(X2.shape[0], size=num_rounds, replace=False):
|
||||
X2[i, j] = 2
|
||||
|
||||
dtrain3 = xgb.DMatrix(X2, label=y2)
|
||||
res = {}
|
||||
|
||||
xgb.train(param, dtrain3, num_rounds, [(dtrain3, 'train')], evals_result=res)
|
||||
|
||||
assert self.non_decreasing(res['train']['auc'])
|
||||
assert res['train']['auc'][0] >= 0.85
|
||||
|
||||
for j in range(X2.shape[1]):
|
||||
for i in np.random.choice(X2.shape[0], size=num_rounds, replace=False):
|
||||
X2[i, j] = 3
|
||||
|
||||
dtrain4 = xgb.DMatrix(X2, label=y2)
|
||||
res = {}
|
||||
xgb.train(param, dtrain4, num_rounds, [(dtrain4, 'train')], evals_result=res)
|
||||
assert self.non_decreasing(res['train']['auc'])
|
||||
assert res['train']['auc'][0] >= 0.85
|
||||
|
||||
def test_grow_gpu_hist(self):
|
||||
n_gpus = -1
|
||||
from sklearn.datasets import load_digits
|
||||
try:
|
||||
from sklearn.model_selection import train_test_split
|
||||
except:
|
||||
from sklearn.cross_validation import train_test_split
|
||||
|
||||
ag_dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
|
||||
ag_dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
|
||||
|
||||
for max_depth in range(3, 10): # TODO: Doesn't work with 2 for some tests
|
||||
# eprint("max_depth=%d" % (max_depth))
|
||||
|
||||
for max_bin_i in range(3, 11):
|
||||
max_bin = np.power(2, max_bin_i)
|
||||
# eprint("max_bin=%d" % (max_bin))
|
||||
|
||||
|
||||
|
||||
# regression test --- hist must be same as exact on all-categorial data
|
||||
ag_param = {'max_depth': max_depth,
|
||||
'tree_method': 'exact',
|
||||
'nthread': 0,
|
||||
'eta': 1,
|
||||
'silent': 1,
|
||||
'debug_verbose': 0,
|
||||
'objective': 'binary:logistic',
|
||||
'eval_metric': 'auc'}
|
||||
ag_param2 = {'max_depth': max_depth,
|
||||
'nthread': 0,
|
||||
'tree_method': 'gpu_hist',
|
||||
'eta': 1,
|
||||
'silent': 1,
|
||||
'debug_verbose': 0,
|
||||
'n_gpus': 1,
|
||||
'objective': 'binary:logistic',
|
||||
'max_bin': max_bin,
|
||||
'eval_metric': 'auc'}
|
||||
ag_param3 = {'max_depth': max_depth,
|
||||
'nthread': 0,
|
||||
'tree_method': 'gpu_hist',
|
||||
'eta': 1,
|
||||
'silent': 1,
|
||||
'debug_verbose': 0,
|
||||
'n_gpus': n_gpus,
|
||||
'objective': 'binary:logistic',
|
||||
'max_bin': max_bin,
|
||||
'eval_metric': 'auc'}
|
||||
ag_res = {}
|
||||
ag_res2 = {}
|
||||
ag_res3 = {}
|
||||
|
||||
num_rounds = 10
|
||||
# eprint("normal updater");
|
||||
xgb.train(ag_param, ag_dtrain, num_rounds, [(ag_dtrain, 'train'), (ag_dtest, 'test')],
|
||||
evals_result=ag_res)
|
||||
# eprint("grow_gpu_hist updater 1 gpu");
|
||||
xgb.train(ag_param2, ag_dtrain, num_rounds, [(ag_dtrain, 'train'), (ag_dtest, 'test')],
|
||||
evals_result=ag_res2)
|
||||
# eprint("grow_gpu_hist updater %d gpus" % (n_gpus));
|
||||
xgb.train(ag_param3, ag_dtrain, num_rounds, [(ag_dtrain, 'train'), (ag_dtest, 'test')],
|
||||
evals_result=ag_res3)
|
||||
# assert 1==0
|
||||
assert ag_res['train']['auc'] == ag_res2['train']['auc']
|
||||
assert ag_res['test']['auc'] == ag_res2['test']['auc']
|
||||
assert ag_res['test']['auc'] == ag_res3['test']['auc']
|
||||
|
||||
######################################################################
|
||||
digits = load_digits(2)
|
||||
X = digits['data']
|
||||
y = digits['target']
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
|
||||
dtrain = xgb.DMatrix(X_train, y_train)
|
||||
dtest = xgb.DMatrix(X_test, y_test)
|
||||
|
||||
param = {'objective': 'binary:logistic',
|
||||
'tree_method': 'gpu_hist',
|
||||
'nthread': 0,
|
||||
'max_depth': max_depth,
|
||||
'n_gpus': 1,
|
||||
'max_bin': max_bin,
|
||||
'debug_verbose': 0,
|
||||
'eval_metric': 'auc'}
|
||||
res = {}
|
||||
# eprint("digits: grow_gpu_hist updater 1 gpu");
|
||||
xgb.train(param, dtrain, num_rounds, [(dtrain, 'train'), (dtest, 'test')],
|
||||
evals_result=res)
|
||||
assert self.non_decreasing(res['train']['auc'])
|
||||
# assert self.non_decreasing(res['test']['auc'])
|
||||
param2 = {'objective': 'binary:logistic',
|
||||
'nthread': 0,
|
||||
'tree_method': 'gpu_hist',
|
||||
'max_depth': max_depth,
|
||||
'n_gpus': n_gpus,
|
||||
'max_bin': max_bin,
|
||||
'debug_verbose': 0,
|
||||
'eval_metric': 'auc'}
|
||||
res2 = {}
|
||||
# eprint("digits: grow_gpu_hist updater %d gpus" % (n_gpus));
|
||||
xgb.train(param2, dtrain, num_rounds, [(dtrain, 'train'), (dtest, 'test')],
|
||||
evals_result=res2)
|
||||
assert self.non_decreasing(res2['train']['auc'])
|
||||
# assert self.non_decreasing(res2['test']['auc'])
|
||||
assert res['train']['auc'] == res2['train']['auc']
|
||||
# assert res['test']['auc'] == res2['test']['auc']
|
||||
|
||||
######################################################################
|
||||
# fail-safe test for dense data
|
||||
from sklearn.datasets import load_svmlight_file
|
||||
X2, y2 = load_svmlight_file(dpath + 'agaricus.txt.train')
|
||||
X2 = X2.toarray()
|
||||
dtrain2 = xgb.DMatrix(X2, label=y2)
|
||||
|
||||
param = {'objective': 'binary:logistic',
|
||||
'nthread': 0,
|
||||
'tree_method': 'gpu_hist',
|
||||
'max_depth': max_depth,
|
||||
'n_gpus': n_gpus,
|
||||
'max_bin': max_bin,
|
||||
'debug_verbose': 0,
|
||||
'eval_metric': 'auc'}
|
||||
res = {}
|
||||
xgb.train(param, dtrain2, num_rounds, [(dtrain2, 'train')], evals_result=res)
|
||||
|
||||
assert self.non_decreasing(res['train']['auc'])
|
||||
if max_bin > 32:
|
||||
assert res['train']['auc'][0] >= 0.85
|
||||
|
||||
for j in range(X2.shape[1]):
|
||||
for i in rng.choice(X2.shape[0], size=num_rounds, replace=False):
|
||||
X2[i, j] = 2
|
||||
|
||||
dtrain3 = xgb.DMatrix(X2, label=y2)
|
||||
res = {}
|
||||
|
||||
xgb.train(param, dtrain3, num_rounds, [(dtrain3, 'train')], evals_result=res)
|
||||
|
||||
assert self.non_decreasing(res['train']['auc'])
|
||||
if max_bin > 32:
|
||||
assert res['train']['auc'][0] >= 0.85
|
||||
|
||||
for j in range(X2.shape[1]):
|
||||
for i in np.random.choice(X2.shape[0], size=num_rounds, replace=False):
|
||||
X2[i, j] = 3
|
||||
|
||||
dtrain4 = xgb.DMatrix(X2, label=y2)
|
||||
res = {}
|
||||
xgb.train(param, dtrain4, num_rounds, [(dtrain4, 'train')], evals_result=res)
|
||||
assert self.non_decreasing(res['train']['auc'])
|
||||
if max_bin > 32:
|
||||
assert res['train']['auc'][0] >= 0.85
|
||||
|
||||
######################################################################
|
||||
# fail-safe test for max_bin
|
||||
param = {'objective': 'binary:logistic',
|
||||
'nthread': 0,
|
||||
'tree_method': 'gpu_hist',
|
||||
'max_depth': max_depth,
|
||||
'n_gpus': n_gpus,
|
||||
'debug_verbose': 0,
|
||||
'eval_metric': 'auc',
|
||||
'max_bin': max_bin}
|
||||
res = {}
|
||||
xgb.train(param, dtrain2, num_rounds, [(dtrain2, 'train')], evals_result=res)
|
||||
assert self.non_decreasing(res['train']['auc'])
|
||||
if max_bin > 32:
|
||||
assert res['train']['auc'][0] >= 0.85
|
||||
######################################################################
|
||||
# subsampling
|
||||
param = {'objective': 'binary:logistic',
|
||||
'nthread': 0,
|
||||
'tree_method': 'gpu_hist',
|
||||
'max_depth': max_depth,
|
||||
'n_gpus': n_gpus,
|
||||
'eval_metric': 'auc',
|
||||
'colsample_bytree': 0.5,
|
||||
'colsample_bylevel': 0.5,
|
||||
'subsample': 0.5,
|
||||
'debug_verbose': 0,
|
||||
'max_bin': max_bin}
|
||||
res = {}
|
||||
xgb.train(param, dtrain2, num_rounds, [(dtrain2, 'train')], evals_result=res)
|
||||
assert self.non_decreasing(res['train']['auc'])
|
||||
if max_bin > 32:
|
||||
assert res['train']['auc'][0] >= 0.85
|
||||
######################################################################
|
||||
# fail-safe test for max_bin=2
|
||||
param = {'objective': 'binary:logistic',
|
||||
'nthread': 0,
|
||||
'tree_method': 'gpu_hist',
|
||||
'max_depth': 2,
|
||||
'n_gpus': n_gpus,
|
||||
'debug_verbose': 0,
|
||||
'eval_metric': 'auc',
|
||||
'max_bin': 2}
|
||||
res = {}
|
||||
xgb.train(param, dtrain2, num_rounds, [(dtrain2, 'train')], evals_result=res)
|
||||
assert self.non_decreasing(res['train']['auc'])
|
||||
if max_bin > 32:
|
||||
assert res['train']['auc'][0] >= 0.85
|
||||
|
||||
def non_decreasing(self, L):
    # True when no element of L falls below its predecessor by 0.001 or
    # more; sequences with fewer than two elements always pass.
    for previous, current in zip(L, L[1:]):
        # Negated "<" (not ">=") so that NaN differences fail the check.
        if not (previous - current < 0.001):
            return False
    return True
|
||||
def test_gpu_hist_experimental(self):
    # Sweep depth, leaf count and bin count for 'gpu_hist_experimental',
    # using 'hist' as the reference, with a tolerance of 0.01.
    variable_param = {
        'max_depth': [2, 6],
        'max_leaves': [255, 4],
        'max_bin': [2, 16, 1024],
    }
    assert_updater_accuracy(tree_method='gpu_hist_experimental',
                            comparison_tree_method='hist',
                            variable_param=variable_param,
                            tolerance=0.01)
|
||||
|
||||
Reference in New Issue
Block a user