Multi-threaded XGDMatrixCreateFromMat for faster DMatrix creation (#2530)

* Multi-threaded XGDMatrixCreateFromMat for faster DMatrix creation from numpy arrays in the Python interface.
This commit is contained in:
PSEUDOTENSOR / Jonathan McKinney
2017-07-20 19:43:17 -07:00
committed by Rory Mitchell
parent 56550ff3f1
commit 6b375f6ad8
9 changed files with 324 additions and 73 deletions

View File

@@ -5,13 +5,18 @@ import numpy as np
from sklearn.datasets import make_classification
import time
n = 1000000
num_rounds = 500
def run_benchmark(args, gpu_algorithm, cpu_algorithm):
print("Generating dataset: {} rows * {} columns".format(args.rows,args.columns))
tmp = time.time()
X, y = make_classification(args.rows, n_features=args.columns, random_state=7)
dtrain = xgb.DMatrix(X, y)
print ("Generate Time: %s seconds" % (str(time.time() - tmp)))
tmp = time.time()
print ("DMatrix Start")
# omp way
dtrain = xgb.DMatrix(X, y, nthread=-1)
# non-omp way
#dtrain = xgb.DMatrix(X, y)
print ("DMatrix Time: %s seconds" % (str(time.time() - tmp)))
param = {'objective': 'binary:logistic',
'max_depth': 6,
@@ -24,7 +29,7 @@ def run_benchmark(args, gpu_algorithm, cpu_algorithm):
print("Training with '%s'" % param['tree_method'])
tmp = time.time()
xgb.train(param, dtrain, args.iterations)
print ("Time: %s seconds" % (str(time.time() - tmp)))
print ("Train Time: %s seconds" % (str(time.time() - tmp)))
param['silent'] = 1
param['tree_method'] = cpu_algorithm

View File

@@ -343,8 +343,6 @@ void GPUHistBuilder::InitData(const std::vector<bst_gpair>& gpair,
}
void GPUHistBuilder::BuildHist(int depth) {
// dh::Timer time;
for (int d_idx = 0; d_idx < n_devices; d_idx++) {
int device_idx = dList[d_idx];
size_t begin = device_element_segments[d_idx];
@@ -1070,9 +1068,9 @@ void GPUHistBuilder::Update(const std::vector<bst_gpair>& gpair,
this->InitData(gpair, *p_fmat, *p_tree);
this->InitFirstNode(gpair);
this->ColSampleTree();
for (int depth = 0; depth < param.max_depth; depth++) {
this->ColSampleLevel();
this->BuildHist(depth);
this->FindSplit(depth);
this->UpdatePosition(depth);

View File

@@ -29,13 +29,14 @@ class TestGPU(unittest.TestCase):
ag_param = {'max_depth': 2,
'tree_method': 'exact',
'nthread': 1,
'nthread': 0,
'eta': 1,
'silent': 1,
'objective': 'binary:logistic',
'eval_metric': 'auc'}
ag_param2 = {'max_depth': 2,
'tree_method': 'gpu_exact',
'nthread': 0,
'eta': 1,
'silent': 1,
'objective': 'binary:logistic',
@@ -59,6 +60,7 @@ class TestGPU(unittest.TestCase):
dtest = xgb.DMatrix(X_test, y_test)
param = {'objective': 'binary:logistic',
'nthread': 0,
'tree_method': 'gpu_exact',
'max_depth': 3,
'eval_metric': 'auc'}
@@ -75,6 +77,7 @@ class TestGPU(unittest.TestCase):
dtrain2 = xgb.DMatrix(X2, label=y2)
param = {'objective': 'binary:logistic',
'nthread': 0,
'tree_method': 'gpu_exact',
'max_depth': 2,
'eval_metric': 'auc'}
@@ -128,26 +131,28 @@ class TestGPU(unittest.TestCase):
# regression test --- hist must be same as exact on all-categorical data
ag_param = {'max_depth': max_depth,
'tree_method': 'exact',
'nthread': 1,
'nthread': 0,
'eta': 1,
'silent': 1,
'objective': 'binary:logistic',
'eval_metric': 'auc'}
ag_param2 = {'max_depth': max_depth,
'nthread': 0,
'tree_method': 'gpu_hist',
'eta': 1,
'silent': 1,
'n_gpus': 1,
'objective': 'binary:logistic',
'max_bin': max_bin,
'max_bin': max_bin,
'eval_metric': 'auc'}
ag_param3 = {'max_depth': max_depth,
'nthread': 0,
'tree_method': 'gpu_hist',
'eta': 1,
'silent': 1,
'n_gpus': n_gpus,
'objective': 'binary:logistic',
'max_bin': max_bin,
'objective': 'binary:logistic',
'max_bin': max_bin,
'eval_metric': 'auc'}
ag_res = {}
ag_res2 = {}
@@ -178,6 +183,7 @@ class TestGPU(unittest.TestCase):
param = {'objective': 'binary:logistic',
'tree_method': 'gpu_hist',
'nthread': 0,
'max_depth': max_depth,
'n_gpus': 1,
'max_bin': max_bin,
@@ -189,6 +195,7 @@ class TestGPU(unittest.TestCase):
assert self.non_decreasing(res['train']['auc'])
#assert self.non_decreasing(res['test']['auc'])
param2 = {'objective': 'binary:logistic',
'nthread': 0,
'tree_method': 'gpu_hist',
'max_depth': max_depth,
'n_gpus': n_gpus,
@@ -211,6 +218,7 @@ class TestGPU(unittest.TestCase):
dtrain2 = xgb.DMatrix(X2, label=y2)
param = {'objective': 'binary:logistic',
'nthread': 0,
'tree_method': 'gpu_hist',
'max_depth': max_depth,
'n_gpus': n_gpus,
@@ -250,6 +258,7 @@ class TestGPU(unittest.TestCase):
######################################################################
# fail-safe test for max_bin
param = {'objective': 'binary:logistic',
'nthread': 0,
'tree_method': 'gpu_hist',
'max_depth': max_depth,
'n_gpus': n_gpus,
@@ -263,6 +272,7 @@ class TestGPU(unittest.TestCase):
######################################################################
# subsampling
param = {'objective': 'binary:logistic',
'nthread': 0,
'tree_method': 'gpu_hist',
'max_depth': max_depth,
'n_gpus': n_gpus,
@@ -279,6 +289,7 @@ class TestGPU(unittest.TestCase):
######################################################################
# fail-safe test for max_bin=2
param = {'objective': 'binary:logistic',
'nthread': 0,
'tree_method': 'gpu_hist',
'max_depth': 2,
'n_gpus': n_gpus,

View File

@@ -18,62 +18,48 @@ rng = np.random.RandomState(1994)
# "realistic" size based upon http://stat-computing.org/dataexpo/2009/ , which has been processed to one-hot encode categoricals
cols = 31
# reduced to fit onto 1 gpu but still be large
rows2 = 5000 # medium
#rows2 = 4032 # fake large for testing
rows3 = 5000 # small
rows2 = 4360032 # medium
rows1 = 42360032 # large
#rows2 = 152360032 # can do this for multi-gpu test (very large)
rowslist = [rows1, rows2]
#rows1 = 152360032 # can do this for multi-gpu test (very large)
rowslist = [rows1, rows2, rows3]
class TestGPU(unittest.TestCase):
def test_large(self):
eprint("Starting test for large data")
tm._skip_if_no_sklearn()
from sklearn.datasets import load_digits
try:
from sklearn.model_selection import train_test_split
except:
from sklearn.cross_validation import train_test_split
for rows in rowslist:
eprint("Creating train data rows=%d cols=%d" % (rows,cols))
X, y = make_classification(rows, n_features=cols, random_state=7)
rowstest = int(rows*0.2)
eprint("Creating test data rows=%d cols=%d" % (rowstest,cols))
# note the new random state: if the same random state as the train set were chosen, exact methods could memorize and do very well on the test set even for random data, while hist cannot
Xtest, ytest = make_classification(rowstest, n_features=cols, random_state=8)
np.random.seed(7)
X = np.random.rand(rows, cols)
y = np.random.rand(rows)
eprint("Starting DMatrix(X,y)")
ag_dtrain = xgb.DMatrix(X,y)
eprint("Starting DMatrix(Xtest,ytest)")
ag_dtest = xgb.DMatrix(Xtest,ytest)
ag_dtrain = xgb.DMatrix(X,y,nthread=0)
max_depth=6
max_bin=1024
# regression test --- hist must be same as exact on all-categorical data
ag_param = {'max_depth': max_depth,
'tree_method': 'exact',
#'nthread': 1,
'nthread': 0,
'eta': 1,
'silent': 0,
'objective': 'binary:logistic',
'eval_metric': 'auc'}
ag_paramb = {'max_depth': max_depth,
'tree_method': 'hist',
#'nthread': 1,
'nthread': 0,
'eta': 1,
'silent': 0,
'objective': 'binary:logistic',
'eval_metric': 'auc'}
ag_param2 = {'max_depth': max_depth,
'tree_method': 'gpu_hist',
'nthread': 0,
'eta': 1,
'silent': 0,
'n_gpus': 1,
@@ -82,26 +68,18 @@ class TestGPU(unittest.TestCase):
'eval_metric': 'auc'}
ag_param3 = {'max_depth': max_depth,
'tree_method': 'gpu_hist',
'nthread': 0,
'eta': 1,
'silent': 0,
'n_gpus': -1,
'objective': 'binary:logistic',
'max_bin': max_bin,
'eval_metric': 'auc'}
#ag_param4 = {'max_depth': max_depth,
# 'tree_method': 'gpu_exact',
# 'eta': 1,
# 'silent': 0,
# 'n_gpus': 1,
# 'objective': 'binary:logistic',
# 'max_bin': max_bin,
# 'eval_metric': 'auc'}
ag_res = {}
ag_resb = {}
ag_res2 = {}
ag_res3 = {}
#ag_res4 = {}
num_rounds = 1
eprint("normal updater")
@@ -116,19 +94,10 @@ class TestGPU(unittest.TestCase):
eprint("gpu_hist updater all gpus")
xgb.train(ag_param3, ag_dtrain, num_rounds, [(ag_dtrain, 'train'), (ag_dtest, 'test')],
evals_result=ag_res3)
#eprint("gpu_exact updater")
#xgb.train(ag_param4, ag_dtrain, num_rounds, [(ag_dtrain, 'train'), (ag_dtest, 'test')],
# evals_result=ag_res4)
assert np.fabs(ag_res['train']['auc'][0] - ag_resb['train']['auc'][0])<0.001
assert np.fabs(ag_res['train']['auc'][0] - ag_res2['train']['auc'][0])<0.001
assert np.fabs(ag_res['train']['auc'][0] - ag_res3['train']['auc'][0])<0.001
#assert np.fabs(ag_res['train']['auc'][0] - ag_res4['train']['auc'][0])<0.001
assert np.fabs(ag_res['test']['auc'][0] - ag_resb['test']['auc'][0])<0.01
assert np.fabs(ag_res['test']['auc'][0] - ag_res2['test']['auc'][0])<0.01
assert np.fabs(ag_res['test']['auc'][0] - ag_res3['test']['auc'][0])<0.01
#assert np.fabs(ag_res['test']['auc'][0] - ag_res4['test']['auc'][0])<0.01