Multi-threaded XGDMatrixCreateFromMat for faster DMatrix creation (#2530)
* Multi-threaded XGDMatrixCreateFromMat for faster DMatrix creation from numpy arrays for python interface.
This commit is contained in:
committed by
Rory Mitchell
parent
56550ff3f1
commit
6b375f6ad8
@@ -5,13 +5,18 @@ import numpy as np
|
||||
from sklearn.datasets import make_classification
|
||||
import time
|
||||
|
||||
n = 1000000
|
||||
num_rounds = 500
|
||||
|
||||
def run_benchmark(args, gpu_algorithm, cpu_algorithm):
|
||||
print("Generating dataset: {} rows * {} columns".format(args.rows,args.columns))
|
||||
tmp = time.time()
|
||||
X, y = make_classification(args.rows, n_features=args.columns, random_state=7)
|
||||
dtrain = xgb.DMatrix(X, y)
|
||||
print ("Generate Time: %s seconds" % (str(time.time() - tmp)))
|
||||
tmp = time.time()
|
||||
print ("DMatrix Start")
|
||||
# omp way
|
||||
dtrain = xgb.DMatrix(X, y, nthread=-1)
|
||||
# non-omp way
|
||||
#dtrain = xgb.DMatrix(X, y)
|
||||
print ("DMatrix Time: %s seconds" % (str(time.time() - tmp)))
|
||||
|
||||
param = {'objective': 'binary:logistic',
|
||||
'max_depth': 6,
|
||||
@@ -24,7 +29,7 @@ def run_benchmark(args, gpu_algorithm, cpu_algorithm):
|
||||
print("Training with '%s'" % param['tree_method'])
|
||||
tmp = time.time()
|
||||
xgb.train(param, dtrain, args.iterations)
|
||||
print ("Time: %s seconds" % (str(time.time() - tmp)))
|
||||
print ("Train Time: %s seconds" % (str(time.time() - tmp)))
|
||||
|
||||
param['silent'] = 1
|
||||
param['tree_method'] = cpu_algorithm
|
||||
|
||||
@@ -343,8 +343,6 @@ void GPUHistBuilder::InitData(const std::vector<bst_gpair>& gpair,
|
||||
}
|
||||
|
||||
void GPUHistBuilder::BuildHist(int depth) {
|
||||
// dh::Timer time;
|
||||
|
||||
for (int d_idx = 0; d_idx < n_devices; d_idx++) {
|
||||
int device_idx = dList[d_idx];
|
||||
size_t begin = device_element_segments[d_idx];
|
||||
@@ -1070,9 +1068,9 @@ void GPUHistBuilder::Update(const std::vector<bst_gpair>& gpair,
|
||||
this->InitData(gpair, *p_fmat, *p_tree);
|
||||
this->InitFirstNode(gpair);
|
||||
this->ColSampleTree();
|
||||
|
||||
for (int depth = 0; depth < param.max_depth; depth++) {
|
||||
this->ColSampleLevel();
|
||||
|
||||
this->BuildHist(depth);
|
||||
this->FindSplit(depth);
|
||||
this->UpdatePosition(depth);
|
||||
|
||||
@@ -29,13 +29,14 @@ class TestGPU(unittest.TestCase):
|
||||
|
||||
ag_param = {'max_depth': 2,
|
||||
'tree_method': 'exact',
|
||||
'nthread': 1,
|
||||
'nthread': 0,
|
||||
'eta': 1,
|
||||
'silent': 1,
|
||||
'objective': 'binary:logistic',
|
||||
'eval_metric': 'auc'}
|
||||
ag_param2 = {'max_depth': 2,
|
||||
'tree_method': 'gpu_exact',
|
||||
'nthread': 0,
|
||||
'eta': 1,
|
||||
'silent': 1,
|
||||
'objective': 'binary:logistic',
|
||||
@@ -59,6 +60,7 @@ class TestGPU(unittest.TestCase):
|
||||
dtest = xgb.DMatrix(X_test, y_test)
|
||||
|
||||
param = {'objective': 'binary:logistic',
|
||||
'nthread': 0,
|
||||
'tree_method': 'gpu_exact',
|
||||
'max_depth': 3,
|
||||
'eval_metric': 'auc'}
|
||||
@@ -75,6 +77,7 @@ class TestGPU(unittest.TestCase):
|
||||
dtrain2 = xgb.DMatrix(X2, label=y2)
|
||||
|
||||
param = {'objective': 'binary:logistic',
|
||||
'nthread': 0,
|
||||
'tree_method': 'gpu_exact',
|
||||
'max_depth': 2,
|
||||
'eval_metric': 'auc'}
|
||||
@@ -128,26 +131,28 @@ class TestGPU(unittest.TestCase):
|
||||
# regression test --- hist must be same as exact on all-categorial data
|
||||
ag_param = {'max_depth': max_depth,
|
||||
'tree_method': 'exact',
|
||||
'nthread': 1,
|
||||
'nthread': 0,
|
||||
'eta': 1,
|
||||
'silent': 1,
|
||||
'objective': 'binary:logistic',
|
||||
'eval_metric': 'auc'}
|
||||
ag_param2 = {'max_depth': max_depth,
|
||||
'nthread': 0,
|
||||
'tree_method': 'gpu_hist',
|
||||
'eta': 1,
|
||||
'silent': 1,
|
||||
'n_gpus': 1,
|
||||
'objective': 'binary:logistic',
|
||||
'max_bin': max_bin,
|
||||
'max_bin': max_bin,
|
||||
'eval_metric': 'auc'}
|
||||
ag_param3 = {'max_depth': max_depth,
|
||||
'nthread': 0,
|
||||
'tree_method': 'gpu_hist',
|
||||
'eta': 1,
|
||||
'silent': 1,
|
||||
'n_gpus': n_gpus,
|
||||
'objective': 'binary:logistic',
|
||||
'max_bin': max_bin,
|
||||
'objective': 'binary:logistic',
|
||||
'max_bin': max_bin,
|
||||
'eval_metric': 'auc'}
|
||||
ag_res = {}
|
||||
ag_res2 = {}
|
||||
@@ -178,6 +183,7 @@ class TestGPU(unittest.TestCase):
|
||||
|
||||
param = {'objective': 'binary:logistic',
|
||||
'tree_method': 'gpu_hist',
|
||||
'nthread': 0,
|
||||
'max_depth': max_depth,
|
||||
'n_gpus': 1,
|
||||
'max_bin': max_bin,
|
||||
@@ -189,6 +195,7 @@ class TestGPU(unittest.TestCase):
|
||||
assert self.non_decreasing(res['train']['auc'])
|
||||
#assert self.non_decreasing(res['test']['auc'])
|
||||
param2 = {'objective': 'binary:logistic',
|
||||
'nthread': 0,
|
||||
'tree_method': 'gpu_hist',
|
||||
'max_depth': max_depth,
|
||||
'n_gpus': n_gpus,
|
||||
@@ -211,6 +218,7 @@ class TestGPU(unittest.TestCase):
|
||||
dtrain2 = xgb.DMatrix(X2, label=y2)
|
||||
|
||||
param = {'objective': 'binary:logistic',
|
||||
'nthread': 0,
|
||||
'tree_method': 'gpu_hist',
|
||||
'max_depth': max_depth,
|
||||
'n_gpus': n_gpus,
|
||||
@@ -250,6 +258,7 @@ class TestGPU(unittest.TestCase):
|
||||
######################################################################
|
||||
# fail-safe test for max_bin
|
||||
param = {'objective': 'binary:logistic',
|
||||
'nthread': 0,
|
||||
'tree_method': 'gpu_hist',
|
||||
'max_depth': max_depth,
|
||||
'n_gpus': n_gpus,
|
||||
@@ -263,6 +272,7 @@ class TestGPU(unittest.TestCase):
|
||||
######################################################################
|
||||
# subsampling
|
||||
param = {'objective': 'binary:logistic',
|
||||
'nthread': 0,
|
||||
'tree_method': 'gpu_hist',
|
||||
'max_depth': max_depth,
|
||||
'n_gpus': n_gpus,
|
||||
@@ -279,6 +289,7 @@ class TestGPU(unittest.TestCase):
|
||||
######################################################################
|
||||
# fail-safe test for max_bin=2
|
||||
param = {'objective': 'binary:logistic',
|
||||
'nthread': 0,
|
||||
'tree_method': 'gpu_hist',
|
||||
'max_depth': 2,
|
||||
'n_gpus': n_gpus,
|
||||
|
||||
@@ -18,62 +18,48 @@ rng = np.random.RandomState(1994)
|
||||
# "realistic" size based upon http://stat-computing.org/dataexpo/2009/ , which has been processed to one-hot encode categoricalsxsy
|
||||
cols = 31
|
||||
# reduced to fit onto 1 gpu but still be large
|
||||
rows2 = 5000 # medium
|
||||
#rows2 = 4032 # fake large for testing
|
||||
rows3 = 5000 # small
|
||||
rows2 = 4360032 # medium
|
||||
rows1 = 42360032 # large
|
||||
#rows2 = 152360032 # can do this for multi-gpu test (very large)
|
||||
rowslist = [rows1, rows2]
|
||||
#rows1 = 152360032 # can do this for multi-gpu test (very large)
|
||||
rowslist = [rows1, rows2, rows3]
|
||||
|
||||
|
||||
class TestGPU(unittest.TestCase):
|
||||
def test_large(self):
|
||||
eprint("Starting test for large data")
|
||||
tm._skip_if_no_sklearn()
|
||||
from sklearn.datasets import load_digits
|
||||
try:
|
||||
from sklearn.model_selection import train_test_split
|
||||
except:
|
||||
from sklearn.cross_validation import train_test_split
|
||||
|
||||
|
||||
for rows in rowslist:
|
||||
|
||||
eprint("Creating train data rows=%d cols=%d" % (rows,cols))
|
||||
X, y = make_classification(rows, n_features=cols, random_state=7)
|
||||
rowstest = int(rows*0.2)
|
||||
eprint("Creating test data rows=%d cols=%d" % (rowstest,cols))
|
||||
# note the new random state. if chose same as train random state, exact methods can memorize and do very well on test even for random data, while hist cannot
|
||||
Xtest, ytest = make_classification(rowstest, n_features=cols, random_state=8)
|
||||
|
||||
np.random.seed(7)
|
||||
X = np.random.rand(rows, cols)
|
||||
y = np.random.rand(rows)
|
||||
eprint("Starting DMatrix(X,y)")
|
||||
ag_dtrain = xgb.DMatrix(X,y)
|
||||
eprint("Starting DMatrix(Xtest,ytest)")
|
||||
ag_dtest = xgb.DMatrix(Xtest,ytest)
|
||||
ag_dtrain = xgb.DMatrix(X,y,nthread=0)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
max_depth=6
|
||||
max_bin=1024
|
||||
|
||||
# regression test --- hist must be same as exact on all-categorial data
|
||||
ag_param = {'max_depth': max_depth,
|
||||
'tree_method': 'exact',
|
||||
#'nthread': 1,
|
||||
'nthread': 0,
|
||||
'eta': 1,
|
||||
'silent': 0,
|
||||
'objective': 'binary:logistic',
|
||||
'eval_metric': 'auc'}
|
||||
ag_paramb = {'max_depth': max_depth,
|
||||
'tree_method': 'hist',
|
||||
#'nthread': 1,
|
||||
'nthread': 0,
|
||||
'eta': 1,
|
||||
'silent': 0,
|
||||
'objective': 'binary:logistic',
|
||||
'eval_metric': 'auc'}
|
||||
ag_param2 = {'max_depth': max_depth,
|
||||
'tree_method': 'gpu_hist',
|
||||
'nthread': 0,
|
||||
'eta': 1,
|
||||
'silent': 0,
|
||||
'n_gpus': 1,
|
||||
@@ -82,26 +68,18 @@ class TestGPU(unittest.TestCase):
|
||||
'eval_metric': 'auc'}
|
||||
ag_param3 = {'max_depth': max_depth,
|
||||
'tree_method': 'gpu_hist',
|
||||
'nthread': 0,
|
||||
'eta': 1,
|
||||
'silent': 0,
|
||||
'n_gpus': -1,
|
||||
'objective': 'binary:logistic',
|
||||
'max_bin': max_bin,
|
||||
'eval_metric': 'auc'}
|
||||
#ag_param4 = {'max_depth': max_depth,
|
||||
# 'tree_method': 'gpu_exact',
|
||||
# 'eta': 1,
|
||||
# 'silent': 0,
|
||||
# 'n_gpus': 1,
|
||||
# 'objective': 'binary:logistic',
|
||||
# 'max_bin': max_bin,
|
||||
# 'eval_metric': 'auc'}
|
||||
ag_res = {}
|
||||
ag_resb = {}
|
||||
ag_res2 = {}
|
||||
ag_res3 = {}
|
||||
#ag_res4 = {}
|
||||
|
||||
|
||||
num_rounds = 1
|
||||
|
||||
eprint("normal updater")
|
||||
@@ -116,19 +94,10 @@ class TestGPU(unittest.TestCase):
|
||||
eprint("gpu_hist updater all gpus")
|
||||
xgb.train(ag_param3, ag_dtrain, num_rounds, [(ag_dtrain, 'train'), (ag_dtest, 'test')],
|
||||
evals_result=ag_res3)
|
||||
#eprint("gpu_exact updater")
|
||||
#xgb.train(ag_param4, ag_dtrain, num_rounds, [(ag_dtrain, 'train'), (ag_dtest, 'test')],
|
||||
# evals_result=ag_res4)
|
||||
|
||||
|
||||
assert np.fabs(ag_res['train']['auc'][0] - ag_resb['train']['auc'][0])<0.001
|
||||
assert np.fabs(ag_res['train']['auc'][0] - ag_res2['train']['auc'][0])<0.001
|
||||
assert np.fabs(ag_res['train']['auc'][0] - ag_res3['train']['auc'][0])<0.001
|
||||
#assert np.fabs(ag_res['train']['auc'][0] - ag_res4['train']['auc'][0])<0.001
|
||||
|
||||
assert np.fabs(ag_res['test']['auc'][0] - ag_resb['test']['auc'][0])<0.01
|
||||
assert np.fabs(ag_res['test']['auc'][0] - ag_res2['test']['auc'][0])<0.01
|
||||
assert np.fabs(ag_res['test']['auc'][0] - ag_res3['test']['auc'][0])<0.01
|
||||
#assert np.fabs(ag_res['test']['auc'][0] - ag_res4['test']['auc'][0])<0.01
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user