Add GPU documentation (#2695)

* Add GPU documentation

* Update Python GPU tests
Rory Mitchell
2017-09-10 19:42:46 +12:00
committed by GitHub
parent e6a9063344
commit 9c85903f0b
4 changed files with 176 additions and 82 deletions
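
For context: the GPU documentation added in this commit covers the `gpu_hist` tree method that the updated test below exercises. A minimal sketch of single-GPU training with that parameter, assuming a CUDA-enabled XGBoost build of this era; the synthetic data shapes, round count, and parameter values are illustrative, not taken from the commit:

    import numpy as np
    import xgboost as xgb

    # Illustrative synthetic binary-classification data (sizes are arbitrary).
    X = np.random.rand(10000, 31)
    y = np.random.randint(2, size=10000)
    dtrain = xgb.DMatrix(X, y)

    params = {'tree_method': 'gpu_hist',   # GPU histogram updater covered by the new docs
              'n_gpus': 1,                 # single-GPU run, as in the test below
              'max_depth': 6,
              'objective': 'binary:logistic',
              'eval_metric': 'auc'}
    res = {}
    xgb.train(params, dtrain, 10, [(dtrain, 'train')], evals_result=res)
    print(res['train']['auc'][-1])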

@@ -1,39 +1,38 @@
 from __future__ import print_function
 #pylint: skip-file
 import sys
 import time
 sys.path.append("../../tests/python")
 import xgboost as xgb
 import testing as tm
 import numpy as np
 import unittest
 from nose.plugins.attrib import attr


 def eprint(*args, **kwargs):
-    print(*args, file=sys.stderr, **kwargs) ; sys.stderr.flush()
-    print(*args, file=sys.stdout, **kwargs) ; sys.stdout.flush()
+    print(*args, file=sys.stderr, **kwargs)
+    sys.stderr.flush()
+    print(*args, file=sys.stdout, **kwargs)
+    sys.stdout.flush()


 rng = np.random.RandomState(1994)

 # "realistic" size based upon http://stat-computing.org/dataexpo/2009/, which has been processed to one-hot encode categoricals
 cols = 31
 # reduced to fit onto 1 gpu but still be large
-rows3 = 5000 # small
-rows2 = 4360032 # medium
-rows1 = 42360032 # large
-#rows1 = 152360032 # can do this for multi-gpu test (very large)
+rows3 = 5000  # small
+rows2 = 4360032  # medium
+rows1 = 42360032  # large
+# rows1 = 152360032  # can do this for multi-gpu test (very large)
 rowslist = [rows1, rows2, rows3]


 @attr('slow')
 class TestGPU(unittest.TestCase):
     def test_large(self):
         eprint("Starting test for large data")
         tm._skip_if_no_sklearn()

         for rows in rowslist:
-            eprint("Creating train data rows=%d cols=%d" % (rows,cols))
+            eprint("Creating train data rows=%d cols=%d" % (rows, cols))
             tmp = time.time()
             np.random.seed(7)
             X = np.random.rand(rows, cols)
@@ -42,12 +41,12 @@ class TestGPU(unittest.TestCase):
             eprint("Starting DMatrix(X,y)")
             tmp = time.time()
-            ag_dtrain = xgb.DMatrix(X,y,nthread=40)
+            ag_dtrain = xgb.DMatrix(X, y, nthread=40)
             print("Time to DMatrix: %r" % (time.time() - tmp))

-            max_depth=6
-            max_bin=1024
+            max_depth = 6
+            max_bin = 1024

             # regression test --- hist must be same as exact on all-categorical data
             ag_param = {'max_depth': max_depth,
                         'tree_method': 'exact',
@@ -58,23 +57,23 @@ class TestGPU(unittest.TestCase):
                         'objective': 'binary:logistic',
                         'eval_metric': 'auc'}
             ag_paramb = {'max_depth': max_depth,
-                'tree_method': 'hist',
-                'nthread': 0,
-                'eta': 1,
-                'silent': 0,
-                'debug_verbose': 5,
-                'objective': 'binary:logistic',
-                'eval_metric': 'auc'}
+                         'tree_method': 'hist',
+                         'nthread': 0,
+                         'eta': 1,
+                         'silent': 0,
+                         'debug_verbose': 5,
+                         'objective': 'binary:logistic',
+                         'eval_metric': 'auc'}
             ag_param2 = {'max_depth': max_depth,
-                'tree_method': 'gpu_hist',
-                'nthread': 0,
-                'eta': 1,
-                'silent': 0,
-                'debug_verbose': 5,
-                'n_gpus': 1,
-                'objective': 'binary:logistic',
-                'max_bin': max_bin,
-                'eval_metric': 'auc'}
+                         'tree_method': 'gpu_hist',
+                         'nthread': 0,
+                         'eta': 1,
+                         'silent': 0,
+                         'debug_verbose': 5,
+                         'n_gpus': 1,
+                         'objective': 'binary:logistic',
+                         'max_bin': max_bin,
+                         'eval_metric': 'auc'}
             ag_param3 = {'max_depth': max_depth,
                          'tree_method': 'gpu_hist',
                          'nthread': 0,
@@ -92,10 +91,10 @@ class TestGPU(unittest.TestCase):
             num_rounds = 1

             tmp = time.time()
-            #eprint("hist updater")
-            #xgb.train(ag_paramb, ag_dtrain, num_rounds, [(ag_dtrain, 'train')],
+            # eprint("hist updater")
+            # xgb.train(ag_paramb, ag_dtrain, num_rounds, [(ag_dtrain, 'train')],
             #          evals_result=ag_resb)
-            #print("Time to Train: %s seconds" % (str(time.time() - tmp)))
+            # print("Time to Train: %s seconds" % (str(time.time() - tmp)))

             tmp = time.time()
             eprint("gpu_hist updater 1 gpu")
@@ -108,5 +107,3 @@ class TestGPU(unittest.TestCase):
             xgb.train(ag_param3, ag_dtrain, num_rounds, [(ag_dtrain, 'train')],
                       evals_result=ag_res3)
             print("Time to Train: %s seconds" % (str(time.time() - tmp)))