[GPU Plugin] Fast histogram speed improvements. Updated benchmarks. (#2258)

This commit is contained in:
Rory Mitchell
2017-05-09 04:21:38 +12:00
committed by Tianqi Chen
parent 98ea461532
commit 6bf968efe6
12 changed files with 433 additions and 241 deletions

View File

@@ -1,32 +1,50 @@
#pylint: skip-file
# pylint: skip-file
import sys, argparse
import xgboost as xgb
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
import time
n = 1000000
num_rounds = 100
num_rounds = 500
X,y = make_classification(n, n_features=50, random_state=7)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
dtrain = xgb.DMatrix(X_train, y_train)
dtest = xgb.DMatrix(X_test, y_test)
def run_benchmark(args, gpu_algorithm, cpu_algorithm):
print("Generating dataset: {} rows * {} columns".format(args.rows,args.columns))
X, y = make_classification(args.rows, n_features=args.columns, random_state=7)
dtrain = xgb.DMatrix(X, y)
param = {'objective': 'binary:logistic',
'tree_method': 'exact',
'updater': 'grow_gpu_hist',
'max_depth': 8,
'silent': 1,
'eval_metric': 'auc'}
res = {}
tmp = time.time()
xgb.train(param, dtrain, num_rounds, [(dtrain, 'train'), (dtest, 'test')],
evals_result=res)
print ("GPU: %s seconds" % (str(time.time() - tmp)))
param = {'objective': 'binary:logistic',
'tree_method': 'exact',
'max_depth': 6,
'silent': 1,
'eval_metric': 'auc'}
tmp = time.time()
param['updater'] = 'grow_fast_histmaker'
xgb.train(param, dtrain, num_rounds, [(dtrain, 'train'), (dtest, 'test')], evals_result=res)
print ("CPU: %s seconds" % (str(time.time() - tmp)))
param['updater'] = gpu_algorithm
print("Training with '%s'" % param['updater'])
tmp = time.time()
xgb.train(param, dtrain, args.iterations)
print ("Time: %s seconds" % (str(time.time() - tmp)))
param['updater'] = cpu_algorithm
print("Training with '%s'" % param['updater'])
tmp = time.time()
xgb.train(param, dtrain, args.iterations)
print ("Time: %s seconds" % (str(time.time() - tmp)))
parser = argparse.ArgumentParser()
parser.add_argument('--algorithm', choices=['all', 'grow_gpu', 'grow_gpu_hist'], required=True)
parser.add_argument('--rows',type=int,default=1000000)
parser.add_argument('--columns',type=int,default=50)
parser.add_argument('--iterations',type=int,default=500)
args = parser.parse_args()
if 'grow_gpu_hist' in args.algorithm:
run_benchmark(args, args.algorithm, 'grow_fast_histmaker')
if 'grow_gpu ' in args.algorithm:
run_benchmark(args, args.algorithm, 'grow_colmaker')
if 'all' in args.algorithm:
run_benchmark(args, 'grow_gpu', 'grow_colmaker')
run_benchmark(args, 'grow_gpu_hist', 'grow_fast_histmaker')