[GPU-Plugin] Change GPU plugin to use tree_method parameter, bump cmake version to 3.5 for GPU plugin, add compute architecture 3.5, remove unused cmake files (#2455)

This commit is contained in:
Rory Mitchell
2017-06-29 16:19:45 +12:00
committed by GitHub
parent 88488fdbb9
commit 48f3003302
8 changed files with 168 additions and 835 deletions

View File

@@ -1,16 +1,16 @@
# CUDA Accelerated Tree Construction Algorithms
This plugin adds GPU accelerated tree construction algorithms to XGBoost.
## Usage
Specify the 'updater' parameter as one of the following algorithms.
Specify the 'tree_method' parameter as one of the following algorithms.
### Algorithms
| updater | Description |
| tree_method | Description |
| --- | --- |
grow_gpu | The standard XGBoost tree construction algorithm. Performs exact search for splits. Slower and uses considerably more memory than 'grow_gpu_hist' |
grow_gpu_hist | Equivalent to the XGBoost fast histogram algorithm. Faster and uses considerably less memory. Splits may be less accurate. |
gpu_exact | The standard XGBoost tree construction algorithm. Performs exact search for splits. Slower and uses considerably more memory than 'gpu_hist' |
gpu_hist | Equivalent to the XGBoost fast histogram algorithm. Faster and uses considerably less memory. Splits may be less accurate. |
### Supported parameters
| parameter | grow_gpu | grow_gpu_hist |
| parameter | gpu_exact | gpu_hist |
| --- | --- | --- |
subsample | ✔ | ✔ |
colsample_bytree | ✔ | ✔ |
@@ -29,7 +29,7 @@ Python example:
```python
param['gpu_id'] = 1
param['max_bin'] = 16
param['updater'] = 'grow_gpu_hist'
param['tree_method'] = 'gpu_hist'
```
## Benchmarks
To run benchmarks on synthetic data for binary classification:
@@ -39,18 +39,18 @@ $ python benchmark/benchmark.py
Training time on 1000000 rows x 50 columns with 500 boosting iterations on i7-6700K CPU @ 4.00GHz and Pascal Titan X.
| Updater | Time (s) |
| tree_method | Time (s) |
| --- | --- |
| grow_gpu_hist | 11.09 |
| grow_fast_histmaker (histogram XGBoost - CPU) | 41.75 |
| grow_gpu | 193.90 |
| grow_colmaker (standard XGBoost - CPU) | 720.12 |
| gpu_hist | 11.09 |
| hist (histogram XGBoost - CPU) | 41.75 |
| gpu_exact | 193.90 |
| exact (standard XGBoost - CPU) | 720.12 |
[See here](http://dmlc.ml/2016/12/14/GPU-accelerated-xgboost.html) for additional performance benchmarks of the 'grow_gpu' updater.
[See here](http://dmlc.ml/2016/12/14/GPU-accelerated-xgboost.html) for additional performance benchmarks of the 'gpu_exact' tree_method.
## Test
To run tests:
To run tests:
```bash
$ python -m nose test/python/
```
@@ -122,6 +122,13 @@ $ make PLUGIN_UPDATER_GPU=ON GTEST_PATH=${CACHE_PREFIX} test
```
## Changelog
##### 2017/6/26
* Change API to use tree_method parameter
* Increase required cmake version to 3.5
* Add compute arch 3.5 to default archs
* Set default n_gpus to 1
##### 2017/6/5
* Multi-GPU support for histogram method using NVIDIA NCCL.

View File

@@ -14,19 +14,18 @@ def run_benchmark(args, gpu_algorithm, cpu_algorithm):
dtrain = xgb.DMatrix(X, y)
param = {'objective': 'binary:logistic',
'tree_method': 'exact',
'max_depth': 6,
'silent': 1,
'eval_metric': 'auc'}
param['updater'] = gpu_algorithm
print("Training with '%s'" % param['updater'])
param['tree_method'] = gpu_algorithm
print("Training with '%s'" % param['tree_method'])
tmp = time.time()
xgb.train(param, dtrain, args.iterations)
print ("Time: %s seconds" % (str(time.time() - tmp)))
param['updater'] = cpu_algorithm
print("Training with '%s'" % param['updater'])
param['tree_method'] = cpu_algorithm
print("Training with '%s'" % param['tree_method'])
tmp = time.time()
xgb.train(param, dtrain, args.iterations)
print ("Time: %s seconds" % (str(time.time() - tmp)))
@@ -34,17 +33,17 @@ def run_benchmark(args, gpu_algorithm, cpu_algorithm):
parser = argparse.ArgumentParser()
parser.add_argument('--algorithm', choices=['all', 'grow_gpu', 'grow_gpu_hist'], required=True)
parser.add_argument('--algorithm', choices=['all', 'gpu_exact', 'gpu_hist'], default='all')
parser.add_argument('--rows',type=int,default=1000000)
parser.add_argument('--columns',type=int,default=50)
parser.add_argument('--iterations',type=int,default=500)
args = parser.parse_args()
if 'grow_gpu_hist' in args.algorithm:
run_benchmark(args, args.algorithm, 'grow_fast_histmaker')
if 'grow_gpu' in args.algorithm:
run_benchmark(args, args.algorithm, 'grow_colmaker')
if 'gpu_hist' in args.algorithm:
run_benchmark(args, args.algorithm, 'hist')
if 'gpu_exact' in args.algorithm:
run_benchmark(args, args.algorithm, 'exact')
if 'all' in args.algorithm:
run_benchmark(args, 'grow_gpu', 'grow_colmaker')
run_benchmark(args, 'grow_gpu_hist', 'grow_fast_histmaker')
run_benchmark(args, 'gpu_exact', 'exact')
run_benchmark(args, 'gpu_hist', 'hist')

View File

@@ -35,7 +35,7 @@ class TestGPU(unittest.TestCase):
'objective': 'binary:logistic',
'eval_metric': 'auc'}
ag_param2 = {'max_depth': 2,
'updater': 'grow_gpu',
'tree_method': 'gpu_exact',
'eta': 1,
'silent': 1,
'objective': 'binary:logistic',
@@ -59,7 +59,7 @@ class TestGPU(unittest.TestCase):
dtest = xgb.DMatrix(X_test, y_test)
param = {'objective': 'binary:logistic',
'updater': 'grow_gpu',
'tree_method': 'gpu_exact',
'max_depth': 3,
'eval_metric': 'auc'}
res = {}
@@ -75,7 +75,7 @@ class TestGPU(unittest.TestCase):
dtrain2 = xgb.DMatrix(X2, label=y2)
param = {'objective': 'binary:logistic',
'updater': 'grow_gpu',
'tree_method': 'gpu_exact',
'max_depth': 2,
'eval_metric': 'auc'}
res = {}
@@ -134,7 +134,7 @@ class TestGPU(unittest.TestCase):
'objective': 'binary:logistic',
'eval_metric': 'auc'}
ag_param2 = {'max_depth': max_depth,
'updater': 'grow_gpu_hist',
'tree_method': 'gpu_hist',
'eta': 1,
'silent': 1,
'n_gpus': 1,
@@ -142,7 +142,7 @@ class TestGPU(unittest.TestCase):
'max_bin': max_bin,
'eval_metric': 'auc'}
ag_param3 = {'max_depth': max_depth,
'updater': 'grow_gpu_hist',
'tree_method': 'gpu_hist',
'eta': 1,
'silent': 1,
'n_gpus': n_gpus,
@@ -177,7 +177,7 @@ class TestGPU(unittest.TestCase):
dtest = xgb.DMatrix(X_test, y_test)
param = {'objective': 'binary:logistic',
'updater': 'grow_gpu_hist',
'tree_method': 'gpu_hist',
'max_depth': max_depth,
'n_gpus': 1,
'max_bin': max_bin,
@@ -189,7 +189,7 @@ class TestGPU(unittest.TestCase):
assert self.non_decreasing(res['train']['auc'])
#assert self.non_decreasing(res['test']['auc'])
param2 = {'objective': 'binary:logistic',
'updater': 'grow_gpu_hist',
'tree_method': 'gpu_hist',
'max_depth': max_depth,
'n_gpus': n_gpus,
'max_bin': max_bin,
@@ -211,7 +211,7 @@ class TestGPU(unittest.TestCase):
dtrain2 = xgb.DMatrix(X2, label=y2)
param = {'objective': 'binary:logistic',
'updater': 'grow_gpu_hist',
'tree_method': 'gpu_hist',
'max_depth': max_depth,
'n_gpus': n_gpus,
'max_bin': max_bin,
@@ -250,7 +250,7 @@ class TestGPU(unittest.TestCase):
######################################################################
# fail-safe test for max_bin
param = {'objective': 'binary:logistic',
'updater': 'grow_gpu_hist',
'tree_method': 'gpu_hist',
'max_depth': max_depth,
'n_gpus': n_gpus,
'eval_metric': 'auc',
@@ -263,7 +263,7 @@ class TestGPU(unittest.TestCase):
######################################################################
# subsampling
param = {'objective': 'binary:logistic',
'updater': 'grow_gpu_hist',
'tree_method': 'gpu_hist',
'max_depth': max_depth,
'n_gpus': n_gpus,
'eval_metric': 'auc',
@@ -279,7 +279,7 @@ class TestGPU(unittest.TestCase):
######################################################################
# fail-safe test for max_bin=2
param = {'objective': 'binary:logistic',
'updater': 'grow_gpu_hist',
'tree_method': 'gpu_hist',
'max_depth': 2,
'n_gpus': n_gpus,
'eval_metric': 'auc',