[GPU-Plugin] Change GPU plugin to use tree_method parameter, bump cmake version to 3.5 for GPU plugin, add compute architecture 3.5, remove unused cmake files (#2455)
This commit is contained in:
@@ -1,16 +1,16 @@
|
||||
# CUDA Accelerated Tree Construction Algorithms
|
||||
This plugin adds GPU accelerated tree construction algorithms to XGBoost.
|
||||
## Usage
|
||||
Specify the 'updater' parameter as one of the following algorithms.
|
||||
Specify the 'tree_method' parameter as one of the following algorithms.
|
||||
|
||||
### Algorithms
|
||||
| updater | Description |
|
||||
| tree_method | Description |
|
||||
| --- | --- |
|
||||
grow_gpu | The standard XGBoost tree construction algorithm. Performs exact search for splits. Slower and uses considerably more memory than 'grow_gpu_hist' |
|
||||
grow_gpu_hist | Equivalent to the XGBoost fast histogram algorithm. Faster and uses considerably less memory. Splits may be less accurate. |
|
||||
gpu_exact | The standard XGBoost tree construction algorithm. Performs exact search for splits. Slower and uses considerably more memory than 'gpu_hist' |
|
||||
gpu_hist | Equivalent to the XGBoost fast histogram algorithm. Faster and uses considerably less memory. Splits may be less accurate. |
|
||||
|
||||
### Supported parameters
|
||||
| parameter | grow_gpu | grow_gpu_hist |
|
||||
| parameter | gpu_exact | gpu_hist |
|
||||
| --- | --- | --- |
|
||||
subsample | ✔ | ✔ |
|
||||
colsample_bytree | ✔ | ✔|
|
||||
@@ -29,7 +29,7 @@ Python example:
|
||||
```python
|
||||
param['gpu_id'] = 1
|
||||
param['max_bin'] = 16
|
||||
param['updater'] = 'grow_gpu_hist'
|
||||
param['tree_method'] = 'gpu_hist'
|
||||
```
|
||||
## Benchmarks
|
||||
To run benchmarks on synthetic data for binary classification:
|
||||
@@ -39,18 +39,18 @@ $ python benchmark/benchmark.py
|
||||
|
||||
Training time on 1000000 rows x 50 columns with 500 boosting iterations on i7-6700K CPU @ 4.00GHz and Pascal Titan X.
|
||||
|
||||
| Updater | Time (s) |
|
||||
| tree_method | Time (s) |
|
||||
| --- | --- |
|
||||
| grow_gpu_hist | 11.09 |
|
||||
| grow_fast_histmaker (histogram XGBoost - CPU) | 41.75 |
|
||||
| grow_gpu | 193.90 |
|
||||
| grow_colmaker (standard XGBoost - CPU) | 720.12 |
|
||||
| gpu_hist | 11.09 |
|
||||
| hist (histogram XGBoost - CPU) | 41.75 |
|
||||
| gpu_exact | 193.90 |
|
||||
| exact (standard XGBoost - CPU) | 720.12 |
|
||||
|
||||
|
||||
[See here](http://dmlc.ml/2016/12/14/GPU-accelerated-xgboost.html) for additional performance benchmarks of the 'grow_gpu' updater.
|
||||
[See here](http://dmlc.ml/2016/12/14/GPU-accelerated-xgboost.html) for additional performance benchmarks of the 'gpu_exact' tree_method.
|
||||
|
||||
## Test
|
||||
To run tests:
|
||||
To run tests:
|
||||
```bash
|
||||
$ python -m nose test/python/
|
||||
```
|
||||
@@ -122,6 +122,13 @@ $ make PLUGIN_UPDATER_GPU=ON GTEST_PATH=${CACHE_PREFIX} test
|
||||
```
|
||||
|
||||
## Changelog
|
||||
##### 2017/6/26
|
||||
|
||||
* Change API to use tree_method parameter
|
||||
* Increase required cmake version to 3.5
|
||||
* Add compute arch 3.5 to default archs
|
||||
* Set default n_gpus to 1
|
||||
|
||||
##### 2017/6/5
|
||||
|
||||
* Multi-GPU support for histogram method using NVIDIA NCCL.
|
||||
|
||||
@@ -14,19 +14,18 @@ def run_benchmark(args, gpu_algorithm, cpu_algorithm):
|
||||
dtrain = xgb.DMatrix(X, y)
|
||||
|
||||
param = {'objective': 'binary:logistic',
|
||||
'tree_method': 'exact',
|
||||
'max_depth': 6,
|
||||
'silent': 1,
|
||||
'eval_metric': 'auc'}
|
||||
|
||||
param['updater'] = gpu_algorithm
|
||||
print("Training with '%s'" % param['updater'])
|
||||
param['tree_method'] = gpu_algorithm
|
||||
print("Training with '%s'" % param['tree_method'])
|
||||
tmp = time.time()
|
||||
xgb.train(param, dtrain, args.iterations)
|
||||
print ("Time: %s seconds" % (str(time.time() - tmp)))
|
||||
|
||||
param['updater'] = cpu_algorithm
|
||||
print("Training with '%s'" % param['updater'])
|
||||
param['tree_method'] = cpu_algorithm
|
||||
print("Training with '%s'" % param['tree_method'])
|
||||
tmp = time.time()
|
||||
xgb.train(param, dtrain, args.iterations)
|
||||
print ("Time: %s seconds" % (str(time.time() - tmp)))
|
||||
@@ -34,17 +33,17 @@ def run_benchmark(args, gpu_algorithm, cpu_algorithm):
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--algorithm', choices=['all', 'grow_gpu', 'grow_gpu_hist'], required=True)
|
||||
parser.add_argument('--algorithm', choices=['all', 'gpu_exact', 'gpu_hist'], default='all')
|
||||
parser.add_argument('--rows',type=int,default=1000000)
|
||||
parser.add_argument('--columns',type=int,default=50)
|
||||
parser.add_argument('--iterations',type=int,default=500)
|
||||
args = parser.parse_args()
|
||||
|
||||
if 'grow_gpu_hist' in args.algorithm:
|
||||
run_benchmark(args, args.algorithm, 'grow_fast_histmaker')
|
||||
if 'grow_gpu' in args.algorithm:
|
||||
run_benchmark(args, args.algorithm, 'grow_colmaker')
|
||||
if 'gpu_hist' in args.algorithm:
|
||||
run_benchmark(args, args.algorithm, 'hist')
|
||||
if 'gpu_exact' in args.algorithm:
|
||||
run_benchmark(args, args.algorithm, 'exact')
|
||||
if 'all' in args.algorithm:
|
||||
run_benchmark(args, 'grow_gpu', 'grow_colmaker')
|
||||
run_benchmark(args, 'grow_gpu_hist', 'grow_fast_histmaker')
|
||||
run_benchmark(args, 'gpu_exact', 'exact')
|
||||
run_benchmark(args, 'gpu_hist', 'hist')
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ class TestGPU(unittest.TestCase):
|
||||
'objective': 'binary:logistic',
|
||||
'eval_metric': 'auc'}
|
||||
ag_param2 = {'max_depth': 2,
|
||||
'updater': 'grow_gpu',
|
||||
'tree_method': 'gpu_exact',
|
||||
'eta': 1,
|
||||
'silent': 1,
|
||||
'objective': 'binary:logistic',
|
||||
@@ -59,7 +59,7 @@ class TestGPU(unittest.TestCase):
|
||||
dtest = xgb.DMatrix(X_test, y_test)
|
||||
|
||||
param = {'objective': 'binary:logistic',
|
||||
'updater': 'grow_gpu',
|
||||
'tree_method': 'gpu_exact',
|
||||
'max_depth': 3,
|
||||
'eval_metric': 'auc'}
|
||||
res = {}
|
||||
@@ -75,7 +75,7 @@ class TestGPU(unittest.TestCase):
|
||||
dtrain2 = xgb.DMatrix(X2, label=y2)
|
||||
|
||||
param = {'objective': 'binary:logistic',
|
||||
'updater': 'grow_gpu',
|
||||
'tree_method': 'gpu_exact',
|
||||
'max_depth': 2,
|
||||
'eval_metric': 'auc'}
|
||||
res = {}
|
||||
@@ -134,7 +134,7 @@ class TestGPU(unittest.TestCase):
|
||||
'objective': 'binary:logistic',
|
||||
'eval_metric': 'auc'}
|
||||
ag_param2 = {'max_depth': max_depth,
|
||||
'updater': 'grow_gpu_hist',
|
||||
'tree_method': 'gpu_hist',
|
||||
'eta': 1,
|
||||
'silent': 1,
|
||||
'n_gpus': 1,
|
||||
@@ -142,7 +142,7 @@ class TestGPU(unittest.TestCase):
|
||||
'max_bin': max_bin,
|
||||
'eval_metric': 'auc'}
|
||||
ag_param3 = {'max_depth': max_depth,
|
||||
'updater': 'grow_gpu_hist',
|
||||
'tree_method': 'gpu_hist',
|
||||
'eta': 1,
|
||||
'silent': 1,
|
||||
'n_gpus': n_gpus,
|
||||
@@ -177,7 +177,7 @@ class TestGPU(unittest.TestCase):
|
||||
dtest = xgb.DMatrix(X_test, y_test)
|
||||
|
||||
param = {'objective': 'binary:logistic',
|
||||
'updater': 'grow_gpu_hist',
|
||||
'tree_method': 'gpu_hist',
|
||||
'max_depth': max_depth,
|
||||
'n_gpus': 1,
|
||||
'max_bin': max_bin,
|
||||
@@ -189,7 +189,7 @@ class TestGPU(unittest.TestCase):
|
||||
assert self.non_decreasing(res['train']['auc'])
|
||||
#assert self.non_decreasing(res['test']['auc'])
|
||||
param2 = {'objective': 'binary:logistic',
|
||||
'updater': 'grow_gpu_hist',
|
||||
'tree_method': 'gpu_hist',
|
||||
'max_depth': max_depth,
|
||||
'n_gpus': n_gpus,
|
||||
'max_bin': max_bin,
|
||||
@@ -211,7 +211,7 @@ class TestGPU(unittest.TestCase):
|
||||
dtrain2 = xgb.DMatrix(X2, label=y2)
|
||||
|
||||
param = {'objective': 'binary:logistic',
|
||||
'updater': 'grow_gpu_hist',
|
||||
'tree_method': 'gpu_hist',
|
||||
'max_depth': max_depth,
|
||||
'n_gpus': n_gpus,
|
||||
'max_bin': max_bin,
|
||||
@@ -250,7 +250,7 @@ class TestGPU(unittest.TestCase):
|
||||
######################################################################
|
||||
# fail-safe test for max_bin
|
||||
param = {'objective': 'binary:logistic',
|
||||
'updater': 'grow_gpu_hist',
|
||||
'tree_method': 'gpu_hist',
|
||||
'max_depth': max_depth,
|
||||
'n_gpus': n_gpus,
|
||||
'eval_metric': 'auc',
|
||||
@@ -263,7 +263,7 @@ class TestGPU(unittest.TestCase):
|
||||
######################################################################
|
||||
# subsampling
|
||||
param = {'objective': 'binary:logistic',
|
||||
'updater': 'grow_gpu_hist',
|
||||
'tree_method': 'gpu_hist',
|
||||
'max_depth': max_depth,
|
||||
'n_gpus': n_gpus,
|
||||
'eval_metric': 'auc',
|
||||
@@ -279,7 +279,7 @@ class TestGPU(unittest.TestCase):
|
||||
######################################################################
|
||||
# fail-safe test for max_bin=2
|
||||
param = {'objective': 'binary:logistic',
|
||||
'updater': 'grow_gpu_hist',
|
||||
'tree_method': 'gpu_hist',
|
||||
'max_depth': 2,
|
||||
'n_gpus': n_gpus,
|
||||
'eval_metric': 'auc',
|
||||
|
||||
Reference in New Issue
Block a user