From 332b26df957efc58afb954cb01a6a39a9630e166 Mon Sep 17 00:00:00 2001
From: Rory Mitchell <r.a.mitchell.nz@gmail.com>
Date: Sat, 19 Aug 2017 21:27:48 +1200
Subject: [PATCH] Update GPU acceleration demo (#2617)

* Update GPU acceleration demo

* Fix parameter formatting
---
 demo/gpu_acceleration/README.md     | 37 ++------------------------
 demo/gpu_acceleration/bosch.py      | 41 -----------------------------
 demo/gpu_acceleration/cover_type.py | 40 ++++++++++++++++++++++++++++
 doc/parameter.md                    |  1 +
 4 files changed, 43 insertions(+), 76 deletions(-)
 delete mode 100644 demo/gpu_acceleration/bosch.py
 create mode 100644 demo/gpu_acceleration/cover_type.py

diff --git a/demo/gpu_acceleration/README.md b/demo/gpu_acceleration/README.md
index 43591fc67..25841d39b 100644
--- a/demo/gpu_acceleration/README.md
+++ b/demo/gpu_acceleration/README.md
@@ -1,42 +1,9 @@
 # GPU Acceleration Demo
 
-This demo shows how to perform a cross validation on the kaggle Bosch dataset with GPU acceleration. The Bosch numerical dataset has over 1 million rows and 968 features, making it time consuming to process.
+This demo shows how to train a model on the [forest cover type](https://archive.ics.uci.edu/ml/datasets/covertype) dataset using GPU acceleration. The forest cover type dataset has 581,012 rows and 54 features, making it time consuming to process. We compare the run-time and accuracy of the GPU and CPU histogram algorithms.
 
 This demo requires the [GPU plug-in](https://github.com/dmlc/xgboost/tree/master/plugin/updater_gpu) to be built and installed.
 
-The dataset is available from:
-https://www.kaggle.com/c/bosch-production-line-performance/data
+The dataset is downloaded automatically by scikit-learn's `fetch_covtype` function when the demo script `cover_type.py` is run.
 
-Copy train_numeric.csv into xgboost/demo/data.
-
-The subset parameter changes the proportion of rows loaded from the CSV file. Processing the entire dataset can take a long time and requires about 8GB of device memory. It is initially set to 0.4, using about 2650/3380MB on a GTX 970. Lower the parameter if your device runs out of memory.
-
-```python
-subset = 0.4
-```
-
-Parameters are set as usual except that we set silent to 0 to see how much memory is being allocated on the GPU and we change 'updater' to 'grow_gpu' to activate the GPU plugin.
-
-```python
-param['silent'] = 0
-param['updater'] = 'grow_gpu'
-```
-
-We use the sklearn cross validation function instead of the xgboost cv function as the xgboost cv will try to fit all folds in GPU memory at the same time.
-
-Using the sklearn cv we can run each fold separately to fit a very large dataset onto the GPU.
-
-Also note the line:
-```python
-del bst
-```
-
-This hints to the python garbage collection that it should delete the booster for the current fold before beginning the next. Without this line python may keep 'bst' from the previous fold in memory, using up precious GPU memory. 
-
-You can change the updater parameter to run the equivalent algorithm for the CPU:
-```python
-param['updater'] = 'grow_colmaker'
-```
-
-Expect some minor variations in accuracy between the two versions.
 
diff --git a/demo/gpu_acceleration/bosch.py b/demo/gpu_acceleration/bosch.py
deleted file mode 100644
index 441cf4432..000000000
--- a/demo/gpu_acceleration/bosch.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import numpy as np
-import pandas as pd
-import xgboost as xgb
-import time
-import random
-from sklearn.model_selection import StratifiedKFold
-
-#For sampling rows from input file
-random_seed = 9
-subset = 0.4
-
-n_rows = 1183747;
-train_rows = int(n_rows * subset)
-random.seed(random_seed)
-skip = sorted(random.sample(range(1,n_rows + 1),n_rows-train_rows))
-data = pd.read_csv("../data/train_numeric.csv", index_col=0, dtype=np.float32, skiprows=skip)
-y = data['Response'].values
-del data['Response']
-X = data.values
-
-param = {}
-param['objective'] = 'binary:logistic'
-param['eval_metric'] = 'auc'
-param['max_depth'] = 5
-param['eta'] = 0.3
-param['silent'] = 0
-param['tree_method'] = 'gpu_exact'
-
-num_round = 20
-
-skf = StratifiedKFold(n_splits=5)
-
-for i, (train, test) in enumerate(skf.split(X, y)):
-    dtrain = xgb.DMatrix(X[train], label=y[train])
-    tmp = time.time()
-    bst = xgb.train(param, dtrain, num_round)
-    boost_time = time.time() - tmp
-    res = bst.eval(xgb.DMatrix(X[test], label=y[test]))
-    print("Fold {}: {}, Boost Time {}".format(i, res, str(boost_time)))
-    del bst
-
diff --git a/demo/gpu_acceleration/cover_type.py b/demo/gpu_acceleration/cover_type.py
new file mode 100644
index 000000000..5183cd7e7
--- /dev/null
+++ b/demo/gpu_acceleration/cover_type.py
@@ -0,0 +1,40 @@
+import xgboost as xgb
+import numpy as np
+from sklearn.datasets import fetch_covtype
+from sklearn.model_selection import train_test_split
+import time
+
+# Fetch the forest cover type dataset using sklearn
+cov = fetch_covtype()
+X = cov.data
+y = cov.target
+
+# Create 0.75/0.25 train/test split (fixed seed so repeated runs use the same split)
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, train_size=0.75,
+                                                    random_state=42)
+
+# Specify sufficient boosting iterations to reach a minimum
+num_round = 3000
+
+# Leave most parameters as default
+param = {'objective': 'multi:softmax', # Specify multiclass classification
+         'num_class': 8, # Number of possible output classes (NOTE: presumably 8 because targets are 1..7 and label 0 is unused - confirm)
+         'tree_method': 'gpu_hist' # Use GPU accelerated algorithm
+         }
+
+# Convert input data from numpy to XGBoost format
+dtrain = xgb.DMatrix(X_train, label=y_train)
+dtest = xgb.DMatrix(X_test, label=y_test)
+
+gpu_res = {} # Filled by xgb.train with the per-iteration evaluation results on 'test'
+tmp = time.time()
+# Train model with the GPU histogram algorithm, evaluating on the held-out split
+xgb.train(param, dtrain, num_round, evals=[(dtest, 'test')], evals_result=gpu_res)
+print("GPU Training Time: %s seconds" % (str(time.time() - tmp)))
+
+# Repeat for CPU algorithm: same data and parameters, only tree_method changes
+tmp = time.time()
+param['tree_method'] = 'hist'
+cpu_res = {}
+xgb.train(param, dtrain, num_round, evals=[(dtest, 'test')], evals_result=cpu_res)
+print("CPU Training Time: %s seconds" % (str(time.time() - tmp)))
diff --git a/doc/parameter.md b/doc/parameter.md
index b2fcabbae..a50cecd4e 100644
--- a/doc/parameter.md
+++ b/doc/parameter.md
@@ -113,6 +113,7 @@ Parameters for Tree Booster
   - The type of predictor algorithm to use. Provides the same results but allows the use of GPU or CPU.
     - 'cpu_predictor': Multicore CPU prediction algorithm.
     - 'gpu_predictor': Prediction using GPU. Default for 'gpu_exact' and 'gpu_hist' tree method.
+
 Additional parameters for Dart Booster
 --------------------------------------
 * sample_type [default="uniform"]