Document for device ordinal. (#9398)
- Rewrite GPU demos. The notebook is converted to a script to avoid committing additional PNG plots. - Add GPU demos into the sphinx gallery. - Add RMM demos into the sphinx gallery. - Test for firing threads with different device ordinals.
This commit is contained in:
@@ -1,5 +0,0 @@
|
||||
# GPU Acceleration Demo
|
||||
|
||||
`cover_type.py` shows how to train a model on the [forest cover type](https://archive.ics.uci.edu/ml/datasets/covertype) dataset using GPU acceleration. The forest cover type dataset has 581,012 rows and 54 features, making it time consuming to process. We compare the run-time and accuracy of the GPU and CPU histogram algorithms.
|
||||
|
||||
`shap.ipynb` demonstrates using GPU acceleration to compute SHAP values for feature importance.
|
||||
8
demo/gpu_acceleration/README.rst
Normal file
8
demo/gpu_acceleration/README.rst
Normal file
@@ -0,0 +1,8 @@
|
||||
:orphan:
|
||||
|
||||
GPU Acceleration Demo
|
||||
=====================
|
||||
|
||||
This is a collection of demonstration scripts that showcase basic GPU usage. Please
|
||||
see :doc:`/gpu/index` for more info. There are other demonstrations for distributed GPU
|
||||
training using dask or spark.
|
||||
@@ -1,41 +1,49 @@
|
||||
"""
|
||||
Using xgboost on GPU devices
|
||||
============================
|
||||
|
||||
Shows how to train a model on the `forest cover type
|
||||
<https://archive.ics.uci.edu/ml/datasets/covertype>`_ dataset using GPU
|
||||
acceleration. The forest cover type dataset has 581,012 rows and 54 features, making it
|
||||
time consuming to process. We compare the run-time and accuracy of the GPU and CPU
|
||||
histogram algorithms.
|
||||
|
||||
In addition, the demo showcases using GPU with other GPU-related libraries including
|
||||
cupy and cuml. These libraries are not strictly required.
|
||||
|
||||
"""
|
||||
import time
|
||||
|
||||
import cupy as cp
|
||||
from cuml.model_selection import train_test_split
|
||||
from sklearn.datasets import fetch_covtype
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
import xgboost as xgb
|
||||
|
||||
# Fetch dataset using sklearn
|
||||
cov = fetch_covtype()
|
||||
X = cov.data
|
||||
y = cov.target
|
||||
X, y = fetch_covtype(return_X_y=True)
|
||||
X = cp.array(X)
|
||||
y = cp.array(y)
|
||||
y -= y.min()
|
||||
|
||||
# Create 0.75/0.25 train/test split
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, train_size=0.75,
|
||||
random_state=42)
|
||||
X_train, X_test, y_train, y_test = train_test_split(
|
||||
X, y, test_size=0.25, train_size=0.75, random_state=42
|
||||
)
|
||||
|
||||
# Specify sufficient boosting iterations to reach a minimum
|
||||
num_round = 3000
|
||||
|
||||
# Leave most parameters as default
|
||||
param = {'objective': 'multi:softmax', # Specify multiclass classification
|
||||
'num_class': 8, # Number of possible output classes
|
||||
'tree_method': 'gpu_hist' # Use GPU accelerated algorithm
|
||||
}
|
||||
|
||||
# Convert input data from numpy to XGBoost format
|
||||
dtrain = xgb.DMatrix(X_train, label=y_train)
|
||||
dtest = xgb.DMatrix(X_test, label=y_test)
|
||||
|
||||
gpu_res = {} # Store accuracy result
|
||||
tmp = time.time()
|
||||
clf = xgb.XGBClassifier(device="cuda", n_estimators=num_round)
|
||||
# Train model
|
||||
xgb.train(param, dtrain, num_round, evals=[(dtest, 'test')], evals_result=gpu_res)
|
||||
print("GPU Training Time: %s seconds" % (str(time.time() - tmp)))
|
||||
start = time.time()
|
||||
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
|
||||
gpu_res = clf.evals_result()
|
||||
print("GPU Training Time: %s seconds" % (str(time.time() - start)))
|
||||
|
||||
# Repeat for CPU algorithm
|
||||
tmp = time.time()
|
||||
param['tree_method'] = 'hist'
|
||||
cpu_res = {}
|
||||
xgb.train(param, dtrain, num_round, evals=[(dtest, 'test')], evals_result=cpu_res)
|
||||
print("CPU Training Time: %s seconds" % (str(time.time() - tmp)))
|
||||
clf = xgb.XGBClassifier(device="cpu", n_estimators=num_round)
|
||||
start = time.time()
|
||||
cpu_res = clf.evals_result()
|
||||
print("CPU Training Time: %s seconds" % (str(time.time() - start)))
|
||||
|
||||
File diff suppressed because one or more lines are too long
55
demo/gpu_acceleration/tree_shap.py
Normal file
55
demo/gpu_acceleration/tree_shap.py
Normal file
@@ -0,0 +1,55 @@
|
||||
"""
|
||||
Use GPU to speed up SHAP value computation
|
||||
==========================================
|
||||
|
||||
Demonstrates using GPU acceleration to compute SHAP values for feature importance.
|
||||
|
||||
"""
|
||||
import shap
|
||||
from sklearn.datasets import fetch_california_housing
|
||||
|
||||
import xgboost as xgb
|
||||
|
||||
# Fetch dataset using sklearn
|
||||
data = fetch_california_housing()
|
||||
print(data.DESCR)
|
||||
X = data.data
|
||||
y = data.target
|
||||
|
||||
num_round = 500
|
||||
|
||||
param = {
|
||||
"eta": 0.05,
|
||||
"max_depth": 10,
|
||||
"tree_method": "hist",
|
||||
"device": "cuda",
|
||||
}
|
||||
|
||||
# GPU accelerated training
|
||||
dtrain = xgb.DMatrix(X, label=y, feature_names=data.feature_names)
|
||||
model = xgb.train(param, dtrain, num_round)
|
||||
|
||||
# Compute shap values using GPU with xgboost
|
||||
model.set_param({"device": "cuda"})
|
||||
shap_values = model.predict(dtrain, pred_contribs=True)
|
||||
|
||||
# Compute shap interaction values using GPU
|
||||
shap_interaction_values = model.predict(dtrain, pred_interactions=True)
|
||||
|
||||
|
||||
# shap will call the GPU accelerated version as long as the device parameter is set to
|
||||
# "cuda"
|
||||
explainer = shap.TreeExplainer(model)
|
||||
shap_values = explainer.shap_values(X)
|
||||
|
||||
# visualize the first prediction's explanation
|
||||
shap.force_plot(
|
||||
explainer.expected_value,
|
||||
shap_values[0, :],
|
||||
X[0, :],
|
||||
feature_names=data.feature_names,
|
||||
matplotlib=True,
|
||||
)
|
||||
|
||||
# Show a summary of feature importance
|
||||
shap.summary_plot(shap_values, X, plot_type="bar", feature_names=data.feature_names)
|
||||
Reference in New Issue
Block a user