temp merge, disable 1 line, SetValid
This commit is contained in:
4
demo/CLI/README.rst
Normal file
4
demo/CLI/README.rst
Normal file
@@ -0,0 +1,4 @@
|
||||
XGBoost Command Line Interface Walkthrough
|
||||
==========================================
|
||||
|
||||
Please note that the command line interface is deprecated in 2.1.0; use other language bindings instead. For a list of available bindings, see https://xgboost.readthedocs.io/en/stable/
|
||||
@@ -106,7 +106,7 @@ Please send pull requests if you find ones that are missing here.
|
||||
- Prarthana Bhat, 2nd place winner in [DYD Competition](https://datahack.analyticsvidhya.com/contest/date-your-data/). Link to [Solution](https://github.com/analyticsvidhya/DateYourData/blob/master/Prathna_Bhat_Model.R).
|
||||
|
||||
## Talks
|
||||
- [XGBoost: A Scalable Tree Boosting System](http://datascience.la/xgboost-workshop-and-meetup-talk-with-tianqi-chen/) (video+slides) by Tianqi Chen at the Los Angeles Data Science meetup
|
||||
- XGBoost: A Scalable Tree Boosting System ([video](https://www.youtube.com/watch?v=Vly8xGnNiWs) + [slides](https://speakerdeck.com/datasciencela/tianqi-chen-xgboost-overview-and-latest-news-la-meetup-talk)) by Tianqi Chen at the Los Angeles Data Science meetup
|
||||
|
||||
## Tutorials
|
||||
|
||||
@@ -145,7 +145,7 @@ Send a PR to add a one sentence description:)
|
||||
## Tools using XGBoost
|
||||
|
||||
- [BayesBoost](https://github.com/mpearmain/BayesBoost) - Bayesian Optimization using xgboost and sklearn API
|
||||
- [FLAML](https://github.com/microsoft/FLAML) - An open source AutoML library
|
||||
- [FLAML](https://github.com/microsoft/FLAML) - An open source AutoML library
|
||||
designed to automatically produce accurate machine learning models with low computational cost. FLAML includes [XGBoost as one of the default learners](https://github.com/microsoft/FLAML/blob/main/flaml/model.py) and can also be used as a fast hyperparameter tuning tool for XGBoost ([code example](https://microsoft.github.io/FLAML/docs/Examples/AutoML-for-XGBoost)).
|
||||
- [gp_xgboost_gridsearch](https://github.com/vatsan/gp_xgboost_gridsearch) - In-database parallel grid-search for XGBoost on [Greenplum](https://github.com/greenplum-db/gpdb) using PL/Python
|
||||
- [tpot](https://github.com/rhiever/tpot) - A Python tool that automatically creates and optimizes machine learning pipelines using genetic programming.
|
||||
|
||||
@@ -11,33 +11,43 @@ import numpy as np
|
||||
|
||||
import xgboost as xgb
|
||||
|
||||
plt.rcParams.update({'font.size': 13})
|
||||
plt.rcParams.update({"font.size": 13})
|
||||
|
||||
|
||||
# Function to visualize censored labels
|
||||
def plot_censored_labels(X, y_lower, y_upper):
|
||||
def replace_inf(x, target_value):
|
||||
def plot_censored_labels(
|
||||
X: np.ndarray, y_lower: np.ndarray, y_upper: np.ndarray
|
||||
) -> None:
|
||||
def replace_inf(x: np.ndarray, target_value: float) -> np.ndarray:
|
||||
x[np.isinf(x)] = target_value
|
||||
return x
|
||||
plt.plot(X, y_lower, 'o', label='y_lower', color='blue')
|
||||
plt.plot(X, y_upper, 'o', label='y_upper', color='fuchsia')
|
||||
plt.vlines(X, ymin=replace_inf(y_lower, 0.01), ymax=replace_inf(y_upper, 1000),
|
||||
label='Range for y', color='gray')
|
||||
|
||||
plt.plot(X, y_lower, "o", label="y_lower", color="blue")
|
||||
plt.plot(X, y_upper, "o", label="y_upper", color="fuchsia")
|
||||
plt.vlines(
|
||||
X,
|
||||
ymin=replace_inf(y_lower, 0.01),
|
||||
ymax=replace_inf(y_upper, 1000.0),
|
||||
label="Range for y",
|
||||
color="gray",
|
||||
)
|
||||
|
||||
|
||||
# Toy data
|
||||
X = np.array([1, 2, 3, 4, 5]).reshape((-1, 1))
|
||||
INF = np.inf
|
||||
y_lower = np.array([ 10, 15, -INF, 30, 100])
|
||||
y_upper = np.array([INF, INF, 20, 50, INF])
|
||||
y_lower = np.array([10, 15, -INF, 30, 100])
|
||||
y_upper = np.array([INF, INF, 20, 50, INF])
|
||||
|
||||
# Visualize toy data
|
||||
plt.figure(figsize=(5, 4))
|
||||
plot_censored_labels(X, y_lower, y_upper)
|
||||
plt.ylim((6, 200))
|
||||
plt.legend(loc='lower right')
|
||||
plt.title('Toy data')
|
||||
plt.xlabel('Input feature')
|
||||
plt.ylabel('Label')
|
||||
plt.yscale('log')
|
||||
plt.legend(loc="lower right")
|
||||
plt.title("Toy data")
|
||||
plt.xlabel("Input feature")
|
||||
plt.ylabel("Label")
|
||||
plt.yscale("log")
|
||||
plt.tight_layout()
|
||||
plt.show(block=True)
|
||||
|
||||
@@ -46,54 +56,83 @@ grid_pts = np.linspace(0.8, 5.2, 1000).reshape((-1, 1))
|
||||
|
||||
# Train AFT model using XGBoost
|
||||
dmat = xgb.DMatrix(X)
|
||||
dmat.set_float_info('label_lower_bound', y_lower)
|
||||
dmat.set_float_info('label_upper_bound', y_upper)
|
||||
params = {'max_depth': 3, 'objective':'survival:aft', 'min_child_weight': 0}
|
||||
dmat.set_float_info("label_lower_bound", y_lower)
|
||||
dmat.set_float_info("label_upper_bound", y_upper)
|
||||
params = {"max_depth": 3, "objective": "survival:aft", "min_child_weight": 0}
|
||||
|
||||
accuracy_history = []
|
||||
def plot_intermediate_model_callback(env):
|
||||
"""Custom callback to plot intermediate models"""
|
||||
# Compute y_pred = prediction using the intermediate model, at current boosting iteration
|
||||
y_pred = env.model.predict(dmat)
|
||||
# "Accuracy" = the number of data points whose ranged label (y_lower, y_upper) includes
|
||||
# the corresponding predicted label (y_pred)
|
||||
acc = np.sum(np.logical_and(y_pred >= y_lower, y_pred <= y_upper)/len(X) * 100)
|
||||
accuracy_history.append(acc)
|
||||
|
||||
# Plot ranged labels as well as predictions by the model
|
||||
plt.subplot(5, 3, env.iteration + 1)
|
||||
plot_censored_labels(X, y_lower, y_upper)
|
||||
y_pred_grid_pts = env.model.predict(xgb.DMatrix(grid_pts))
|
||||
plt.plot(grid_pts, y_pred_grid_pts, 'r-', label='XGBoost AFT model', linewidth=4)
|
||||
plt.title('Iteration {}'.format(env.iteration), x=0.5, y=0.8)
|
||||
plt.xlim((0.8, 5.2))
|
||||
plt.ylim((1 if np.min(y_pred) < 6 else 6, 200))
|
||||
plt.yscale('log')
|
||||
|
||||
res = {}
|
||||
plt.figure(figsize=(12,13))
|
||||
bst = xgb.train(params, dmat, 15, [(dmat, 'train')], evals_result=res,
|
||||
callbacks=[plot_intermediate_model_callback])
|
||||
class PlotIntermediateModel(xgb.callback.TrainingCallback):
    """Callback that draws the intermediate model after every boosting round.

    Uses the script-level ``dmat``, ``X``, ``y_lower``, ``y_upper``, ``grid_pts`` and
    ``accuracy_history`` objects, and appends one accuracy value per round to
    ``accuracy_history``.
    """

    def __init__(self) -> None:
        super().__init__()

    def after_iteration(
        self,
        model: xgb.Booster,
        epoch: int,
        evals_log: xgb.callback.TrainingCallback.EvalsLog,
    ) -> bool:
        """Run after each boosting iteration: record accuracy and plot the fit.

        Always returns ``False`` (per the XGBoost callback API, ``True`` would
        request that training stop).
        """
        # Predictions of the model as it stands at the current boosting iteration.
        train_pred = model.predict(dmat)

        # "Accuracy" = percentage of data points whose ranged label
        # (y_lower, y_upper) contains the corresponding prediction.
        inside_range = np.logical_and(train_pred >= y_lower, train_pred <= y_upper)
        accuracy_history.append(np.sum(inside_range / len(X) * 100))

        # One panel per iteration in a 5x3 grid: ranged labels plus the model curve.
        plt.subplot(5, 3, epoch + 1)
        plot_censored_labels(X, y_lower, y_upper)
        grid_pred = model.predict(xgb.DMatrix(grid_pts))
        plt.plot(grid_pts, grid_pred, "r-", label="XGBoost AFT model", linewidth=4)
        plt.title(f"Iteration {epoch}", x=0.5, y=0.8)
        plt.xlim((0.8, 5.2))
        # Lower the y-axis floor when some prediction falls below 6.
        y_floor = 1 if np.min(train_pred) < 6 else 6
        plt.ylim((y_floor, 200))
        plt.yscale("log")
        return False
|
||||
|
||||
|
||||
res: xgb.callback.TrainingCallback.EvalsLog = {}
|
||||
plt.figure(figsize=(12, 13))
|
||||
bst = xgb.train(
|
||||
params,
|
||||
dmat,
|
||||
15,
|
||||
[(dmat, "train")],
|
||||
evals_result=res,
|
||||
callbacks=[PlotIntermediateModel()],
|
||||
)
|
||||
plt.tight_layout()
|
||||
plt.legend(loc='lower center', ncol=4,
|
||||
bbox_to_anchor=(0.5, 0),
|
||||
bbox_transform=plt.gcf().transFigure)
|
||||
plt.legend(
|
||||
loc="lower center",
|
||||
ncol=4,
|
||||
bbox_to_anchor=(0.5, 0),
|
||||
bbox_transform=plt.gcf().transFigure,
|
||||
)
|
||||
plt.tight_layout()
|
||||
|
||||
# Plot negative log likelihood over boosting iterations
|
||||
plt.figure(figsize=(8,3))
|
||||
plt.figure(figsize=(8, 3))
|
||||
plt.subplot(1, 2, 1)
|
||||
plt.plot(res['train']['aft-nloglik'], 'b-o', label='aft-nloglik')
|
||||
plt.xlabel('# Boosting Iterations')
|
||||
plt.legend(loc='best')
|
||||
plt.plot(res["train"]["aft-nloglik"], "b-o", label="aft-nloglik")
|
||||
plt.xlabel("# Boosting Iterations")
|
||||
plt.legend(loc="best")
|
||||
|
||||
# Plot "accuracy" over boosting iterations
|
||||
# "Accuracy" = the number of data points whose ranged label (y_lower, y_upper) includes
|
||||
# the corresponding predicted label (y_pred)
|
||||
plt.subplot(1, 2, 2)
|
||||
plt.plot(accuracy_history, 'r-o', label='Accuracy (%)')
|
||||
plt.xlabel('# Boosting Iterations')
|
||||
plt.legend(loc='best')
|
||||
plt.plot(accuracy_history, "r-o", label="Accuracy (%)")
|
||||
plt.xlabel("# Boosting Iterations")
|
||||
plt.legend(loc="best")
|
||||
plt.tight_layout()
|
||||
|
||||
plt.show()
|
||||
|
||||
@@ -53,15 +53,7 @@ int main() {
|
||||
// configure the training
|
||||
// available parameters are described here:
|
||||
// https://xgboost.readthedocs.io/en/latest/parameter.html
|
||||
safe_xgboost(XGBoosterSetParam(booster, "tree_method", use_gpu ? "gpu_hist" : "hist"));
|
||||
if (use_gpu) {
|
||||
// set the GPU to use;
|
||||
// this is not necessary, but provided here as an illustration
|
||||
safe_xgboost(XGBoosterSetParam(booster, "gpu_id", "0"));
|
||||
} else {
|
||||
// avoid evaluating objective and metric on a GPU
|
||||
safe_xgboost(XGBoosterSetParam(booster, "gpu_id", "-1"));
|
||||
}
|
||||
safe_xgboost(XGBoosterSetParam(booster, "device", use_gpu ? "cuda" : "cpu"));
|
||||
|
||||
safe_xgboost(XGBoosterSetParam(booster, "objective", "binary:logistic"));
|
||||
safe_xgboost(XGBoosterSetParam(booster, "min_child_weight", "1"));
|
||||
|
||||
@@ -18,43 +18,45 @@ def main(client):
|
||||
# The Veterans' Administration Lung Cancer Trial
|
||||
# The Statistical Analysis of Failure Time Data by Kalbfleisch J. and Prentice R (1980)
|
||||
CURRENT_DIR = os.path.dirname(__file__)
|
||||
df = dd.read_csv(os.path.join(CURRENT_DIR, os.pardir, 'data', 'veterans_lung_cancer.csv'))
|
||||
df = dd.read_csv(
|
||||
os.path.join(CURRENT_DIR, os.pardir, "data", "veterans_lung_cancer.csv")
|
||||
)
|
||||
|
||||
# DaskDMatrix acts like normal DMatrix, works as a proxy for local
|
||||
# DMatrix scatter around workers.
|
||||
# For AFT survival, you'd need to extract the lower and upper bounds for the label
|
||||
# and pass them as arguments to DaskDMatrix.
|
||||
y_lower_bound = df['Survival_label_lower_bound']
|
||||
y_upper_bound = df['Survival_label_upper_bound']
|
||||
X = df.drop(['Survival_label_lower_bound',
|
||||
'Survival_label_upper_bound'], axis=1)
|
||||
dtrain = DaskDMatrix(client, X, label_lower_bound=y_lower_bound,
|
||||
label_upper_bound=y_upper_bound)
|
||||
y_lower_bound = df["Survival_label_lower_bound"]
|
||||
y_upper_bound = df["Survival_label_upper_bound"]
|
||||
X = df.drop(["Survival_label_lower_bound", "Survival_label_upper_bound"], axis=1)
|
||||
dtrain = DaskDMatrix(
|
||||
client, X, label_lower_bound=y_lower_bound, label_upper_bound=y_upper_bound
|
||||
)
|
||||
|
||||
# Use train method from xgboost.dask instead of xgboost. This
|
||||
# distributed version of train returns a dictionary containing the
|
||||
# resulting booster and evaluation history obtained from
|
||||
# evaluation metrics.
|
||||
params = {'verbosity': 1,
|
||||
'objective': 'survival:aft',
|
||||
'eval_metric': 'aft-nloglik',
|
||||
'learning_rate': 0.05,
|
||||
'aft_loss_distribution_scale': 1.20,
|
||||
'aft_loss_distribution': 'normal',
|
||||
'max_depth': 6,
|
||||
'lambda': 0.01,
|
||||
'alpha': 0.02}
|
||||
output = xgb.dask.train(client,
|
||||
params,
|
||||
dtrain,
|
||||
num_boost_round=100,
|
||||
evals=[(dtrain, 'train')])
|
||||
bst = output['booster']
|
||||
history = output['history']
|
||||
params = {
|
||||
"verbosity": 1,
|
||||
"objective": "survival:aft",
|
||||
"eval_metric": "aft-nloglik",
|
||||
"learning_rate": 0.05,
|
||||
"aft_loss_distribution_scale": 1.20,
|
||||
"aft_loss_distribution": "normal",
|
||||
"max_depth": 6,
|
||||
"lambda": 0.01,
|
||||
"alpha": 0.02,
|
||||
}
|
||||
output = xgb.dask.train(
|
||||
client, params, dtrain, num_boost_round=100, evals=[(dtrain, "train")]
|
||||
)
|
||||
bst = output["booster"]
|
||||
history = output["history"]
|
||||
|
||||
# you can pass output directly into `predict` too.
|
||||
prediction = xgb.dask.predict(client, bst, dtrain)
|
||||
print('Evaluation history: ', history)
|
||||
print("Evaluation history: ", history)
|
||||
|
||||
# Uncomment the following line to save the model to the disk
|
||||
# bst.save_model('survival_model.json')
|
||||
@@ -62,7 +64,7 @@ def main(client):
|
||||
return prediction
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
# or use other clusters for scaling
|
||||
with LocalCluster(n_workers=7, threads_per_worker=4) as cluster:
|
||||
with Client(cluster) as client:
|
||||
|
||||
@@ -15,7 +15,7 @@ def main(client):
|
||||
m = 100000
|
||||
n = 100
|
||||
X = da.random.random(size=(m, n), chunks=100)
|
||||
y = da.random.random(size=(m, ), chunks=100)
|
||||
y = da.random.random(size=(m,), chunks=100)
|
||||
|
||||
# DaskDMatrix acts like normal DMatrix, works as a proxy for local
|
||||
# DMatrix scatter around workers.
|
||||
@@ -25,21 +25,23 @@ def main(client):
|
||||
# distributed version of train returns a dictionary containing the
|
||||
# resulting booster and evaluation history obtained from
|
||||
# evaluation metrics.
|
||||
output = xgb.dask.train(client,
|
||||
{'verbosity': 1,
|
||||
'tree_method': 'hist'},
|
||||
dtrain,
|
||||
num_boost_round=4, evals=[(dtrain, 'train')])
|
||||
bst = output['booster']
|
||||
history = output['history']
|
||||
output = xgb.dask.train(
|
||||
client,
|
||||
{"verbosity": 1, "tree_method": "hist"},
|
||||
dtrain,
|
||||
num_boost_round=4,
|
||||
evals=[(dtrain, "train")],
|
||||
)
|
||||
bst = output["booster"]
|
||||
history = output["history"]
|
||||
|
||||
# you can pass output directly into `predict` too.
|
||||
prediction = xgb.dask.predict(client, bst, dtrain)
|
||||
print('Evaluation history:', history)
|
||||
print("Evaluation history:", history)
|
||||
return prediction
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
# or use other clusters for scaling
|
||||
with LocalCluster(n_workers=7, threads_per_worker=4) as cluster:
|
||||
with Client(cluster) as client:
|
||||
|
||||
@@ -13,33 +13,38 @@ from xgboost import dask as dxgb
|
||||
from xgboost.dask import DaskDMatrix
|
||||
|
||||
|
||||
def using_dask_matrix(client: Client, X, y):
|
||||
# DaskDMatrix acts like normal DMatrix, works as a proxy for local
|
||||
# DMatrix scatter around workers.
|
||||
def using_dask_matrix(client: Client, X: da.Array, y: da.Array) -> da.Array:
|
||||
# DaskDMatrix acts like normal DMatrix, works as a proxy for local DMatrix scatter
|
||||
# around workers.
|
||||
dtrain = DaskDMatrix(client, X, y)
|
||||
|
||||
# Use train method from xgboost.dask instead of xgboost. This
|
||||
# distributed version of train returns a dictionary containing the
|
||||
# resulting booster and evaluation history obtained from
|
||||
# evaluation metrics.
|
||||
output = xgb.dask.train(client,
|
||||
{'verbosity': 2,
|
||||
# Golden line for GPU training
|
||||
'tree_method': 'gpu_hist'},
|
||||
dtrain,
|
||||
num_boost_round=4, evals=[(dtrain, 'train')])
|
||||
bst = output['booster']
|
||||
history = output['history']
|
||||
# Use train method from xgboost.dask instead of xgboost. This distributed version
|
||||
# of train returns a dictionary containing the resulting booster and evaluation
|
||||
# history obtained from evaluation metrics.
|
||||
output = xgb.dask.train(
|
||||
client,
|
||||
{
|
||||
"verbosity": 2,
|
||||
"tree_method": "hist",
|
||||
# Golden line for GPU training
|
||||
"device": "cuda",
|
||||
},
|
||||
dtrain,
|
||||
num_boost_round=4,
|
||||
evals=[(dtrain, "train")],
|
||||
)
|
||||
bst = output["booster"]
|
||||
history = output["history"]
|
||||
|
||||
# you can pass output directly into `predict` too.
|
||||
prediction = xgb.dask.predict(client, bst, dtrain)
|
||||
print('Evaluation history:', history)
|
||||
print("Evaluation history:", history)
|
||||
return prediction
|
||||
|
||||
|
||||
def using_quantile_device_dmatrix(client: Client, X, y):
|
||||
"""`DaskQuantileDMatrix` is a data type specialized for `gpu_hist` and `hist` tree
|
||||
methods for reducing memory usage.
|
||||
def using_quantile_device_dmatrix(client: Client, X: da.Array, y: da.Array) -> da.Array:
|
||||
"""`DaskQuantileDMatrix` is a data type specialized for `hist` tree methods for
|
||||
reducing memory usage.
|
||||
|
||||
.. versionadded:: 1.2.0
|
||||
|
||||
@@ -52,26 +57,28 @@ def using_quantile_device_dmatrix(client: Client, X, y):
|
||||
# the `ref` argument of `DaskQuantileDMatrix`.
|
||||
dtrain = dxgb.DaskQuantileDMatrix(client, X, y)
|
||||
output = xgb.dask.train(
|
||||
client, {"verbosity": 2, "tree_method": "gpu_hist"}, dtrain, num_boost_round=4
|
||||
client,
|
||||
{"verbosity": 2, "tree_method": "hist", "device": "cuda"},
|
||||
dtrain,
|
||||
num_boost_round=4,
|
||||
)
|
||||
|
||||
prediction = xgb.dask.predict(client, output, X)
|
||||
return prediction
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
# `LocalCUDACluster` is used for assigning GPU to XGBoost processes. Here
|
||||
# `n_workers` represents the number of GPUs since we use one GPU per worker
|
||||
# process.
|
||||
# `n_workers` represents the number of GPUs since we use one GPU per worker process.
|
||||
with LocalCUDACluster(n_workers=2, threads_per_worker=4) as cluster:
|
||||
with Client(cluster) as client:
|
||||
# generate some random data for demonstration
|
||||
m = 100000
|
||||
n = 100
|
||||
X = da.random.random(size=(m, n), chunks=10000)
|
||||
y = da.random.random(size=(m, ), chunks=10000)
|
||||
y = da.random.random(size=(m,), chunks=10000)
|
||||
|
||||
print('Using DaskQuantileDMatrix')
|
||||
print("Using DaskQuantileDMatrix")
|
||||
from_ddqdm = using_quantile_device_dmatrix(client, X, y)
|
||||
print('Using DMatrix')
|
||||
print("Using DMatrix")
|
||||
from_dmatrix = using_dask_matrix(client, X, y)
|
||||
|
||||
@@ -21,7 +21,8 @@ def main(client):
|
||||
y = da.random.random(m, partition_size)
|
||||
|
||||
regressor = xgboost.dask.DaskXGBRegressor(verbosity=1)
|
||||
regressor.set_params(tree_method='gpu_hist')
|
||||
# set the device to CUDA
|
||||
regressor.set_params(tree_method="hist", device="cuda")
|
||||
# assigning client here is optional
|
||||
regressor.client = client
|
||||
|
||||
@@ -31,13 +32,13 @@ def main(client):
|
||||
bst = regressor.get_booster()
|
||||
history = regressor.evals_result()
|
||||
|
||||
print('Evaluation history:', history)
|
||||
print("Evaluation history:", history)
|
||||
# returned prediction is always a dask array.
|
||||
assert isinstance(prediction, da.Array)
|
||||
return bst # returning the trained model
|
||||
return bst # returning the trained model
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
# With dask cuda, one can scale up XGBoost to arbitrary GPU clusters.
|
||||
# `LocalCUDACluster` used here is only for demonstration purpose.
|
||||
with LocalCUDACluster() as cluster:
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
# GPU Acceleration Demo
|
||||
|
||||
`cover_type.py` shows how to train a model on the [forest cover type](https://archive.ics.uci.edu/ml/datasets/covertype) dataset using GPU acceleration. The forest cover type dataset has 581,012 rows and 54 features, making it time consuming to process. We compare the run-time and accuracy of the GPU and CPU histogram algorithms.
|
||||
|
||||
`shap.ipynb` demonstrates using GPU acceleration to compute SHAP values for feature importance.
|
||||
8
demo/gpu_acceleration/README.rst
Normal file
8
demo/gpu_acceleration/README.rst
Normal file
@@ -0,0 +1,8 @@
|
||||
:orphan:
|
||||
|
||||
GPU Acceleration Demo
|
||||
=====================
|
||||
|
||||
This is a collection of demonstration scripts to showcase the basic usage of GPU. Please
|
||||
see :doc:`/gpu/index` for more info. There are other demonstrations for distributed GPU
|
||||
training using dask or spark.
|
||||
@@ -1,41 +1,49 @@
|
||||
"""
|
||||
Using xgboost on GPU devices
|
||||
============================
|
||||
|
||||
Shows how to train a model on the `forest cover type
|
||||
<https://archive.ics.uci.edu/ml/datasets/covertype>`_ dataset using GPU
|
||||
acceleration. The forest cover type dataset has 581,012 rows and 54 features, making it
|
||||
time consuming to process. We compare the run-time and accuracy of the GPU and CPU
|
||||
histogram algorithms.
|
||||
|
||||
In addition, the demo showcases using GPU with other GPU-related libraries including
|
||||
cupy and cuml. These libraries are not strictly required.
|
||||
|
||||
"""
|
||||
import time
|
||||
|
||||
import cupy as cp
|
||||
from cuml.model_selection import train_test_split
|
||||
from sklearn.datasets import fetch_covtype
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
import xgboost as xgb
|
||||
|
||||
# Fetch dataset using sklearn
|
||||
cov = fetch_covtype()
|
||||
X = cov.data
|
||||
y = cov.target
|
||||
X, y = fetch_covtype(return_X_y=True)
|
||||
X = cp.array(X)
|
||||
y = cp.array(y)
|
||||
y -= y.min()
|
||||
|
||||
# Create 0.75/0.25 train/test split
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, train_size=0.75,
|
||||
random_state=42)
|
||||
X_train, X_test, y_train, y_test = train_test_split(
|
||||
X, y, test_size=0.25, train_size=0.75, random_state=42
|
||||
)
|
||||
|
||||
# Specify sufficient boosting iterations to reach a minimum
|
||||
num_round = 3000
|
||||
|
||||
# Leave most parameters as default
|
||||
param = {'objective': 'multi:softmax', # Specify multiclass classification
|
||||
'num_class': 8, # Number of possible output classes
|
||||
'tree_method': 'gpu_hist' # Use GPU accelerated algorithm
|
||||
}
|
||||
|
||||
# Convert input data from numpy to XGBoost format
|
||||
dtrain = xgb.DMatrix(X_train, label=y_train)
|
||||
dtest = xgb.DMatrix(X_test, label=y_test)
|
||||
|
||||
gpu_res = {} # Store accuracy result
|
||||
tmp = time.time()
|
||||
clf = xgb.XGBClassifier(device="cuda", n_estimators=num_round)
|
||||
# Train model
|
||||
xgb.train(param, dtrain, num_round, evals=[(dtest, 'test')], evals_result=gpu_res)
|
||||
print("GPU Training Time: %s seconds" % (str(time.time() - tmp)))
|
||||
start = time.time()
|
||||
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
|
||||
gpu_res = clf.evals_result()
|
||||
print("GPU Training Time: %s seconds" % (str(time.time() - start)))
|
||||
|
||||
# Repeat for CPU algorithm
|
||||
tmp = time.time()
|
||||
param['tree_method'] = 'hist'
|
||||
cpu_res = {}
|
||||
xgb.train(param, dtrain, num_round, evals=[(dtest, 'test')], evals_result=cpu_res)
|
||||
print("CPU Training Time: %s seconds" % (str(time.time() - tmp)))
|
||||
# The estimator has to be fitted between the two timestamps: without the fit there
# is no evaluation history for `evals_result()` to return and nothing is timed.
clf = xgb.XGBClassifier(device="cpu", n_estimators=num_round)
start = time.time()
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
cpu_res = clf.evals_result()
print("CPU Training Time: %s seconds" % (str(time.time() - start)))
|
||||
|
||||
File diff suppressed because one or more lines are too long
55
demo/gpu_acceleration/tree_shap.py
Normal file
55
demo/gpu_acceleration/tree_shap.py
Normal file
@@ -0,0 +1,55 @@
|
||||
"""
|
||||
Use GPU to speedup SHAP value computation
|
||||
=========================================
|
||||
|
||||
Demonstrates using GPU acceleration to compute SHAP values for feature importance.
|
||||
|
||||
"""
|
||||
import shap
|
||||
from sklearn.datasets import fetch_california_housing
|
||||
|
||||
import xgboost as xgb
|
||||
|
||||
# Load the California housing data through scikit-learn and print its description.
data = fetch_california_housing()
print(data.DESCR)
X, y = data.data, data.target

num_round = 500

# Booster parameters; "device": "cuda" requests the GPU for the `hist` tree method.
param = dict(eta=0.05, max_depth=10, tree_method="hist", device="cuda")

# GPU accelerated training
dtrain = xgb.DMatrix(X, label=y, feature_names=data.feature_names)
model = xgb.train(param, dtrain, num_round)

# SHAP values computed by xgboost itself, with the device set to the GPU.
model.set_param({"device": "cuda"})
shap_values = model.predict(dtrain, pred_contribs=True)

# SHAP interaction values, computed the same way.
shap_interaction_values = model.predict(dtrain, pred_interactions=True)

# The shap package uses the GPU accelerated version too, as long as the device
# parameter is set to "cuda".
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)

# Explain the first prediction with a force plot.
shap.force_plot(
    explainer.expected_value,
    shap_values[0, :],
    X[0, :],
    feature_names=data.feature_names,
    matplotlib=True,
)

# Bar chart summarising overall feature importance.
shap.summary_plot(shap_values, X, plot_type="bar", feature_names=data.feature_names)
|
||||
@@ -1,9 +1,9 @@
|
||||
'''
|
||||
"""
|
||||
Demo for using and defining callback functions
|
||||
==============================================
|
||||
|
||||
.. versionadded:: 1.3.0
|
||||
'''
|
||||
"""
|
||||
import argparse
|
||||
import os
|
||||
import tempfile
|
||||
@@ -17,10 +17,11 @@ import xgboost as xgb
|
||||
|
||||
|
||||
class Plotting(xgb.callback.TrainingCallback):
|
||||
'''Plot evaluation result during training. Only for demonstration purpose as it's quite
|
||||
"""Plot evaluation result during training. Only for demonstration purpose as it's quite
|
||||
slow to draw.
|
||||
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, rounds):
|
||||
self.fig = plt.figure()
|
||||
self.ax = self.fig.add_subplot(111)
|
||||
@@ -31,16 +32,16 @@ class Plotting(xgb.callback.TrainingCallback):
|
||||
plt.ion()
|
||||
|
||||
def _get_key(self, data, metric):
|
||||
return f'{data}-{metric}'
|
||||
return f"{data}-{metric}"
|
||||
|
||||
def after_iteration(self, model, epoch, evals_log):
|
||||
'''Update the plot.'''
|
||||
"""Update the plot."""
|
||||
if not self.lines:
|
||||
for data, metric in evals_log.items():
|
||||
for metric_name, log in metric.items():
|
||||
key = self._get_key(data, metric_name)
|
||||
expanded = log + [0] * (self.rounds - len(log))
|
||||
self.lines[key], = self.ax.plot(self.x, expanded, label=key)
|
||||
(self.lines[key],) = self.ax.plot(self.x, expanded, label=key)
|
||||
self.ax.legend()
|
||||
else:
|
||||
# https://pythonspot.com/matplotlib-update-plot/
|
||||
@@ -55,8 +56,8 @@ class Plotting(xgb.callback.TrainingCallback):
|
||||
|
||||
|
||||
def custom_callback():
|
||||
'''Demo for defining a custom callback function that plots evaluation result during
|
||||
training.'''
|
||||
"""Demo for defining a custom callback function that plots evaluation result during
|
||||
training."""
|
||||
X, y = load_breast_cancer(return_X_y=True)
|
||||
X_train, X_valid, y_train, y_valid = train_test_split(X, y, random_state=0)
|
||||
|
||||
@@ -69,14 +70,16 @@ def custom_callback():
|
||||
# Pass it to the `callbacks` parameter as a list.
|
||||
xgb.train(
|
||||
{
|
||||
'objective': 'binary:logistic',
|
||||
'eval_metric': ['error', 'rmse'],
|
||||
'tree_method': 'gpu_hist'
|
||||
"objective": "binary:logistic",
|
||||
"eval_metric": ["error", "rmse"],
|
||||
"tree_method": "hist",
|
||||
"device": "cuda",
|
||||
},
|
||||
D_train,
|
||||
evals=[(D_train, 'Train'), (D_valid, 'Valid')],
|
||||
evals=[(D_train, "Train"), (D_valid, "Valid")],
|
||||
num_boost_round=num_boost_round,
|
||||
callbacks=[plotting])
|
||||
callbacks=[plotting],
|
||||
)
|
||||
|
||||
|
||||
def check_point_callback():
|
||||
@@ -89,10 +92,10 @@ def check_point_callback():
|
||||
if i == 0:
|
||||
continue
|
||||
if as_pickle:
|
||||
path = os.path.join(tmpdir, 'model_' + str(i) + '.pkl')
|
||||
path = os.path.join(tmpdir, "model_" + str(i) + ".pkl")
|
||||
else:
|
||||
path = os.path.join(tmpdir, 'model_' + str(i) + '.json')
|
||||
assert(os.path.exists(path))
|
||||
path = os.path.join(tmpdir, "model_" + str(i) + ".json")
|
||||
assert os.path.exists(path)
|
||||
|
||||
X, y = load_breast_cancer(return_X_y=True)
|
||||
m = xgb.DMatrix(X, y)
|
||||
@@ -100,31 +103,36 @@ def check_point_callback():
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
# Use callback class from xgboost.callback
|
||||
# Feel free to subclass/customize it to suit your need.
|
||||
check_point = xgb.callback.TrainingCheckPoint(directory=tmpdir,
|
||||
iterations=rounds,
|
||||
name='model')
|
||||
xgb.train({'objective': 'binary:logistic'}, m,
|
||||
num_boost_round=10,
|
||||
verbose_eval=False,
|
||||
callbacks=[check_point])
|
||||
check_point = xgb.callback.TrainingCheckPoint(
|
||||
directory=tmpdir, iterations=rounds, name="model"
|
||||
)
|
||||
xgb.train(
|
||||
{"objective": "binary:logistic"},
|
||||
m,
|
||||
num_boost_round=10,
|
||||
verbose_eval=False,
|
||||
callbacks=[check_point],
|
||||
)
|
||||
check(False)
|
||||
|
||||
# This version of checkpoint saves everything including parameters and
|
||||
# model. See: doc/tutorials/saving_model.rst
|
||||
check_point = xgb.callback.TrainingCheckPoint(directory=tmpdir,
|
||||
iterations=rounds,
|
||||
as_pickle=True,
|
||||
name='model')
|
||||
xgb.train({'objective': 'binary:logistic'}, m,
|
||||
num_boost_round=10,
|
||||
verbose_eval=False,
|
||||
callbacks=[check_point])
|
||||
check_point = xgb.callback.TrainingCheckPoint(
|
||||
directory=tmpdir, iterations=rounds, as_pickle=True, name="model"
|
||||
)
|
||||
xgb.train(
|
||||
{"objective": "binary:logistic"},
|
||||
m,
|
||||
num_boost_round=10,
|
||||
verbose_eval=False,
|
||||
callbacks=[check_point],
|
||||
)
|
||||
check(True)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--plot', default=1, type=int)
|
||||
parser.add_argument("--plot", default=1, type=int)
|
||||
args = parser.parse_args()
|
||||
|
||||
check_point_callback()
|
||||
|
||||
@@ -63,7 +63,8 @@ def load_cat_in_the_dat() -> tuple[pd.DataFrame, pd.Series]:
|
||||
|
||||
|
||||
params = {
|
||||
"tree_method": "gpu_hist",
|
||||
"tree_method": "hist",
|
||||
"device": "cuda",
|
||||
"n_estimators": 32,
|
||||
"colsample_bylevel": 0.7,
|
||||
}
|
||||
|
||||
@@ -58,13 +58,13 @@ def main() -> None:
|
||||
# Specify `enable_categorical` to True, also we use onehot encoding based split
|
||||
# here for demonstration. For details see the document of `max_cat_to_onehot`.
|
||||
reg = xgb.XGBRegressor(
|
||||
tree_method="gpu_hist", enable_categorical=True, max_cat_to_onehot=5
|
||||
tree_method="hist", enable_categorical=True, max_cat_to_onehot=5, device="cuda"
|
||||
)
|
||||
reg.fit(X, y, eval_set=[(X, y)])
|
||||
|
||||
# Pass in already encoded data
|
||||
X_enc, y_enc = make_categorical(100, 10, 4, True)
|
||||
reg_enc = xgb.XGBRegressor(tree_method="gpu_hist")
|
||||
reg_enc = xgb.XGBRegressor(tree_method="hist", device="cuda")
|
||||
reg_enc.fit(X_enc, y_enc, eval_set=[(X_enc, y_enc)])
|
||||
|
||||
reg_results = np.array(reg.evals_result()["validation_0"]["rmse"])
|
||||
|
||||
@@ -76,9 +76,7 @@ def softprob_obj(predt: np.ndarray, data: xgb.DMatrix):
|
||||
grad[r, c] = g
|
||||
hess[r, c] = h
|
||||
|
||||
# Right now (XGBoost 1.0.0), reshaping is necessary
|
||||
grad = grad.reshape((kRows * kClasses, 1))
|
||||
hess = hess.reshape((kRows * kClasses, 1))
|
||||
# After 2.1.0, pass the gradient as it is.
|
||||
return grad, hess
|
||||
|
||||
|
||||
|
||||
@@ -22,7 +22,10 @@ import xgboost
|
||||
|
||||
|
||||
def make_batches(
|
||||
n_samples_per_batch: int, n_features: int, n_batches: int, tmpdir: str,
|
||||
n_samples_per_batch: int,
|
||||
n_features: int,
|
||||
n_batches: int,
|
||||
tmpdir: str,
|
||||
) -> List[Tuple[str, str]]:
|
||||
files: List[Tuple[str, str]] = []
|
||||
rng = np.random.RandomState(1994)
|
||||
@@ -38,6 +41,7 @@ def make_batches(
|
||||
|
||||
class Iterator(xgboost.DataIter):
|
||||
"""A custom iterator for loading files in batches."""
|
||||
|
||||
def __init__(self, file_paths: List[Tuple[str, str]]):
|
||||
self._file_paths = file_paths
|
||||
self._it = 0
|
||||
@@ -82,10 +86,11 @@ def main(tmpdir: str) -> xgboost.Booster:
|
||||
missing = np.NaN
|
||||
Xy = xgboost.DMatrix(it, missing=missing, enable_categorical=False)
|
||||
|
||||
# Other tree methods including ``hist`` and ``gpu_hist`` also work, see tutorial in
|
||||
# ``approx`` is also supported, but less efficient due to sketching. GPU behaves
|
||||
# differently than CPU tree methods as it uses a hybrid approach. See tutorial in
|
||||
# doc for details.
|
||||
booster = xgboost.train(
|
||||
{"tree_method": "approx", "max_depth": 2},
|
||||
{"tree_method": "hist", "max_depth": 4},
|
||||
Xy,
|
||||
evals=[(Xy, "Train")],
|
||||
num_boost_round=10,
|
||||
|
||||
@@ -104,7 +104,8 @@ def ranking_demo(args: argparse.Namespace) -> None:
|
||||
qid_test = qid_test[sorted_idx]
|
||||
|
||||
ranker = xgb.XGBRanker(
|
||||
tree_method="gpu_hist",
|
||||
tree_method="hist",
|
||||
device="cuda",
|
||||
lambdarank_pair_method="topk",
|
||||
lambdarank_num_pair_per_sample=13,
|
||||
eval_metric=["ndcg@1", "ndcg@8"],
|
||||
@@ -161,7 +162,8 @@ def click_data_demo(args: argparse.Namespace) -> None:
|
||||
|
||||
ranker = xgb.XGBRanker(
|
||||
n_estimators=512,
|
||||
tree_method="gpu_hist",
|
||||
tree_method="hist",
|
||||
device="cuda",
|
||||
learning_rate=0.01,
|
||||
reg_lambda=1.5,
|
||||
subsample=0.8,
|
||||
|
||||
@@ -68,22 +68,21 @@ def rmse_model(plot_result: bool, strategy: str) -> None:
|
||||
def custom_rmse_model(plot_result: bool, strategy: str) -> None:
|
||||
"""Train using Python implementation of Squared Error."""
|
||||
|
||||
# As the experimental support status, custom objective doesn't support matrix as
|
||||
# gradient and hessian, which will be changed in future release.
|
||||
def gradient(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:
|
||||
"""Compute the gradient squared error."""
|
||||
y = dtrain.get_label().reshape(predt.shape)
|
||||
return (predt - y).reshape(y.size)
|
||||
return predt - y
|
||||
|
||||
def hessian(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:
|
||||
"""Compute the hessian for squared error."""
|
||||
return np.ones(predt.shape).reshape(predt.size)
|
||||
return np.ones(predt.shape)
|
||||
|
||||
def squared_log(
|
||||
predt: np.ndarray, dtrain: xgb.DMatrix
|
||||
) -> Tuple[np.ndarray, np.ndarray]:
|
||||
grad = gradient(predt, dtrain)
|
||||
hess = hessian(predt, dtrain)
|
||||
# both numpy.ndarray and cupy.ndarray works.
|
||||
return grad, hess
|
||||
|
||||
def rmse(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:
|
||||
|
||||
@@ -23,22 +23,23 @@ import numpy
|
||||
import xgboost
|
||||
|
||||
COLS = 64
|
||||
ROWS_PER_BATCH = 1000 # data is splited by rows
|
||||
ROWS_PER_BATCH = 1000 # data is splited by rows
|
||||
BATCHES = 32
|
||||
|
||||
|
||||
class IterForDMatrixDemo(xgboost.core.DataIter):
|
||||
'''A data iterator for XGBoost DMatrix.
|
||||
"""A data iterator for XGBoost DMatrix.
|
||||
|
||||
`reset` and `next` are required for any data iterator, other functions here
|
||||
are utilities for demonstration purposes.
|
||||
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
'''Generate some random data for demostration.
|
||||
"""Generate some random data for demonstration.
|
||||
|
||||
Actual data can be anything that is currently supported by XGBoost.
|
||||
'''
|
||||
"""
|
||||
self.rows = ROWS_PER_BATCH
|
||||
self.cols = COLS
|
||||
rng = cupy.random.RandomState(1994)
|
||||
@@ -46,7 +47,7 @@ class IterForDMatrixDemo(xgboost.core.DataIter):
|
||||
self._labels = [rng.randn(self.rows)] * BATCHES
|
||||
self._weights = [rng.uniform(size=self.rows)] * BATCHES
|
||||
|
||||
self.it = 0 # set iterator to 0
|
||||
self.it = 0 # set iterator to 0
|
||||
super().__init__()
|
||||
|
||||
def as_array(self):
|
||||
@@ -59,27 +60,26 @@ class IterForDMatrixDemo(xgboost.core.DataIter):
|
||||
return cupy.concatenate(self._weights)
|
||||
|
||||
def data(self):
|
||||
'''Utility function for obtaining current batch of data.'''
|
||||
"""Utility function for obtaining current batch of data."""
|
||||
return self._data[self.it]
|
||||
|
||||
def labels(self):
|
||||
'''Utility function for obtaining current batch of label.'''
|
||||
"""Utility function for obtaining current batch of label."""
|
||||
return self._labels[self.it]
|
||||
|
||||
def weights(self):
|
||||
return self._weights[self.it]
|
||||
|
||||
def reset(self):
|
||||
'''Reset the iterator'''
|
||||
"""Reset the iterator"""
|
||||
self.it = 0
|
||||
|
||||
def next(self, input_data):
|
||||
'''Yield next batch of data.'''
|
||||
"""Yield next batch of data."""
|
||||
if self.it == len(self._data):
|
||||
# Return 0 when there's no more batch.
|
||||
return 0
|
||||
input_data(data=self.data(), label=self.labels(),
|
||||
weight=self.weights())
|
||||
input_data(data=self.data(), label=self.labels(), weight=self.weights())
|
||||
self.it += 1
|
||||
return 1
|
||||
|
||||
@@ -103,18 +103,19 @@ def main():
|
||||
|
||||
assert m_with_it.num_col() == m.num_col()
|
||||
assert m_with_it.num_row() == m.num_row()
|
||||
# Tree meethod must be one of the `hist` or `gpu_hist`. We use `gpu_hist` for GPU
|
||||
# input here.
|
||||
# Tree method must be `hist`.
|
||||
reg_with_it = xgboost.train(
|
||||
{"tree_method": "gpu_hist"}, m_with_it, num_boost_round=rounds
|
||||
{"tree_method": "hist", "device": "cuda"}, m_with_it, num_boost_round=rounds
|
||||
)
|
||||
predict_with_it = reg_with_it.predict(m_with_it)
|
||||
|
||||
reg = xgboost.train({"tree_method": "gpu_hist"}, m, num_boost_round=rounds)
|
||||
reg = xgboost.train(
|
||||
{"tree_method": "hist", "device": "cuda"}, m, num_boost_round=rounds
|
||||
)
|
||||
predict = reg.predict(m)
|
||||
|
||||
numpy.testing.assert_allclose(predict_with_it, predict, rtol=1e6)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
@@ -7,6 +7,11 @@ Quantile Regression
|
||||
The script is inspired by this awesome example in sklearn:
|
||||
https://scikit-learn.org/stable/auto_examples/ensemble/plot_gradient_boosting_quantile.html
|
||||
|
||||
.. note::
|
||||
|
||||
The feature is only supported using the Python package. In addition, quantile
|
||||
crossing can happen due to limitation in the algorithm.
|
||||
|
||||
"""
|
||||
import argparse
|
||||
from typing import Dict
|
||||
|
||||
@@ -24,7 +24,7 @@ def main():
|
||||
Xy = xgb.DMatrix(X_train, y_train)
|
||||
evals_result: xgb.callback.EvaluationMonitor.EvalsLog = {}
|
||||
booster = xgb.train(
|
||||
{"tree_method": "gpu_hist", "max_depth": 6},
|
||||
{"tree_method": "hist", "max_depth": 6, "device": "cuda"},
|
||||
Xy,
|
||||
num_boost_round=n_rounds,
|
||||
evals=[(Xy, "Train")],
|
||||
@@ -33,8 +33,8 @@ def main():
|
||||
SHAP = booster.predict(Xy, pred_contribs=True)
|
||||
|
||||
# Refresh the leaf value and tree statistic
|
||||
X_refresh = X[X.shape[0] // 2:]
|
||||
y_refresh = y[y.shape[0] // 2:]
|
||||
X_refresh = X[X.shape[0] // 2 :]
|
||||
y_refresh = y[y.shape[0] // 2 :]
|
||||
Xy_refresh = xgb.DMatrix(X_refresh, y_refresh)
|
||||
# The model will adapt to other half of the data by changing leaf value (no change in
|
||||
# split condition) with refresh_leaf set to True.
|
||||
@@ -87,7 +87,7 @@ def main():
|
||||
np.testing.assert_allclose(
|
||||
np.array(prune_result["Original"]["rmse"]),
|
||||
np.array(prune_result["Train"]["rmse"]),
|
||||
atol=1e-5
|
||||
atol=1e-5,
|
||||
)
|
||||
|
||||
|
||||
|
||||
1
demo/nvflare/.gitignore
vendored
Normal file
1
demo/nvflare/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
!config
|
||||
23
demo/nvflare/config/config_fed_client.json
Normal file
23
demo/nvflare/config/config_fed_client.json
Normal file
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"format_version": 2,
|
||||
"executors": [
|
||||
{
|
||||
"tasks": [
|
||||
"train"
|
||||
],
|
||||
"executor": {
|
||||
"path": "trainer.XGBoostTrainer",
|
||||
"args": {
|
||||
"server_address": "localhost:9091",
|
||||
"world_size": 2,
|
||||
"server_cert_path": "server-cert.pem",
|
||||
"client_key_path": "client-key.pem",
|
||||
"client_cert_path": "client-cert.pem",
|
||||
"use_gpus": false
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"task_result_filters": [],
|
||||
"task_data_filters": []
|
||||
}
|
||||
22
demo/nvflare/config/config_fed_server.json
Normal file
22
demo/nvflare/config/config_fed_server.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"format_version": 2,
|
||||
"server": {
|
||||
"heart_beat_timeout": 600
|
||||
},
|
||||
"task_data_filters": [],
|
||||
"task_result_filters": [],
|
||||
"workflows": [
|
||||
{
|
||||
"id": "server_workflow",
|
||||
"path": "controller.XGBoostController",
|
||||
"args": {
|
||||
"port": 9091,
|
||||
"world_size": 2,
|
||||
"server_key_path": "server-key.pem",
|
||||
"server_cert_path": "server-cert.pem",
|
||||
"client_cert_path": "client-cert.pem"
|
||||
}
|
||||
}
|
||||
],
|
||||
"components": []
|
||||
}
|
||||
@@ -6,7 +6,7 @@ This directory contains a demo of Horizontal Federated Learning using
|
||||
## Training with CPU only
|
||||
|
||||
To run the demo, first build XGBoost with the federated learning plugin enabled (see the
|
||||
[README](../../plugin/federated/README.md)).
|
||||
[README](../../../plugin/federated/README.md)).
|
||||
|
||||
Install NVFlare (note that currently NVFlare only supports Python 3.8):
|
||||
```shell
|
||||
|
||||
@@ -70,8 +70,7 @@ class XGBoostTrainer(Executor):
|
||||
param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
|
||||
if self._use_gpus:
|
||||
self.log_info(fl_ctx, f'Training with GPU {rank}')
|
||||
param['tree_method'] = 'gpu_hist'
|
||||
param['gpu_id'] = rank
|
||||
param['device'] = f"cuda:{rank}"
|
||||
|
||||
# Specify validations set to watch performance
|
||||
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
|
||||
|
||||
@@ -16,7 +16,7 @@ split -n l/${world_size} --numeric-suffixes=1 -a 1 ../../data/agaricus.txt.test
|
||||
|
||||
nvflare poc -n 2 --prepare
|
||||
mkdir -p /tmp/nvflare/poc/admin/transfer/horizontal-xgboost
|
||||
cp -fr config custom /tmp/nvflare/poc/admin/transfer/horizontal-xgboost
|
||||
cp -fr ../config custom /tmp/nvflare/poc/admin/transfer/horizontal-xgboost
|
||||
cp server-*.pem client-cert.pem /tmp/nvflare/poc/server/
|
||||
for (( site=1; site<=world_size; site++ )); do
|
||||
cp server-cert.pem client-*.pem /tmp/nvflare/poc/site-"$site"/
|
||||
|
||||
@@ -6,7 +6,7 @@ This directory contains a demo of Vertical Federated Learning using
|
||||
## Training with CPU only
|
||||
|
||||
To run the demo, first build XGBoost with the federated learning plugin enabled (see the
|
||||
[README](../../plugin/federated/README.md)).
|
||||
[README](../../../plugin/federated/README.md)).
|
||||
|
||||
Install NVFlare (note that currently NVFlare only supports Python 3.8):
|
||||
```shell
|
||||
|
||||
@@ -16,7 +16,7 @@ class SupportedTasks(object):
|
||||
|
||||
class XGBoostTrainer(Executor):
|
||||
def __init__(self, server_address: str, world_size: int, server_cert_path: str,
|
||||
client_key_path: str, client_cert_path: str):
|
||||
client_key_path: str, client_cert_path: str, use_gpus: bool):
|
||||
"""Trainer for federated XGBoost.
|
||||
|
||||
Args:
|
||||
@@ -32,6 +32,7 @@ class XGBoostTrainer(Executor):
|
||||
self._server_cert_path = server_cert_path
|
||||
self._client_key_path = client_key_path
|
||||
self._client_cert_path = client_cert_path
|
||||
self._use_gpus = use_gpus
|
||||
|
||||
def execute(self, task_name: str, shareable: Shareable, fl_ctx: FLContext,
|
||||
abort_signal: Signal) -> Shareable:
|
||||
@@ -81,6 +82,8 @@ class XGBoostTrainer(Executor):
|
||||
'objective': 'binary:logistic',
|
||||
'eval_metric': 'auc',
|
||||
}
|
||||
if self._use_gpus:
|
||||
self.log_info(fl_ctx, 'GPUs are not currently supported by vertical federated XGBoost')
|
||||
|
||||
# specify validations set to watch performance
|
||||
watchlist = [(dtest, "eval"), (dtrain, "train")]
|
||||
|
||||
@@ -56,7 +56,7 @@ fi
|
||||
|
||||
nvflare poc -n 2 --prepare
|
||||
mkdir -p /tmp/nvflare/poc/admin/transfer/vertical-xgboost
|
||||
cp -fr config custom /tmp/nvflare/poc/admin/transfer/vertical-xgboost
|
||||
cp -fr ../config custom /tmp/nvflare/poc/admin/transfer/vertical-xgboost
|
||||
cp server-*.pem client-cert.pem /tmp/nvflare/poc/server/
|
||||
for (( site=1; site<=world_size; site++ )); do
|
||||
cp server-cert.pem client-*.pem /tmp/nvflare/poc/site-"${site}"/
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
Using XGBoost with RAPIDS Memory Manager (RMM) plugin (EXPERIMENTAL)
|
||||
====================================================================
|
||||
[RAPIDS Memory Manager (RMM)](https://github.com/rapidsai/rmm) library provides a collection of
|
||||
efficient memory allocators for NVIDIA GPUs. It is now possible to use XGBoost with memory
|
||||
allocators provided by RMM, by enabling the RMM integration plugin.
|
||||
|
||||
The demos in this directory highlights one RMM allocator in particular: **the pool sub-allocator**.
|
||||
This allocator addresses the slow speed of `cudaMalloc()` by allocating a large chunk of memory
|
||||
upfront. Subsequent allocations will draw from the pool of already allocated memory and thus avoid
|
||||
the overhead of calling `cudaMalloc()` directly. See
|
||||
[this GTC talk slides](https://on-demand.gputechconf.com/gtc/2015/presentation/S5530-Stephen-Jones.pdf)
|
||||
for more details.
|
||||
|
||||
Before running the demos, ensure that XGBoost is compiled with the RMM plugin enabled. To do this,
|
||||
run CMake with option `-DPLUGIN_RMM=ON` (`-DUSE_CUDA=ON` also required):
|
||||
```
|
||||
cmake .. -DUSE_CUDA=ON -DUSE_NCCL=ON -DPLUGIN_RMM=ON
|
||||
make -j4
|
||||
```
|
||||
CMake will attempt to locate the RMM library in your build environment. You may choose to build
|
||||
RMM from the source, or install it using the Conda package manager. If CMake cannot find RMM, you
|
||||
should specify the location of RMM with the CMake prefix:
|
||||
```
|
||||
# If using Conda:
|
||||
cmake .. -DUSE_CUDA=ON -DUSE_NCCL=ON -DPLUGIN_RMM=ON -DCMAKE_PREFIX_PATH=$CONDA_PREFIX
|
||||
# If using RMM installed with a custom location
|
||||
cmake .. -DUSE_CUDA=ON -DUSE_NCCL=ON -DPLUGIN_RMM=ON -DCMAKE_PREFIX_PATH=/path/to/rmm
|
||||
```
|
||||
|
||||
# Informing XGBoost about RMM pool
|
||||
|
||||
When XGBoost is compiled with RMM, most of the large size allocation will go through RMM
|
||||
allocators, but some small allocations in performance critical areas are using a different
|
||||
caching allocator so that we can have better control over memory allocation behavior.
|
||||
Users can override this behavior and force the use of rmm for all allocations by setting
|
||||
the global configuration ``use_rmm``:
|
||||
|
||||
``` python
|
||||
with xgb.config_context(use_rmm=True):
|
||||
clf = xgb.XGBClassifier(tree_method="gpu_hist")
|
||||
```
|
||||
|
||||
Depending on the choice of memory pool size or type of allocator, this may have negative
|
||||
performance impact.
|
||||
|
||||
* [Using RMM with a single GPU](./rmm_singlegpu.py)
|
||||
* [Using RMM with a local Dask cluster consisting of multiple GPUs](./rmm_mgpu_with_dask.py)
|
||||
51
demo/rmm_plugin/README.rst
Normal file
51
demo/rmm_plugin/README.rst
Normal file
@@ -0,0 +1,51 @@
|
||||
Using XGBoost with RAPIDS Memory Manager (RMM) plugin (EXPERIMENTAL)
|
||||
====================================================================
|
||||
|
||||
`RAPIDS Memory Manager (RMM) <https://github.com/rapidsai/rmm>`__ library provides a
|
||||
collection of efficient memory allocators for NVIDIA GPUs. It is now possible to use
|
||||
XGBoost with memory allocators provided by RMM, by enabling the RMM integration plugin.
|
||||
|
||||
The demos in this directory highlight one RMM allocator in particular: **the pool
|
||||
sub-allocator**. This allocator addresses the slow speed of ``cudaMalloc()`` by
|
||||
allocating a large chunk of memory upfront. Subsequent allocations will draw from the pool
|
||||
of already allocated memory and thus avoid the overhead of calling ``cudaMalloc()``
|
||||
directly. See `these GTC talk slides
|
||||
<https://on-demand.gputechconf.com/gtc/2015/presentation/S5530-Stephen-Jones.pdf>`_ for
|
||||
more details.
|
||||
|
||||
Before running the demos, ensure that XGBoost is compiled with the RMM plugin enabled. To do this,
|
||||
run CMake with option ``-DPLUGIN_RMM=ON`` (``-DUSE_CUDA=ON`` also required):
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cmake .. -DUSE_CUDA=ON -DUSE_NCCL=ON -DPLUGIN_RMM=ON
|
||||
make -j$(nproc)
|
||||
|
||||
CMake will attempt to locate the RMM library in your build environment. You may choose to build
|
||||
RMM from the source, or install it using the Conda package manager. If CMake cannot find RMM, you
|
||||
should specify the location of RMM with the CMake prefix:
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
# If using Conda:
|
||||
cmake .. -DUSE_CUDA=ON -DUSE_NCCL=ON -DPLUGIN_RMM=ON -DCMAKE_PREFIX_PATH=$CONDA_PREFIX
|
||||
# If using RMM installed with a custom location
|
||||
cmake .. -DUSE_CUDA=ON -DUSE_NCCL=ON -DPLUGIN_RMM=ON -DCMAKE_PREFIX_PATH=/path/to/rmm
|
||||
|
||||
********************************
|
||||
Informing XGBoost about RMM pool
|
||||
********************************
|
||||
|
||||
When XGBoost is compiled with RMM, most large allocations will go through RMM
|
||||
allocators, but some small allocations in performance critical areas are using a different
|
||||
caching allocator so that we can have better control over memory allocation behavior.
|
||||
Users can override this behavior and force the use of rmm for all allocations by setting
|
||||
the global configuration ``use_rmm``:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
with xgb.config_context(use_rmm=True):
|
||||
clf = xgb.XGBClassifier(tree_method="hist", device="cuda")
|
||||
|
||||
Depending on the choice of memory pool size or type of allocator, this may have a negative
|
||||
performance impact.
|
||||
@@ -1,3 +1,7 @@
|
||||
"""
|
||||
Using rmm with Dask
|
||||
===================
|
||||
"""
|
||||
import dask
|
||||
from dask.distributed import Client
|
||||
from dask_cuda import LocalCUDACluster
|
||||
@@ -11,25 +15,33 @@ def main(client):
|
||||
# xgb.set_config(use_rmm=True)
|
||||
|
||||
X, y = make_classification(n_samples=10000, n_informative=5, n_classes=3)
|
||||
# In pratice one should prefer loading the data with dask collections instead of using
|
||||
# `from_array`.
|
||||
# In practice one should prefer loading the data with dask collections instead of
|
||||
# using `from_array`.
|
||||
X = dask.array.from_array(X)
|
||||
y = dask.array.from_array(y)
|
||||
dtrain = xgb.dask.DaskDMatrix(client, X, label=y)
|
||||
|
||||
params = {'max_depth': 8, 'eta': 0.01, 'objective': 'multi:softprob', 'num_class': 3,
|
||||
'tree_method': 'gpu_hist', 'eval_metric': 'merror'}
|
||||
output = xgb.dask.train(client, params, dtrain, num_boost_round=100,
|
||||
evals=[(dtrain, 'train')])
|
||||
bst = output['booster']
|
||||
history = output['history']
|
||||
for i, e in enumerate(history['train']['merror']):
|
||||
print(f'[{i}] train-merror: {e}')
|
||||
params = {
|
||||
"max_depth": 8,
|
||||
"eta": 0.01,
|
||||
"objective": "multi:softprob",
|
||||
"num_class": 3,
|
||||
"tree_method": "hist",
|
||||
"eval_metric": "merror",
|
||||
"device": "cuda",
|
||||
}
|
||||
output = xgb.dask.train(
|
||||
client, params, dtrain, num_boost_round=100, evals=[(dtrain, "train")]
|
||||
)
|
||||
bst = output["booster"]
|
||||
history = output["history"]
|
||||
for i, e in enumerate(history["train"]["merror"]):
|
||||
print(f"[{i}] train-merror: {e}")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# To use RMM pool allocator with a GPU Dask cluster, just add rmm_pool_size option to
|
||||
# LocalCUDACluster constructor.
|
||||
with LocalCUDACluster(rmm_pool_size='2GB') as cluster:
|
||||
if __name__ == "__main__":
|
||||
# To use RMM pool allocator with a GPU Dask cluster, just add rmm_pool_size option
|
||||
# to LocalCUDACluster constructor.
|
||||
with LocalCUDACluster(rmm_pool_size="2GB") as cluster:
|
||||
with Client(cluster) as client:
|
||||
main(client)
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
"""
|
||||
Using rmm on a single node device
|
||||
=================================
|
||||
"""
|
||||
import rmm
|
||||
from sklearn.datasets import make_classification
|
||||
|
||||
@@ -16,7 +20,8 @@ params = {
|
||||
"eta": 0.01,
|
||||
"objective": "multi:softprob",
|
||||
"num_class": 3,
|
||||
"tree_method": "gpu_hist",
|
||||
"tree_method": "hist",
|
||||
"device": "cuda",
|
||||
}
|
||||
# XGBoost will automatically use the RMM pool allocator
|
||||
bst = xgb.train(params, dtrain, num_boost_round=100, evals=[(dtrain, "train")])
|
||||
|
||||
Reference in New Issue
Block a user