rocm enable for v2.0.1

This commit is contained in:
Hui Liu
2023-10-27 18:50:28 -07:00
447 changed files with 13518 additions and 8719 deletions

View File

@@ -106,7 +106,7 @@ Please send pull requests if you find ones that are missing here.
- Prarthana Bhat, 2nd place winner in [DYD Competition](https://datahack.analyticsvidhya.com/contest/date-your-data/). Link to [Solution](https://github.com/analyticsvidhya/DateYourData/blob/master/Prathna_Bhat_Model.R).
## Talks
- [XGBoost: A Scalable Tree Boosting System](http://datascience.la/xgboost-workshop-and-meetup-talk-with-tianqi-chen/) (video+slides) by Tianqi Chen at the Los Angeles Data Science meetup
- XGBoost: A Scalable Tree Boosting System ([video](https://www.youtube.com/watch?v=Vly8xGnNiWs) + [slides](https://speakerdeck.com/datasciencela/tianqi-chen-xgboost-overview-and-latest-news-la-meetup-talk)) by Tianqi Chen at the Los Angeles Data Science meetup
## Tutorials
@@ -145,7 +145,7 @@ Send a PR to add a one sentence description:)
## Tools using XGBoost
- [BayesBoost](https://github.com/mpearmain/BayesBoost) - Bayesian Optimization using xgboost and sklearn API
- [FLAML](https://github.com/microsoft/FLAML) - An open source AutoML library
- [FLAML](https://github.com/microsoft/FLAML) - An open source AutoML library
designed to automatically produce accurate machine learning models with low computational cost. FLAML includes [XGBoost as one of the default learners](https://github.com/microsoft/FLAML/blob/main/flaml/model.py) and can also be used as a fast hyperparameter tuning tool for XGBoost ([code example](https://microsoft.github.io/FLAML/docs/Examples/AutoML-for-XGBoost)).
- [gp_xgboost_gridsearch](https://github.com/vatsan/gp_xgboost_gridsearch) - In-database parallel grid-search for XGBoost on [Greenplum](https://github.com/greenplum-db/gpdb) using PL/Python
- [tpot](https://github.com/rhiever/tpot) - A Python tool that automatically creates and optimizes machine learning pipelines using genetic programming.

View File

@@ -11,33 +11,43 @@ import numpy as np
import xgboost as xgb
plt.rcParams.update({'font.size': 13})
plt.rcParams.update({"font.size": 13})
# Function to visualize censored labels
def plot_censored_labels(X, y_lower, y_upper):
def replace_inf(x, target_value):
def plot_censored_labels(
X: np.ndarray, y_lower: np.ndarray, y_upper: np.ndarray
) -> None:
def replace_inf(x: np.ndarray, target_value: float) -> np.ndarray:
x[np.isinf(x)] = target_value
return x
plt.plot(X, y_lower, 'o', label='y_lower', color='blue')
plt.plot(X, y_upper, 'o', label='y_upper', color='fuchsia')
plt.vlines(X, ymin=replace_inf(y_lower, 0.01), ymax=replace_inf(y_upper, 1000),
label='Range for y', color='gray')
plt.plot(X, y_lower, "o", label="y_lower", color="blue")
plt.plot(X, y_upper, "o", label="y_upper", color="fuchsia")
plt.vlines(
X,
ymin=replace_inf(y_lower, 0.01),
ymax=replace_inf(y_upper, 1000.0),
label="Range for y",
color="gray",
)
# Toy data
X = np.array([1, 2, 3, 4, 5]).reshape((-1, 1))
INF = np.inf
y_lower = np.array([ 10, 15, -INF, 30, 100])
y_upper = np.array([INF, INF, 20, 50, INF])
y_lower = np.array([10, 15, -INF, 30, 100])
y_upper = np.array([INF, INF, 20, 50, INF])
# Visualize toy data
plt.figure(figsize=(5, 4))
plot_censored_labels(X, y_lower, y_upper)
plt.ylim((6, 200))
plt.legend(loc='lower right')
plt.title('Toy data')
plt.xlabel('Input feature')
plt.ylabel('Label')
plt.yscale('log')
plt.legend(loc="lower right")
plt.title("Toy data")
plt.xlabel("Input feature")
plt.ylabel("Label")
plt.yscale("log")
plt.tight_layout()
plt.show(block=True)
@@ -46,54 +56,83 @@ grid_pts = np.linspace(0.8, 5.2, 1000).reshape((-1, 1))
# Train AFT model using XGBoost
dmat = xgb.DMatrix(X)
dmat.set_float_info('label_lower_bound', y_lower)
dmat.set_float_info('label_upper_bound', y_upper)
params = {'max_depth': 3, 'objective':'survival:aft', 'min_child_weight': 0}
dmat.set_float_info("label_lower_bound", y_lower)
dmat.set_float_info("label_upper_bound", y_upper)
params = {"max_depth": 3, "objective": "survival:aft", "min_child_weight": 0}
accuracy_history = []
def plot_intermediate_model_callback(env):
"""Custom callback to plot intermediate models"""
# Compute y_pred = prediction using the intermediate model, at current boosting iteration
y_pred = env.model.predict(dmat)
# "Accuracy" = the number of data points whose ranged label (y_lower, y_upper) includes
# the corresponding predicted label (y_pred)
acc = np.sum(np.logical_and(y_pred >= y_lower, y_pred <= y_upper)/len(X) * 100)
accuracy_history.append(acc)
# Plot ranged labels as well as predictions by the model
plt.subplot(5, 3, env.iteration + 1)
plot_censored_labels(X, y_lower, y_upper)
y_pred_grid_pts = env.model.predict(xgb.DMatrix(grid_pts))
plt.plot(grid_pts, y_pred_grid_pts, 'r-', label='XGBoost AFT model', linewidth=4)
plt.title('Iteration {}'.format(env.iteration), x=0.5, y=0.8)
plt.xlim((0.8, 5.2))
plt.ylim((1 if np.min(y_pred) < 6 else 6, 200))
plt.yscale('log')
res = {}
plt.figure(figsize=(12,13))
bst = xgb.train(params, dmat, 15, [(dmat, 'train')], evals_result=res,
callbacks=[plot_intermediate_model_callback])
class PlotIntermediateModel(xgb.callback.TrainingCallback):
"""Custom callback to plot intermediate models."""
def __init__(self) -> None:
super().__init__()
def after_iteration(
self,
model: xgb.Booster,
epoch: int,
evals_log: xgb.callback.TrainingCallback.EvalsLog,
) -> bool:
"""Run after each boosting iteration."""
# Compute y_pred = prediction using the intermediate model, at current boosting
# iteration
y_pred = model.predict(dmat)
# "Accuracy" = the number of data points whose ranged label (y_lower, y_upper)
# includes the corresponding predicted label (y_pred)
acc = np.sum(
np.logical_and(y_pred >= y_lower, y_pred <= y_upper) / len(X) * 100
)
accuracy_history.append(acc)
# Plot ranged labels as well as predictions by the model
plt.subplot(5, 3, epoch + 1)
plot_censored_labels(X, y_lower, y_upper)
y_pred_grid_pts = model.predict(xgb.DMatrix(grid_pts))
plt.plot(
grid_pts, y_pred_grid_pts, "r-", label="XGBoost AFT model", linewidth=4
)
plt.title("Iteration {}".format(epoch), x=0.5, y=0.8)
plt.xlim((0.8, 5.2))
plt.ylim((1 if np.min(y_pred) < 6 else 6, 200))
plt.yscale("log")
return False
res: xgb.callback.TrainingCallback.EvalsLog = {}
plt.figure(figsize=(12, 13))
bst = xgb.train(
params,
dmat,
15,
[(dmat, "train")],
evals_result=res,
callbacks=[PlotIntermediateModel()],
)
plt.tight_layout()
plt.legend(loc='lower center', ncol=4,
bbox_to_anchor=(0.5, 0),
bbox_transform=plt.gcf().transFigure)
plt.legend(
loc="lower center",
ncol=4,
bbox_to_anchor=(0.5, 0),
bbox_transform=plt.gcf().transFigure,
)
plt.tight_layout()
# Plot negative log likelihood over boosting iterations
plt.figure(figsize=(8,3))
plt.figure(figsize=(8, 3))
plt.subplot(1, 2, 1)
plt.plot(res['train']['aft-nloglik'], 'b-o', label='aft-nloglik')
plt.xlabel('# Boosting Iterations')
plt.legend(loc='best')
plt.plot(res["train"]["aft-nloglik"], "b-o", label="aft-nloglik")
plt.xlabel("# Boosting Iterations")
plt.legend(loc="best")
# Plot "accuracy" over boosting iterations
# "Accuracy" = the percentage of data points whose ranged label (y_lower, y_upper)
# includes the corresponding predicted label (y_pred)
plt.subplot(1, 2, 2)
plt.plot(accuracy_history, 'r-o', label='Accuracy (%)')
plt.xlabel('# Boosting Iterations')
plt.legend(loc='best')
plt.plot(accuracy_history, "r-o", label="Accuracy (%)")
plt.xlabel("# Boosting Iterations")
plt.legend(loc="best")
plt.tight_layout()
plt.show()

View File

@@ -53,15 +53,7 @@ int main() {
// configure the training
// available parameters are described here:
// https://xgboost.readthedocs.io/en/latest/parameter.html
safe_xgboost(XGBoosterSetParam(booster, "tree_method", use_gpu ? "gpu_hist" : "hist"));
if (use_gpu) {
// set the GPU to use;
// this is not necessary, but provided here as an illustration
safe_xgboost(XGBoosterSetParam(booster, "gpu_id", "0"));
} else {
// avoid evaluating objective and metric on a GPU
safe_xgboost(XGBoosterSetParam(booster, "gpu_id", "-1"));
}
safe_xgboost(XGBoosterSetParam(booster, "device", use_gpu ? "cuda" : "cpu"));
safe_xgboost(XGBoosterSetParam(booster, "objective", "binary:logistic"));
safe_xgboost(XGBoosterSetParam(booster, "min_child_weight", "1"));

View File

@@ -18,43 +18,45 @@ def main(client):
# The Veterans' Administration Lung Cancer Trial
# The Statistical Analysis of Failure Time Data by Kalbfleisch J. and Prentice R (1980)
CURRENT_DIR = os.path.dirname(__file__)
df = dd.read_csv(os.path.join(CURRENT_DIR, os.pardir, 'data', 'veterans_lung_cancer.csv'))
df = dd.read_csv(
os.path.join(CURRENT_DIR, os.pardir, "data", "veterans_lung_cancer.csv")
)
# DaskDMatrix acts like normal DMatrix, works as a proxy for local
# DMatrix scatter around workers.
# For AFT survival, you'd need to extract the lower and upper bounds for the label
# and pass them as arguments to DaskDMatrix.
y_lower_bound = df['Survival_label_lower_bound']
y_upper_bound = df['Survival_label_upper_bound']
X = df.drop(['Survival_label_lower_bound',
'Survival_label_upper_bound'], axis=1)
dtrain = DaskDMatrix(client, X, label_lower_bound=y_lower_bound,
label_upper_bound=y_upper_bound)
y_lower_bound = df["Survival_label_lower_bound"]
y_upper_bound = df["Survival_label_upper_bound"]
X = df.drop(["Survival_label_lower_bound", "Survival_label_upper_bound"], axis=1)
dtrain = DaskDMatrix(
client, X, label_lower_bound=y_lower_bound, label_upper_bound=y_upper_bound
)
# Use train method from xgboost.dask instead of xgboost. This
# distributed version of train returns a dictionary containing the
# resulting booster and evaluation history obtained from
# evaluation metrics.
params = {'verbosity': 1,
'objective': 'survival:aft',
'eval_metric': 'aft-nloglik',
'learning_rate': 0.05,
'aft_loss_distribution_scale': 1.20,
'aft_loss_distribution': 'normal',
'max_depth': 6,
'lambda': 0.01,
'alpha': 0.02}
output = xgb.dask.train(client,
params,
dtrain,
num_boost_round=100,
evals=[(dtrain, 'train')])
bst = output['booster']
history = output['history']
params = {
"verbosity": 1,
"objective": "survival:aft",
"eval_metric": "aft-nloglik",
"learning_rate": 0.05,
"aft_loss_distribution_scale": 1.20,
"aft_loss_distribution": "normal",
"max_depth": 6,
"lambda": 0.01,
"alpha": 0.02,
}
output = xgb.dask.train(
client, params, dtrain, num_boost_round=100, evals=[(dtrain, "train")]
)
bst = output["booster"]
history = output["history"]
# you can pass output directly into `predict` too.
prediction = xgb.dask.predict(client, bst, dtrain)
print('Evaluation history: ', history)
print("Evaluation history: ", history)
# Uncomment the following line to save the model to the disk
# bst.save_model('survival_model.json')
@@ -62,7 +64,7 @@ def main(client):
return prediction
if __name__ == '__main__':
if __name__ == "__main__":
# or use other clusters for scaling
with LocalCluster(n_workers=7, threads_per_worker=4) as cluster:
with Client(cluster) as client:

View File

@@ -15,7 +15,7 @@ def main(client):
m = 100000
n = 100
X = da.random.random(size=(m, n), chunks=100)
y = da.random.random(size=(m, ), chunks=100)
y = da.random.random(size=(m,), chunks=100)
# DaskDMatrix acts like normal DMatrix, works as a proxy for local
# DMatrix scatter around workers.
@@ -25,21 +25,23 @@ def main(client):
# distributed version of train returns a dictionary containing the
# resulting booster and evaluation history obtained from
# evaluation metrics.
output = xgb.dask.train(client,
{'verbosity': 1,
'tree_method': 'hist'},
dtrain,
num_boost_round=4, evals=[(dtrain, 'train')])
bst = output['booster']
history = output['history']
output = xgb.dask.train(
client,
{"verbosity": 1, "tree_method": "hist"},
dtrain,
num_boost_round=4,
evals=[(dtrain, "train")],
)
bst = output["booster"]
history = output["history"]
# you can pass output directly into `predict` too.
prediction = xgb.dask.predict(client, bst, dtrain)
print('Evaluation history:', history)
print("Evaluation history:", history)
return prediction
if __name__ == '__main__':
if __name__ == "__main__":
# or use other clusters for scaling
with LocalCluster(n_workers=7, threads_per_worker=4) as cluster:
with Client(cluster) as client:

View File

@@ -13,33 +13,38 @@ from xgboost import dask as dxgb
from xgboost.dask import DaskDMatrix
def using_dask_matrix(client: Client, X, y):
# DaskDMatrix acts like normal DMatrix, works as a proxy for local
# DMatrix scatter around workers.
def using_dask_matrix(client: Client, X: da.Array, y: da.Array) -> da.Array:
# DaskDMatrix acts like normal DMatrix, works as a proxy for local DMatrix scatter
# around workers.
dtrain = DaskDMatrix(client, X, y)
# Use train method from xgboost.dask instead of xgboost. This
# distributed version of train returns a dictionary containing the
# resulting booster and evaluation history obtained from
# evaluation metrics.
output = xgb.dask.train(client,
{'verbosity': 2,
# Golden line for GPU training
'tree_method': 'gpu_hist'},
dtrain,
num_boost_round=4, evals=[(dtrain, 'train')])
bst = output['booster']
history = output['history']
# Use train method from xgboost.dask instead of xgboost. This distributed version
# of train returns a dictionary containing the resulting booster and evaluation
# history obtained from evaluation metrics.
output = xgb.dask.train(
client,
{
"verbosity": 2,
"tree_method": "hist",
# Golden line for GPU training
"device": "cuda",
},
dtrain,
num_boost_round=4,
evals=[(dtrain, "train")],
)
bst = output["booster"]
history = output["history"]
# you can pass output directly into `predict` too.
prediction = xgb.dask.predict(client, bst, dtrain)
print('Evaluation history:', history)
print("Evaluation history:", history)
return prediction
def using_quantile_device_dmatrix(client: Client, X, y):
"""`DaskQuantileDMatrix` is a data type specialized for `gpu_hist` and `hist` tree
methods for reducing memory usage.
def using_quantile_device_dmatrix(client: Client, X: da.Array, y: da.Array) -> da.Array:
"""`DaskQuantileDMatrix` is a data type specialized for `hist` tree methods for
reducing memory usage.
.. versionadded:: 1.2.0
@@ -52,26 +57,28 @@ def using_quantile_device_dmatrix(client: Client, X, y):
# the `ref` argument of `DaskQuantileDMatrix`.
dtrain = dxgb.DaskQuantileDMatrix(client, X, y)
output = xgb.dask.train(
client, {"verbosity": 2, "tree_method": "gpu_hist"}, dtrain, num_boost_round=4
client,
{"verbosity": 2, "tree_method": "hist", "device": "cuda"},
dtrain,
num_boost_round=4,
)
prediction = xgb.dask.predict(client, output, X)
return prediction
if __name__ == '__main__':
if __name__ == "__main__":
# `LocalCUDACluster` is used for assigning GPU to XGBoost processes. Here
# `n_workers` represents the number of GPUs since we use one GPU per worker
# process.
# `n_workers` represents the number of GPUs since we use one GPU per worker process.
with LocalCUDACluster(n_workers=2, threads_per_worker=4) as cluster:
with Client(cluster) as client:
# generate some random data for demonstration
m = 100000
n = 100
X = da.random.random(size=(m, n), chunks=10000)
y = da.random.random(size=(m, ), chunks=10000)
y = da.random.random(size=(m,), chunks=10000)
print('Using DaskQuantileDMatrix')
print("Using DaskQuantileDMatrix")
from_ddqdm = using_quantile_device_dmatrix(client, X, y)
print('Using DMatrix')
print("Using DMatrix")
from_dmatrix = using_dask_matrix(client, X, y)

View File

@@ -21,7 +21,8 @@ def main(client):
y = da.random.random(m, partition_size)
regressor = xgboost.dask.DaskXGBRegressor(verbosity=1)
regressor.set_params(tree_method='gpu_hist')
# set the device to CUDA
regressor.set_params(tree_method="hist", device="cuda")
# assigning client here is optional
regressor.client = client
@@ -31,13 +32,13 @@ def main(client):
bst = regressor.get_booster()
history = regressor.evals_result()
print('Evaluation history:', history)
print("Evaluation history:", history)
# returned prediction is always a dask array.
assert isinstance(prediction, da.Array)
return bst # returning the trained model
return bst # returning the trained model
if __name__ == '__main__':
if __name__ == "__main__":
# With dask cuda, one can scale up XGBoost to arbitrary GPU clusters.
# `LocalCUDACluster` used here is only for demonstration purpose.
with LocalCUDACluster() as cluster:

View File

@@ -1,5 +0,0 @@
# GPU Acceleration Demo
`cover_type.py` shows how to train a model on the [forest cover type](https://archive.ics.uci.edu/ml/datasets/covertype) dataset using GPU acceleration. The forest cover type dataset has 581,012 rows and 54 features, making it time consuming to process. We compare the run-time and accuracy of the GPU and CPU histogram algorithms.
`shap.ipynb` demonstrates using GPU acceleration to compute SHAP values for feature importance.

View File

@@ -0,0 +1,8 @@
:orphan:
GPU Acceleration Demo
=====================
This is a collection of demonstration scripts to showcase the basic usage of GPU. Please
see :doc:`/gpu/index` for more info. There are other demonstrations for distributed GPU
training using dask or spark.

View File

@@ -1,41 +1,49 @@
"""
Using xgboost on GPU devices
============================
Shows how to train a model on the `forest cover type
<https://archive.ics.uci.edu/ml/datasets/covertype>`_ dataset using GPU
acceleration. The forest cover type dataset has 581,012 rows and 54 features, making it
time consuming to process. We compare the run-time and accuracy of the GPU and CPU
histogram algorithms.
In addition, the demo showcases using GPU with other GPU-related libraries including
cupy and cuml. These libraries are not strictly required.
"""
import time
import cupy as cp
from cuml.model_selection import train_test_split
from sklearn.datasets import fetch_covtype
from sklearn.model_selection import train_test_split
import xgboost as xgb
# Fetch dataset using sklearn
cov = fetch_covtype()
X = cov.data
y = cov.target
X, y = fetch_covtype(return_X_y=True)
X = cp.array(X)
y = cp.array(y)
y -= y.min()
# Create 0.75/0.25 train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, train_size=0.75,
random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.25, train_size=0.75, random_state=42
)
# Specify sufficient boosting iterations to reach a minimum
num_round = 3000
# Leave most parameters as default
param = {'objective': 'multi:softmax', # Specify multiclass classification
'num_class': 8, # Number of possible output classes
'tree_method': 'gpu_hist' # Use GPU accelerated algorithm
}
# Convert input data from numpy to XGBoost format
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)
gpu_res = {} # Store accuracy result
tmp = time.time()
clf = xgb.XGBClassifier(device="cuda", n_estimators=num_round)
# Train model
xgb.train(param, dtrain, num_round, evals=[(dtest, 'test')], evals_result=gpu_res)
print("GPU Training Time: %s seconds" % (str(time.time() - tmp)))
start = time.time()
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
gpu_res = clf.evals_result()
print("GPU Training Time: %s seconds" % (str(time.time() - start)))
# Repeat for CPU algorithm
tmp = time.time()
param['tree_method'] = 'hist'
cpu_res = {}
xgb.train(param, dtrain, num_round, evals=[(dtest, 'test')], evals_result=cpu_res)
print("CPU Training Time: %s seconds" % (str(time.time() - tmp)))
# Train the same model on the CPU so its run time and evaluation history can be
# compared with the GPU run above.
clf = xgb.XGBClassifier(device="cpu", n_estimators=num_round)
start = time.time()
# The estimator must be fitted with an eval_set before evals_result() is called;
# without this step nothing is trained and the reported time is meaningless.
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
cpu_res = clf.evals_result()
print("CPU Training Time: %s seconds" % (str(time.time() - start)))

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,55 @@
"""
Use GPU to speedup SHAP value computation
=========================================
Demonstrates using GPU acceleration to compute SHAP values for feature importance.
"""
import shap
from sklearn.datasets import fetch_california_housing
import xgboost as xgb
# Fetch dataset using sklearn
data = fetch_california_housing()
print(data.DESCR)
X = data.data
y = data.target
num_round = 500
param = {
"eta": 0.05,
"max_depth": 10,
"tree_method": "hist",
"device": "cuda",
}
# GPU accelerated training
dtrain = xgb.DMatrix(X, label=y, feature_names=data.feature_names)
model = xgb.train(param, dtrain, num_round)
# Compute shap values using GPU with xgboost
model.set_param({"device": "cuda"})
shap_values = model.predict(dtrain, pred_contribs=True)
# Compute shap interaction values using GPU
shap_interaction_values = model.predict(dtrain, pred_interactions=True)
# shap will call the GPU accelerated version as long as the device parameter is set to
# "cuda"
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)
# visualize the first prediction's explanation
shap.force_plot(
explainer.expected_value,
shap_values[0, :],
X[0, :],
feature_names=data.feature_names,
matplotlib=True,
)
# Show a summary of feature importance
shap.summary_plot(shap_values, X, plot_type="bar", feature_names=data.feature_names)

View File

@@ -1,9 +1,9 @@
'''
"""
Demo for using and defining callback functions
==============================================
.. versionadded:: 1.3.0
'''
"""
import argparse
import os
import tempfile
@@ -17,10 +17,11 @@ import xgboost as xgb
class Plotting(xgb.callback.TrainingCallback):
'''Plot evaluation result during training. Only for demonstration purpose as it's quite
"""Plot evaluation result during training. Only for demonstration purpose as it's quite
slow to draw.
'''
"""
def __init__(self, rounds):
self.fig = plt.figure()
self.ax = self.fig.add_subplot(111)
@@ -31,16 +32,16 @@ class Plotting(xgb.callback.TrainingCallback):
plt.ion()
def _get_key(self, data, metric):
return f'{data}-{metric}'
return f"{data}-{metric}"
def after_iteration(self, model, epoch, evals_log):
'''Update the plot.'''
"""Update the plot."""
if not self.lines:
for data, metric in evals_log.items():
for metric_name, log in metric.items():
key = self._get_key(data, metric_name)
expanded = log + [0] * (self.rounds - len(log))
self.lines[key], = self.ax.plot(self.x, expanded, label=key)
(self.lines[key],) = self.ax.plot(self.x, expanded, label=key)
self.ax.legend()
else:
# https://pythonspot.com/matplotlib-update-plot/
@@ -55,8 +56,8 @@ class Plotting(xgb.callback.TrainingCallback):
def custom_callback():
'''Demo for defining a custom callback function that plots evaluation result during
training.'''
"""Demo for defining a custom callback function that plots evaluation result during
training."""
X, y = load_breast_cancer(return_X_y=True)
X_train, X_valid, y_train, y_valid = train_test_split(X, y, random_state=0)
@@ -69,14 +70,16 @@ def custom_callback():
# Pass it to the `callbacks` parameter as a list.
xgb.train(
{
'objective': 'binary:logistic',
'eval_metric': ['error', 'rmse'],
'tree_method': 'gpu_hist'
"objective": "binary:logistic",
"eval_metric": ["error", "rmse"],
"tree_method": "hist",
"device": "cuda",
},
D_train,
evals=[(D_train, 'Train'), (D_valid, 'Valid')],
evals=[(D_train, "Train"), (D_valid, "Valid")],
num_boost_round=num_boost_round,
callbacks=[plotting])
callbacks=[plotting],
)
def check_point_callback():
@@ -89,10 +92,10 @@ def check_point_callback():
if i == 0:
continue
if as_pickle:
path = os.path.join(tmpdir, 'model_' + str(i) + '.pkl')
path = os.path.join(tmpdir, "model_" + str(i) + ".pkl")
else:
path = os.path.join(tmpdir, 'model_' + str(i) + '.json')
assert(os.path.exists(path))
path = os.path.join(tmpdir, "model_" + str(i) + ".json")
assert os.path.exists(path)
X, y = load_breast_cancer(return_X_y=True)
m = xgb.DMatrix(X, y)
@@ -100,31 +103,36 @@ def check_point_callback():
with tempfile.TemporaryDirectory() as tmpdir:
# Use callback class from xgboost.callback
# Feel free to subclass/customize it to suit your need.
check_point = xgb.callback.TrainingCheckPoint(directory=tmpdir,
iterations=rounds,
name='model')
xgb.train({'objective': 'binary:logistic'}, m,
num_boost_round=10,
verbose_eval=False,
callbacks=[check_point])
check_point = xgb.callback.TrainingCheckPoint(
directory=tmpdir, iterations=rounds, name="model"
)
xgb.train(
{"objective": "binary:logistic"},
m,
num_boost_round=10,
verbose_eval=False,
callbacks=[check_point],
)
check(False)
# This version of checkpoint saves everything including parameters and
# model. See: doc/tutorials/saving_model.rst
check_point = xgb.callback.TrainingCheckPoint(directory=tmpdir,
iterations=rounds,
as_pickle=True,
name='model')
xgb.train({'objective': 'binary:logistic'}, m,
num_boost_round=10,
verbose_eval=False,
callbacks=[check_point])
check_point = xgb.callback.TrainingCheckPoint(
directory=tmpdir, iterations=rounds, as_pickle=True, name="model"
)
xgb.train(
{"objective": "binary:logistic"},
m,
num_boost_round=10,
verbose_eval=False,
callbacks=[check_point],
)
check(True)
if __name__ == '__main__':
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--plot', default=1, type=int)
parser.add_argument("--plot", default=1, type=int)
args = parser.parse_args()
check_point_callback()

View File

@@ -63,7 +63,8 @@ def load_cat_in_the_dat() -> tuple[pd.DataFrame, pd.Series]:
params = {
"tree_method": "gpu_hist",
"tree_method": "hist",
"device": "cuda",
"n_estimators": 32,
"colsample_bylevel": 0.7,
}

View File

@@ -58,13 +58,13 @@ def main() -> None:
# Specify `enable_categorical` to True, also we use onehot encoding based split
# here for demonstration. For details see the document of `max_cat_to_onehot`.
reg = xgb.XGBRegressor(
tree_method="gpu_hist", enable_categorical=True, max_cat_to_onehot=5
tree_method="hist", enable_categorical=True, max_cat_to_onehot=5, device="cuda"
)
reg.fit(X, y, eval_set=[(X, y)])
# Pass in already encoded data
X_enc, y_enc = make_categorical(100, 10, 4, True)
reg_enc = xgb.XGBRegressor(tree_method="gpu_hist")
reg_enc = xgb.XGBRegressor(tree_method="hist", device="cuda")
reg_enc.fit(X_enc, y_enc, eval_set=[(X_enc, y_enc)])
reg_results = np.array(reg.evals_result()["validation_0"]["rmse"])

View File

@@ -22,7 +22,10 @@ import xgboost
def make_batches(
n_samples_per_batch: int, n_features: int, n_batches: int, tmpdir: str,
n_samples_per_batch: int,
n_features: int,
n_batches: int,
tmpdir: str,
) -> List[Tuple[str, str]]:
files: List[Tuple[str, str]] = []
rng = np.random.RandomState(1994)
@@ -38,6 +41,7 @@ def make_batches(
class Iterator(xgboost.DataIter):
"""A custom iterator for loading files in batches."""
def __init__(self, file_paths: List[Tuple[str, str]]):
self._file_paths = file_paths
self._it = 0
@@ -82,10 +86,11 @@ def main(tmpdir: str) -> xgboost.Booster:
missing = np.NaN
Xy = xgboost.DMatrix(it, missing=missing, enable_categorical=False)
# Other tree methods including ``hist`` and ``gpu_hist`` also work, see tutorial in
# ``approx`` is also supported, but less efficient due to sketching. GPU behaves
# differently than CPU tree methods as it uses a hybrid approach. See tutorial in
# doc for details.
booster = xgboost.train(
{"tree_method": "approx", "max_depth": 2},
{"tree_method": "hist", "max_depth": 4},
Xy,
evals=[(Xy, "Train")],
num_boost_round=10,

View File

@@ -104,7 +104,8 @@ def ranking_demo(args: argparse.Namespace) -> None:
qid_test = qid_test[sorted_idx]
ranker = xgb.XGBRanker(
tree_method="gpu_hist",
tree_method="hist",
device="cuda",
lambdarank_pair_method="topk",
lambdarank_num_pair_per_sample=13,
eval_metric=["ndcg@1", "ndcg@8"],
@@ -161,7 +162,8 @@ def click_data_demo(args: argparse.Namespace) -> None:
ranker = xgb.XGBRanker(
n_estimators=512,
tree_method="gpu_hist",
tree_method="hist",
device="cuda",
learning_rate=0.01,
reg_lambda=1.5,
subsample=0.8,

View File

@@ -23,22 +23,23 @@ import numpy
import xgboost
COLS = 64
ROWS_PER_BATCH = 1000 # data is splited by rows
ROWS_PER_BATCH = 1000 # data is splited by rows
BATCHES = 32
class IterForDMatrixDemo(xgboost.core.DataIter):
'''A data iterator for XGBoost DMatrix.
"""A data iterator for XGBoost DMatrix.
`reset` and `next` are required for any data iterator, other functions here
are utilities for demonstration purposes.
'''
"""
def __init__(self):
'''Generate some random data for demonstration.
"""Generate some random data for demonstration.
Actual data can be anything that is currently supported by XGBoost.
'''
"""
self.rows = ROWS_PER_BATCH
self.cols = COLS
rng = cupy.random.RandomState(1994)
@@ -46,7 +47,7 @@ class IterForDMatrixDemo(xgboost.core.DataIter):
self._labels = [rng.randn(self.rows)] * BATCHES
self._weights = [rng.uniform(size=self.rows)] * BATCHES
self.it = 0 # set iterator to 0
self.it = 0 # set iterator to 0
super().__init__()
def as_array(self):
@@ -59,27 +60,26 @@ class IterForDMatrixDemo(xgboost.core.DataIter):
return cupy.concatenate(self._weights)
def data(self):
'''Utility function for obtaining current batch of data.'''
"""Utility function for obtaining current batch of data."""
return self._data[self.it]
def labels(self):
'''Utility function for obtaining current batch of label.'''
"""Utility function for obtaining current batch of label."""
return self._labels[self.it]
def weights(self):
return self._weights[self.it]
def reset(self):
'''Reset the iterator'''
"""Reset the iterator"""
self.it = 0
def next(self, input_data):
'''Yield next batch of data.'''
"""Yield next batch of data."""
if self.it == len(self._data):
# Return 0 when there's no more batch.
return 0
input_data(data=self.data(), label=self.labels(),
weight=self.weights())
input_data(data=self.data(), label=self.labels(), weight=self.weights())
self.it += 1
return 1
@@ -103,18 +103,19 @@ def main():
assert m_with_it.num_col() == m.num_col()
assert m_with_it.num_row() == m.num_row()
# Tree meethod must be one of the `hist` or `gpu_hist`. We use `gpu_hist` for GPU
# input here.
# Tree method must be `hist`.
reg_with_it = xgboost.train(
{"tree_method": "gpu_hist"}, m_with_it, num_boost_round=rounds
{"tree_method": "hist", "device": "cuda"}, m_with_it, num_boost_round=rounds
)
predict_with_it = reg_with_it.predict(m_with_it)
reg = xgboost.train({"tree_method": "gpu_hist"}, m, num_boost_round=rounds)
reg = xgboost.train(
{"tree_method": "hist", "device": "cuda"}, m, num_boost_round=rounds
)
predict = reg.predict(m)
numpy.testing.assert_allclose(predict_with_it, predict, rtol=1e6)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -7,6 +7,11 @@ Quantile Regression
The script is inspired by this awesome example in sklearn:
https://scikit-learn.org/stable/auto_examples/ensemble/plot_gradient_boosting_quantile.html
.. note::
The feature is only supported using the Python package. In addition, quantile
crossing can happen due to a limitation in the algorithm.
"""
import argparse
from typing import Dict

View File

@@ -24,7 +24,7 @@ def main():
Xy = xgb.DMatrix(X_train, y_train)
evals_result: xgb.callback.EvaluationMonitor.EvalsLog = {}
booster = xgb.train(
{"tree_method": "gpu_hist", "max_depth": 6},
{"tree_method": "hist", "max_depth": 6, "device": "cuda"},
Xy,
num_boost_round=n_rounds,
evals=[(Xy, "Train")],
@@ -33,8 +33,8 @@ def main():
SHAP = booster.predict(Xy, pred_contribs=True)
# Refresh the leaf value and tree statistic
X_refresh = X[X.shape[0] // 2:]
y_refresh = y[y.shape[0] // 2:]
X_refresh = X[X.shape[0] // 2 :]
y_refresh = y[y.shape[0] // 2 :]
Xy_refresh = xgb.DMatrix(X_refresh, y_refresh)
# The model will adapt to other half of the data by changing leaf value (no change in
# split condition) with refresh_leaf set to True.
@@ -87,7 +87,7 @@ def main():
np.testing.assert_allclose(
np.array(prune_result["Original"]["rmse"]),
np.array(prune_result["Train"]["rmse"]),
atol=1e-5
atol=1e-5,
)

1
demo/nvflare/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
!config

View File

@@ -0,0 +1,23 @@
{
"format_version": 2,
"executors": [
{
"tasks": [
"train"
],
"executor": {
"path": "trainer.XGBoostTrainer",
"args": {
"server_address": "localhost:9091",
"world_size": 2,
"server_cert_path": "server-cert.pem",
"client_key_path": "client-key.pem",
"client_cert_path": "client-cert.pem",
"use_gpus": false
}
}
}
],
"task_result_filters": [],
"task_data_filters": []
}

View File

@@ -0,0 +1,22 @@
{
"format_version": 2,
"server": {
"heart_beat_timeout": 600
},
"task_data_filters": [],
"task_result_filters": [],
"workflows": [
{
"id": "server_workflow",
"path": "controller.XGBoostController",
"args": {
"port": 9091,
"world_size": 2,
"server_key_path": "server-key.pem",
"server_cert_path": "server-cert.pem",
"client_cert_path": "client-cert.pem"
}
}
],
"components": []
}

View File

@@ -6,7 +6,7 @@ This directory contains a demo of Horizontal Federated Learning using
## Training with CPU only
To run the demo, first build XGBoost with the federated learning plugin enabled (see the
[README](../../plugin/federated/README.md)).
[README](../../../plugin/federated/README.md)).
Install NVFlare (note that currently NVFlare only supports Python 3.8):
```shell

View File

@@ -70,8 +70,7 @@ class XGBoostTrainer(Executor):
param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
if self._use_gpus:
self.log_info(fl_ctx, f'Training with GPU {rank}')
param['tree_method'] = 'gpu_hist'
param['gpu_id'] = rank
param['device'] = f"cuda:{rank}"
# Specify validations set to watch performance
watchlist = [(dtest, 'eval'), (dtrain, 'train')]

View File

@@ -16,7 +16,7 @@ split -n l/${world_size} --numeric-suffixes=1 -a 1 ../../data/agaricus.txt.test
nvflare poc -n 2 --prepare
mkdir -p /tmp/nvflare/poc/admin/transfer/horizontal-xgboost
cp -fr config custom /tmp/nvflare/poc/admin/transfer/horizontal-xgboost
cp -fr ../config custom /tmp/nvflare/poc/admin/transfer/horizontal-xgboost
cp server-*.pem client-cert.pem /tmp/nvflare/poc/server/
for (( site=1; site<=world_size; site++ )); do
cp server-cert.pem client-*.pem /tmp/nvflare/poc/site-"$site"/

View File

@@ -6,7 +6,7 @@ This directory contains a demo of Vertical Federated Learning using
## Training with CPU only
To run the demo, first build XGBoost with the federated learning plugin enabled (see the
[README](../../plugin/federated/README.md)).
[README](../../../plugin/federated/README.md)).
Install NVFlare (note that currently NVFlare only supports Python 3.8):
```shell

View File

@@ -16,7 +16,7 @@ class SupportedTasks(object):
class XGBoostTrainer(Executor):
def __init__(self, server_address: str, world_size: int, server_cert_path: str,
client_key_path: str, client_cert_path: str):
client_key_path: str, client_cert_path: str, use_gpus: bool):
"""Trainer for federated XGBoost.
Args:
@@ -32,6 +32,7 @@ class XGBoostTrainer(Executor):
self._server_cert_path = server_cert_path
self._client_key_path = client_key_path
self._client_cert_path = client_cert_path
self._use_gpus = use_gpus
def execute(self, task_name: str, shareable: Shareable, fl_ctx: FLContext,
abort_signal: Signal) -> Shareable:
@@ -81,6 +82,8 @@ class XGBoostTrainer(Executor):
'objective': 'binary:logistic',
'eval_metric': 'auc',
}
if self._use_gpus:
self.log_info(fl_ctx, 'GPUs are not currently supported by vertical federated XGBoost')
# specify validations set to watch performance
watchlist = [(dtest, "eval"), (dtrain, "train")]

View File

@@ -56,7 +56,7 @@ fi
nvflare poc -n 2 --prepare
mkdir -p /tmp/nvflare/poc/admin/transfer/vertical-xgboost
cp -fr config custom /tmp/nvflare/poc/admin/transfer/vertical-xgboost
cp -fr ../config custom /tmp/nvflare/poc/admin/transfer/vertical-xgboost
cp server-*.pem client-cert.pem /tmp/nvflare/poc/server/
for (( site=1; site<=world_size; site++ )); do
cp server-cert.pem client-*.pem /tmp/nvflare/poc/site-"${site}"/

View File

@@ -1,47 +0,0 @@
Using XGBoost with RAPIDS Memory Manager (RMM) plugin (EXPERIMENTAL)
====================================================================
[RAPIDS Memory Manager (RMM)](https://github.com/rapidsai/rmm) library provides a collection of
efficient memory allocators for NVIDIA GPUs. It is now possible to use XGBoost with memory
allocators provided by RMM, by enabling the RMM integration plugin.
The demos in this directory highlight one RMM allocator in particular: **the pool sub-allocator**.
This allocator addresses the slow speed of `cudaMalloc()` by allocating a large chunk of memory
upfront. Subsequent allocations will draw from the pool of already allocated memory and thus avoid
the overhead of calling `cudaMalloc()` directly. See
[these GTC talk slides](https://on-demand.gputechconf.com/gtc/2015/presentation/S5530-Stephen-Jones.pdf)
for more details.
Before running the demos, ensure that XGBoost is compiled with the RMM plugin enabled. To do this,
run CMake with option `-DPLUGIN_RMM=ON` (`-DUSE_CUDA=ON` also required):
```
cmake .. -DUSE_CUDA=ON -DUSE_NCCL=ON -DPLUGIN_RMM=ON
make -j4
```
CMake will attempt to locate the RMM library in your build environment. You may choose to build
RMM from the source, or install it using the Conda package manager. If CMake cannot find RMM, you
should specify the location of RMM with the CMake prefix:
```
# If using Conda:
cmake .. -DUSE_CUDA=ON -DUSE_NCCL=ON -DPLUGIN_RMM=ON -DCMAKE_PREFIX_PATH=$CONDA_PREFIX
# If using RMM installed with a custom location
cmake .. -DUSE_CUDA=ON -DUSE_NCCL=ON -DPLUGIN_RMM=ON -DCMAKE_PREFIX_PATH=/path/to/rmm
```
# Informing XGBoost about RMM pool
When XGBoost is compiled with RMM, most large allocations go through RMM
allocators, but some small allocations in performance-critical areas use a different
caching allocator so that we can have better control over memory allocation behavior.
Users can override this behavior and force the use of RMM for all allocations by setting
the global configuration ``use_rmm``:
``` python
with xgb.config_context(use_rmm=True):
clf = xgb.XGBClassifier(tree_method="gpu_hist")
```
Depending on the choice of memory pool size or type of allocator, this may have a negative
performance impact.
* [Using RMM with a single GPU](./rmm_singlegpu.py)
* [Using RMM with a local Dask cluster consisting of multiple GPUs](./rmm_mgpu_with_dask.py)

View File

@@ -0,0 +1,51 @@
Using XGBoost with RAPIDS Memory Manager (RMM) plugin (EXPERIMENTAL)
====================================================================
`RAPIDS Memory Manager (RMM) <https://github.com/rapidsai/rmm>`__ library provides a
collection of efficient memory allocators for NVIDIA GPUs. It is now possible to use
XGBoost with memory allocators provided by RMM, by enabling the RMM integration plugin.
The demos in this directory highlight one RMM allocator in particular: **the pool
sub-allocator**. This allocator addresses the slow speed of ``cudaMalloc()`` by
allocating a large chunk of memory upfront. Subsequent allocations will draw from the pool
of already allocated memory and thus avoid the overhead of calling ``cudaMalloc()``
directly. See `these GTC talk slides
<https://on-demand.gputechconf.com/gtc/2015/presentation/S5530-Stephen-Jones.pdf>`_ for
more details.
Before running the demos, ensure that XGBoost is compiled with the RMM plugin enabled. To do this,
run CMake with option ``-DPLUGIN_RMM=ON`` (``-DUSE_CUDA=ON`` also required):
.. code-block:: sh
cmake .. -DUSE_CUDA=ON -DUSE_NCCL=ON -DPLUGIN_RMM=ON
make -j$(nproc)
CMake will attempt to locate the RMM library in your build environment. You may choose to build
RMM from the source, or install it using the Conda package manager. If CMake cannot find RMM, you
should specify the location of RMM with the CMake prefix:
.. code-block:: sh
# If using Conda:
cmake .. -DUSE_CUDA=ON -DUSE_NCCL=ON -DPLUGIN_RMM=ON -DCMAKE_PREFIX_PATH=$CONDA_PREFIX
# If using RMM installed with a custom location
cmake .. -DUSE_CUDA=ON -DUSE_NCCL=ON -DPLUGIN_RMM=ON -DCMAKE_PREFIX_PATH=/path/to/rmm
********************************
Informing XGBoost about RMM pool
********************************
When XGBoost is compiled with RMM, most large allocations go through RMM
allocators, but some small allocations in performance-critical areas use a different
caching allocator so that we can have better control over memory allocation behavior.
Users can override this behavior and force the use of RMM for all allocations by setting
the global configuration ``use_rmm``:
.. code-block:: python
with xgb.config_context(use_rmm=True):
clf = xgb.XGBClassifier(tree_method="hist", device="cuda")
Depending on the choice of memory pool size or type of allocator, this may have a negative
performance impact.

View File

@@ -1,3 +1,7 @@
"""
Using rmm with Dask
===================
"""
import dask
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
@@ -11,25 +15,33 @@ def main(client):
# xgb.set_config(use_rmm=True)
X, y = make_classification(n_samples=10000, n_informative=5, n_classes=3)
# In practice one should prefer loading the data with dask collections instead of using
# `from_array`.
# In practice one should prefer loading the data with dask collections instead of
# using `from_array`.
X = dask.array.from_array(X)
y = dask.array.from_array(y)
dtrain = xgb.dask.DaskDMatrix(client, X, label=y)
params = {'max_depth': 8, 'eta': 0.01, 'objective': 'multi:softprob', 'num_class': 3,
'tree_method': 'gpu_hist', 'eval_metric': 'merror'}
output = xgb.dask.train(client, params, dtrain, num_boost_round=100,
evals=[(dtrain, 'train')])
bst = output['booster']
history = output['history']
for i, e in enumerate(history['train']['merror']):
print(f'[{i}] train-merror: {e}')
params = {
"max_depth": 8,
"eta": 0.01,
"objective": "multi:softprob",
"num_class": 3,
"tree_method": "hist",
"eval_metric": "merror",
"device": "cuda",
}
output = xgb.dask.train(
client, params, dtrain, num_boost_round=100, evals=[(dtrain, "train")]
)
bst = output["booster"]
history = output["history"]
for i, e in enumerate(history["train"]["merror"]):
print(f"[{i}] train-merror: {e}")
if __name__ == '__main__':
# To use RMM pool allocator with a GPU Dask cluster, just add rmm_pool_size option to
# LocalCUDACluster constructor.
with LocalCUDACluster(rmm_pool_size='2GB') as cluster:
if __name__ == "__main__":
# To use RMM pool allocator with a GPU Dask cluster, just add rmm_pool_size option
# to LocalCUDACluster constructor.
with LocalCUDACluster(rmm_pool_size="2GB") as cluster:
with Client(cluster) as client:
main(client)

View File

@@ -1,3 +1,7 @@
"""
Using rmm on a single node device
=================================
"""
import rmm
from sklearn.datasets import make_classification
@@ -16,7 +20,8 @@ params = {
"eta": 0.01,
"objective": "multi:softprob",
"num_class": 3,
"tree_method": "gpu_hist",
"tree_method": "hist",
"device": "cuda",
}
# XGBoost will automatically use the RMM pool allocator
bst = xgb.train(params, dtrain, num_boost_round=100, evals=[(dtrain, "train")])