Handle the new device parameter in dask and demos. (#9386)

* Handle the new `device` parameter in dask and demos.

- Check no ordinal is specified in the dask interface.
- Update demos.
- Update dask doc.
- Update the condition for QDM.
This commit is contained in:
Jiaming Yuan
2023-07-15 19:11:20 +08:00
committed by GitHub
parent 9da5050643
commit 16eb41936d
31 changed files with 631 additions and 450 deletions

View File

@@ -18,43 +18,45 @@ def main(client):
# The Veterans' Administration Lung Cancer Trial
# The Statistical Analysis of Failure Time Data by Kalbfleisch J. and Prentice R (1980)
CURRENT_DIR = os.path.dirname(__file__)
df = dd.read_csv(os.path.join(CURRENT_DIR, os.pardir, 'data', 'veterans_lung_cancer.csv'))
df = dd.read_csv(
os.path.join(CURRENT_DIR, os.pardir, "data", "veterans_lung_cancer.csv")
)
# DaskDMatrix acts like normal DMatrix, works as a proxy for local
# DMatrix scatter around workers.
# For AFT survival, you'd need to extract the lower and upper bounds for the label
# and pass them as arguments to DaskDMatrix.
y_lower_bound = df['Survival_label_lower_bound']
y_upper_bound = df['Survival_label_upper_bound']
X = df.drop(['Survival_label_lower_bound',
'Survival_label_upper_bound'], axis=1)
dtrain = DaskDMatrix(client, X, label_lower_bound=y_lower_bound,
label_upper_bound=y_upper_bound)
y_lower_bound = df["Survival_label_lower_bound"]
y_upper_bound = df["Survival_label_upper_bound"]
X = df.drop(["Survival_label_lower_bound", "Survival_label_upper_bound"], axis=1)
dtrain = DaskDMatrix(
client, X, label_lower_bound=y_lower_bound, label_upper_bound=y_upper_bound
)
# Use train method from xgboost.dask instead of xgboost. This
# distributed version of train returns a dictionary containing the
# resulting booster and evaluation history obtained from
# evaluation metrics.
params = {'verbosity': 1,
'objective': 'survival:aft',
'eval_metric': 'aft-nloglik',
'learning_rate': 0.05,
'aft_loss_distribution_scale': 1.20,
'aft_loss_distribution': 'normal',
'max_depth': 6,
'lambda': 0.01,
'alpha': 0.02}
output = xgb.dask.train(client,
params,
dtrain,
num_boost_round=100,
evals=[(dtrain, 'train')])
bst = output['booster']
history = output['history']
params = {
"verbosity": 1,
"objective": "survival:aft",
"eval_metric": "aft-nloglik",
"learning_rate": 0.05,
"aft_loss_distribution_scale": 1.20,
"aft_loss_distribution": "normal",
"max_depth": 6,
"lambda": 0.01,
"alpha": 0.02,
}
output = xgb.dask.train(
client, params, dtrain, num_boost_round=100, evals=[(dtrain, "train")]
)
bst = output["booster"]
history = output["history"]
# you can pass output directly into `predict` too.
prediction = xgb.dask.predict(client, bst, dtrain)
print('Evaluation history: ', history)
print("Evaluation history: ", history)
# Uncomment the following line to save the model to the disk
# bst.save_model('survival_model.json')
@@ -62,7 +64,7 @@ def main(client):
return prediction
if __name__ == '__main__':
if __name__ == "__main__":
# or use other clusters for scaling
with LocalCluster(n_workers=7, threads_per_worker=4) as cluster:
with Client(cluster) as client:

View File

@@ -15,7 +15,7 @@ def main(client):
m = 100000
n = 100
X = da.random.random(size=(m, n), chunks=100)
y = da.random.random(size=(m, ), chunks=100)
y = da.random.random(size=(m,), chunks=100)
# DaskDMatrix acts like normal DMatrix, works as a proxy for local
# DMatrix scatter around workers.
@@ -25,21 +25,23 @@ def main(client):
# distributed version of train returns a dictionary containing the
# resulting booster and evaluation history obtained from
# evaluation metrics.
output = xgb.dask.train(client,
{'verbosity': 1,
'tree_method': 'hist'},
dtrain,
num_boost_round=4, evals=[(dtrain, 'train')])
bst = output['booster']
history = output['history']
output = xgb.dask.train(
client,
{"verbosity": 1, "tree_method": "hist"},
dtrain,
num_boost_round=4,
evals=[(dtrain, "train")],
)
bst = output["booster"]
history = output["history"]
# you can pass output directly into `predict` too.
prediction = xgb.dask.predict(client, bst, dtrain)
print('Evaluation history:', history)
print("Evaluation history:", history)
return prediction
if __name__ == '__main__':
if __name__ == "__main__":
# or use other clusters for scaling
with LocalCluster(n_workers=7, threads_per_worker=4) as cluster:
with Client(cluster) as client:

View File

@@ -13,33 +13,38 @@ from xgboost import dask as dxgb
from xgboost.dask import DaskDMatrix
def using_dask_matrix(client: Client, X, y):
# DaskDMatrix acts like normal DMatrix, works as a proxy for local
# DMatrix scatter around workers.
def using_dask_matrix(client: Client, X: da.Array, y: da.Array) -> da.Array:
# DaskDMatrix acts like normal DMatrix, works as a proxy for local DMatrix scatter
# around workers.
dtrain = DaskDMatrix(client, X, y)
# Use train method from xgboost.dask instead of xgboost. This
# distributed version of train returns a dictionary containing the
# resulting booster and evaluation history obtained from
# evaluation metrics.
output = xgb.dask.train(client,
{'verbosity': 2,
# Golden line for GPU training
'tree_method': 'gpu_hist'},
dtrain,
num_boost_round=4, evals=[(dtrain, 'train')])
bst = output['booster']
history = output['history']
# Use train method from xgboost.dask instead of xgboost. This distributed version
# of train returns a dictionary containing the resulting booster and evaluation
# history obtained from evaluation metrics.
output = xgb.dask.train(
client,
{
"verbosity": 2,
"tree_method": "hist",
# Golden line for GPU training
"device": "cuda",
},
dtrain,
num_boost_round=4,
evals=[(dtrain, "train")],
)
bst = output["booster"]
history = output["history"]
# you can pass output directly into `predict` too.
prediction = xgb.dask.predict(client, bst, dtrain)
print('Evaluation history:', history)
print("Evaluation history:", history)
return prediction
def using_quantile_device_dmatrix(client: Client, X, y):
"""`DaskQuantileDMatrix` is a data type specialized for `gpu_hist` and `hist` tree
methods for reducing memory usage.
def using_quantile_device_dmatrix(client: Client, X: da.Array, y: da.Array) -> da.Array:
"""`DaskQuantileDMatrix` is a data type specialized for `hist` tree methods for
reducing memory usage.
.. versionadded:: 1.2.0
@@ -52,26 +57,28 @@ def using_quantile_device_dmatrix(client: Client, X, y):
# the `ref` argument of `DaskQuantileDMatrix`.
dtrain = dxgb.DaskQuantileDMatrix(client, X, y)
output = xgb.dask.train(
client, {"verbosity": 2, "tree_method": "gpu_hist"}, dtrain, num_boost_round=4
client,
{"verbosity": 2, "tree_method": "hist", "device": "cuda"},
dtrain,
num_boost_round=4,
)
prediction = xgb.dask.predict(client, output, X)
return prediction
if __name__ == '__main__':
if __name__ == "__main__":
# `LocalCUDACluster` is used for assigning GPU to XGBoost processes. Here
# `n_workers` represents the number of GPUs since we use one GPU per worker
# process.
# `n_workers` represents the number of GPUs since we use one GPU per worker process.
with LocalCUDACluster(n_workers=2, threads_per_worker=4) as cluster:
with Client(cluster) as client:
# generate some random data for demonstration
m = 100000
n = 100
X = da.random.random(size=(m, n), chunks=10000)
y = da.random.random(size=(m, ), chunks=10000)
y = da.random.random(size=(m,), chunks=10000)
print('Using DaskQuantileDMatrix')
print("Using DaskQuantileDMatrix")
from_ddqdm = using_quantile_device_dmatrix(client, X, y)
print('Using DMatrix')
print("Using DMatrix")
from_dmatrix = using_dask_matrix(client, X, y)

View File

@@ -21,7 +21,8 @@ def main(client):
y = da.random.random(m, partition_size)
regressor = xgboost.dask.DaskXGBRegressor(verbosity=1)
regressor.set_params(tree_method='gpu_hist')
# set the device to CUDA
regressor.set_params(tree_method="hist", device="cuda")
# assigning client here is optional
regressor.client = client
@@ -31,13 +32,13 @@ def main(client):
bst = regressor.get_booster()
history = regressor.evals_result()
print('Evaluation history:', history)
print("Evaluation history:", history)
# returned prediction is always a dask array.
assert isinstance(prediction, da.Array)
return bst # returning the trained model
return bst # returning the trained model
if __name__ == '__main__':
if __name__ == "__main__":
# With dask cuda, one can scale up XGBoost to arbitrary GPU clusters.
# `LocalCUDACluster` used here is only for demonstration purpose.
with LocalCUDACluster() as cluster:

View File

@@ -71,7 +71,8 @@ def custom_callback():
{
'objective': 'binary:logistic',
'eval_metric': ['error', 'rmse'],
'tree_method': 'gpu_hist'
'tree_method': 'hist',
"device": "cuda",
},
D_train,
evals=[(D_train, 'Train'), (D_valid, 'Valid')],

View File

@@ -63,7 +63,8 @@ def load_cat_in_the_dat() -> tuple[pd.DataFrame, pd.Series]:
params = {
"tree_method": "gpu_hist",
"tree_method": "hist",
"device": "cuda",
"n_estimators": 32,
"colsample_bylevel": 0.7,
}

View File

@@ -58,13 +58,13 @@ def main() -> None:
# Specify `enable_categorical` to True, also we use onehot encoding based split
# here for demonstration. For details see the document of `max_cat_to_onehot`.
reg = xgb.XGBRegressor(
tree_method="gpu_hist", enable_categorical=True, max_cat_to_onehot=5
tree_method="hist", enable_categorical=True, max_cat_to_onehot=5, device="cuda"
)
reg.fit(X, y, eval_set=[(X, y)])
# Pass in already encoded data
X_enc, y_enc = make_categorical(100, 10, 4, True)
reg_enc = xgb.XGBRegressor(tree_method="gpu_hist")
reg_enc = xgb.XGBRegressor(tree_method="hist", device="cuda")
reg_enc.fit(X_enc, y_enc, eval_set=[(X_enc, y_enc)])
reg_results = np.array(reg.evals_result()["validation_0"]["rmse"])

View File

@@ -82,8 +82,9 @@ def main(tmpdir: str) -> xgboost.Booster:
missing = np.NaN
Xy = xgboost.DMatrix(it, missing=missing, enable_categorical=False)
# Other tree methods including ``approx``, and ``gpu_hist`` are supported. GPU
# behaves differently than CPU tree methods. See tutorial in doc for details.
# ``approx`` is also supported, but less efficient due to sketching. GPU behaves
# differently than CPU tree methods as it uses a hybrid approach. See tutorial in
# doc for details.
booster = xgboost.train(
{"tree_method": "hist", "max_depth": 4},
Xy,

View File

@@ -104,7 +104,8 @@ def ranking_demo(args: argparse.Namespace) -> None:
qid_test = qid_test[sorted_idx]
ranker = xgb.XGBRanker(
tree_method="gpu_hist",
tree_method="hist",
device="cuda",
lambdarank_pair_method="topk",
lambdarank_num_pair_per_sample=13,
eval_metric=["ndcg@1", "ndcg@8"],
@@ -161,7 +162,8 @@ def click_data_demo(args: argparse.Namespace) -> None:
ranker = xgb.XGBRanker(
n_estimators=512,
tree_method="gpu_hist",
tree_method="hist",
device="cuda",
learning_rate=0.01,
reg_lambda=1.5,
subsample=0.8,

View File

@@ -23,22 +23,23 @@ import numpy
import xgboost
COLS = 64
ROWS_PER_BATCH = 1000 # data is splited by rows
ROWS_PER_BATCH = 1000 # data is splited by rows
BATCHES = 32
class IterForDMatrixDemo(xgboost.core.DataIter):
'''A data iterator for XGBoost DMatrix.
"""A data iterator for XGBoost DMatrix.
`reset` and `next` are required for any data iterator, other functions here
are utilites for demonstration's purpose.
'''
"""
def __init__(self):
'''Generate some random data for demostration.
"""Generate some random data for demostration.
Actual data can be anything that is currently supported by XGBoost.
'''
"""
self.rows = ROWS_PER_BATCH
self.cols = COLS
rng = cupy.random.RandomState(1994)
@@ -46,7 +47,7 @@ class IterForDMatrixDemo(xgboost.core.DataIter):
self._labels = [rng.randn(self.rows)] * BATCHES
self._weights = [rng.uniform(size=self.rows)] * BATCHES
self.it = 0 # set iterator to 0
self.it = 0 # set iterator to 0
super().__init__()
def as_array(self):
@@ -59,27 +60,26 @@ class IterForDMatrixDemo(xgboost.core.DataIter):
return cupy.concatenate(self._weights)
def data(self):
'''Utility function for obtaining current batch of data.'''
"""Utility function for obtaining current batch of data."""
return self._data[self.it]
def labels(self):
'''Utility function for obtaining current batch of label.'''
"""Utility function for obtaining current batch of label."""
return self._labels[self.it]
def weights(self):
return self._weights[self.it]
def reset(self):
'''Reset the iterator'''
"""Reset the iterator"""
self.it = 0
def next(self, input_data):
'''Yield next batch of data.'''
"""Yield next batch of data."""
if self.it == len(self._data):
# Return 0 when there's no more batch.
return 0
input_data(data=self.data(), label=self.labels(),
weight=self.weights())
input_data(data=self.data(), label=self.labels(), weight=self.weights())
self.it += 1
return 1
@@ -103,18 +103,19 @@ def main():
assert m_with_it.num_col() == m.num_col()
assert m_with_it.num_row() == m.num_row()
# Tree meethod must be one of the `hist` or `gpu_hist`. We use `gpu_hist` for GPU
# input here.
# Tree meethod must be `hist`.
reg_with_it = xgboost.train(
{"tree_method": "gpu_hist"}, m_with_it, num_boost_round=rounds
{"tree_method": "hist", "device": "cuda"}, m_with_it, num_boost_round=rounds
)
predict_with_it = reg_with_it.predict(m_with_it)
reg = xgboost.train({"tree_method": "gpu_hist"}, m, num_boost_round=rounds)
reg = xgboost.train(
{"tree_method": "hist", "device": "cuda"}, m, num_boost_round=rounds
)
predict = reg.predict(m)
numpy.testing.assert_allclose(predict_with_it, predict, rtol=1e6)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -24,7 +24,7 @@ def main():
Xy = xgb.DMatrix(X_train, y_train)
evals_result: xgb.callback.EvaluationMonitor.EvalsLog = {}
booster = xgb.train(
{"tree_method": "gpu_hist", "max_depth": 6},
{"tree_method": "hist", "max_depth": 6, "device": "cuda"},
Xy,
num_boost_round=n_rounds,
evals=[(Xy, "Train")],
@@ -33,8 +33,8 @@ def main():
SHAP = booster.predict(Xy, pred_contribs=True)
# Refresh the leaf value and tree statistic
X_refresh = X[X.shape[0] // 2:]
y_refresh = y[y.shape[0] // 2:]
X_refresh = X[X.shape[0] // 2 :]
y_refresh = y[y.shape[0] // 2 :]
Xy_refresh = xgb.DMatrix(X_refresh, y_refresh)
# The model will adapt to other half of the data by changing leaf value (no change in
# split condition) with refresh_leaf set to True.
@@ -87,7 +87,7 @@ def main():
np.testing.assert_allclose(
np.array(prune_result["Original"]["rmse"]),
np.array(prune_result["Train"]["rmse"]),
atol=1e-5
atol=1e-5,
)