Handle the new device parameter in dask and demos. (#9386)
* Handle the new `device` parameter in dask and demos. - Check no ordinal is specified in the dask interface. - Update demos. - Update dask doc. - Update the condition for QDM.
This commit is contained in:
@@ -18,43 +18,45 @@ def main(client):
|
||||
# The Veterans' Administration Lung Cancer Trial
|
||||
# The Statistical Analysis of Failure Time Data by Kalbfleisch J. and Prentice R (1980)
|
||||
CURRENT_DIR = os.path.dirname(__file__)
|
||||
df = dd.read_csv(os.path.join(CURRENT_DIR, os.pardir, 'data', 'veterans_lung_cancer.csv'))
|
||||
df = dd.read_csv(
|
||||
os.path.join(CURRENT_DIR, os.pardir, "data", "veterans_lung_cancer.csv")
|
||||
)
|
||||
|
||||
# DaskDMatrix acts like normal DMatrix, works as a proxy for local
|
||||
# DMatrix scatter around workers.
|
||||
# For AFT survival, you'd need to extract the lower and upper bounds for the label
|
||||
# and pass them as arguments to DaskDMatrix.
|
||||
y_lower_bound = df['Survival_label_lower_bound']
|
||||
y_upper_bound = df['Survival_label_upper_bound']
|
||||
X = df.drop(['Survival_label_lower_bound',
|
||||
'Survival_label_upper_bound'], axis=1)
|
||||
dtrain = DaskDMatrix(client, X, label_lower_bound=y_lower_bound,
|
||||
label_upper_bound=y_upper_bound)
|
||||
y_lower_bound = df["Survival_label_lower_bound"]
|
||||
y_upper_bound = df["Survival_label_upper_bound"]
|
||||
X = df.drop(["Survival_label_lower_bound", "Survival_label_upper_bound"], axis=1)
|
||||
dtrain = DaskDMatrix(
|
||||
client, X, label_lower_bound=y_lower_bound, label_upper_bound=y_upper_bound
|
||||
)
|
||||
|
||||
# Use train method from xgboost.dask instead of xgboost. This
|
||||
# distributed version of train returns a dictionary containing the
|
||||
# resulting booster and evaluation history obtained from
|
||||
# evaluation metrics.
|
||||
params = {'verbosity': 1,
|
||||
'objective': 'survival:aft',
|
||||
'eval_metric': 'aft-nloglik',
|
||||
'learning_rate': 0.05,
|
||||
'aft_loss_distribution_scale': 1.20,
|
||||
'aft_loss_distribution': 'normal',
|
||||
'max_depth': 6,
|
||||
'lambda': 0.01,
|
||||
'alpha': 0.02}
|
||||
output = xgb.dask.train(client,
|
||||
params,
|
||||
dtrain,
|
||||
num_boost_round=100,
|
||||
evals=[(dtrain, 'train')])
|
||||
bst = output['booster']
|
||||
history = output['history']
|
||||
params = {
|
||||
"verbosity": 1,
|
||||
"objective": "survival:aft",
|
||||
"eval_metric": "aft-nloglik",
|
||||
"learning_rate": 0.05,
|
||||
"aft_loss_distribution_scale": 1.20,
|
||||
"aft_loss_distribution": "normal",
|
||||
"max_depth": 6,
|
||||
"lambda": 0.01,
|
||||
"alpha": 0.02,
|
||||
}
|
||||
output = xgb.dask.train(
|
||||
client, params, dtrain, num_boost_round=100, evals=[(dtrain, "train")]
|
||||
)
|
||||
bst = output["booster"]
|
||||
history = output["history"]
|
||||
|
||||
# you can pass output directly into `predict` too.
|
||||
prediction = xgb.dask.predict(client, bst, dtrain)
|
||||
print('Evaluation history: ', history)
|
||||
print("Evaluation history: ", history)
|
||||
|
||||
# Uncomment the following line to save the model to the disk
|
||||
# bst.save_model('survival_model.json')
|
||||
@@ -62,7 +64,7 @@ def main(client):
|
||||
return prediction
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
# or use other clusters for scaling
|
||||
with LocalCluster(n_workers=7, threads_per_worker=4) as cluster:
|
||||
with Client(cluster) as client:
|
||||
|
||||
@@ -15,7 +15,7 @@ def main(client):
|
||||
m = 100000
|
||||
n = 100
|
||||
X = da.random.random(size=(m, n), chunks=100)
|
||||
y = da.random.random(size=(m, ), chunks=100)
|
||||
y = da.random.random(size=(m,), chunks=100)
|
||||
|
||||
# DaskDMatrix acts like normal DMatrix, works as a proxy for local
|
||||
# DMatrix scatter around workers.
|
||||
@@ -25,21 +25,23 @@ def main(client):
|
||||
# distributed version of train returns a dictionary containing the
|
||||
# resulting booster and evaluation history obtained from
|
||||
# evaluation metrics.
|
||||
output = xgb.dask.train(client,
|
||||
{'verbosity': 1,
|
||||
'tree_method': 'hist'},
|
||||
dtrain,
|
||||
num_boost_round=4, evals=[(dtrain, 'train')])
|
||||
bst = output['booster']
|
||||
history = output['history']
|
||||
output = xgb.dask.train(
|
||||
client,
|
||||
{"verbosity": 1, "tree_method": "hist"},
|
||||
dtrain,
|
||||
num_boost_round=4,
|
||||
evals=[(dtrain, "train")],
|
||||
)
|
||||
bst = output["booster"]
|
||||
history = output["history"]
|
||||
|
||||
# you can pass output directly into `predict` too.
|
||||
prediction = xgb.dask.predict(client, bst, dtrain)
|
||||
print('Evaluation history:', history)
|
||||
print("Evaluation history:", history)
|
||||
return prediction
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
# or use other clusters for scaling
|
||||
with LocalCluster(n_workers=7, threads_per_worker=4) as cluster:
|
||||
with Client(cluster) as client:
|
||||
|
||||
@@ -13,33 +13,38 @@ from xgboost import dask as dxgb
|
||||
from xgboost.dask import DaskDMatrix
|
||||
|
||||
|
||||
def using_dask_matrix(client: Client, X, y):
|
||||
# DaskDMatrix acts like normal DMatrix, works as a proxy for local
|
||||
# DMatrix scatter around workers.
|
||||
def using_dask_matrix(client: Client, X: da.Array, y: da.Array) -> da.Array:
|
||||
# DaskDMatrix acts like normal DMatrix, works as a proxy for local DMatrix scatter
|
||||
# around workers.
|
||||
dtrain = DaskDMatrix(client, X, y)
|
||||
|
||||
# Use train method from xgboost.dask instead of xgboost. This
|
||||
# distributed version of train returns a dictionary containing the
|
||||
# resulting booster and evaluation history obtained from
|
||||
# evaluation metrics.
|
||||
output = xgb.dask.train(client,
|
||||
{'verbosity': 2,
|
||||
# Golden line for GPU training
|
||||
'tree_method': 'gpu_hist'},
|
||||
dtrain,
|
||||
num_boost_round=4, evals=[(dtrain, 'train')])
|
||||
bst = output['booster']
|
||||
history = output['history']
|
||||
# Use train method from xgboost.dask instead of xgboost. This distributed version
|
||||
# of train returns a dictionary containing the resulting booster and evaluation
|
||||
# history obtained from evaluation metrics.
|
||||
output = xgb.dask.train(
|
||||
client,
|
||||
{
|
||||
"verbosity": 2,
|
||||
"tree_method": "hist",
|
||||
# Golden line for GPU training
|
||||
"device": "cuda",
|
||||
},
|
||||
dtrain,
|
||||
num_boost_round=4,
|
||||
evals=[(dtrain, "train")],
|
||||
)
|
||||
bst = output["booster"]
|
||||
history = output["history"]
|
||||
|
||||
# you can pass output directly into `predict` too.
|
||||
prediction = xgb.dask.predict(client, bst, dtrain)
|
||||
print('Evaluation history:', history)
|
||||
print("Evaluation history:", history)
|
||||
return prediction
|
||||
|
||||
|
||||
def using_quantile_device_dmatrix(client: Client, X, y):
|
||||
"""`DaskQuantileDMatrix` is a data type specialized for `gpu_hist` and `hist` tree
|
||||
methods for reducing memory usage.
|
||||
def using_quantile_device_dmatrix(client: Client, X: da.Array, y: da.Array) -> da.Array:
|
||||
"""`DaskQuantileDMatrix` is a data type specialized for `hist` tree methods for
|
||||
reducing memory usage.
|
||||
|
||||
.. versionadded:: 1.2.0
|
||||
|
||||
@@ -52,26 +57,28 @@ def using_quantile_device_dmatrix(client: Client, X, y):
|
||||
# the `ref` argument of `DaskQuantileDMatrix`.
|
||||
dtrain = dxgb.DaskQuantileDMatrix(client, X, y)
|
||||
output = xgb.dask.train(
|
||||
client, {"verbosity": 2, "tree_method": "gpu_hist"}, dtrain, num_boost_round=4
|
||||
client,
|
||||
{"verbosity": 2, "tree_method": "hist", "device": "cuda"},
|
||||
dtrain,
|
||||
num_boost_round=4,
|
||||
)
|
||||
|
||||
prediction = xgb.dask.predict(client, output, X)
|
||||
return prediction
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
# `LocalCUDACluster` is used for assigning GPU to XGBoost processes. Here
|
||||
# `n_workers` represents the number of GPUs since we use one GPU per worker
|
||||
# process.
|
||||
# `n_workers` represents the number of GPUs since we use one GPU per worker process.
|
||||
with LocalCUDACluster(n_workers=2, threads_per_worker=4) as cluster:
|
||||
with Client(cluster) as client:
|
||||
# generate some random data for demonstration
|
||||
m = 100000
|
||||
n = 100
|
||||
X = da.random.random(size=(m, n), chunks=10000)
|
||||
y = da.random.random(size=(m, ), chunks=10000)
|
||||
y = da.random.random(size=(m,), chunks=10000)
|
||||
|
||||
print('Using DaskQuantileDMatrix')
|
||||
print("Using DaskQuantileDMatrix")
|
||||
from_ddqdm = using_quantile_device_dmatrix(client, X, y)
|
||||
print('Using DMatrix')
|
||||
print("Using DMatrix")
|
||||
from_dmatrix = using_dask_matrix(client, X, y)
|
||||
|
||||
@@ -21,7 +21,8 @@ def main(client):
|
||||
y = da.random.random(m, partition_size)
|
||||
|
||||
regressor = xgboost.dask.DaskXGBRegressor(verbosity=1)
|
||||
regressor.set_params(tree_method='gpu_hist')
|
||||
# set the device to CUDA
|
||||
regressor.set_params(tree_method="hist", device="cuda")
|
||||
# assigning client here is optional
|
||||
regressor.client = client
|
||||
|
||||
@@ -31,13 +32,13 @@ def main(client):
|
||||
bst = regressor.get_booster()
|
||||
history = regressor.evals_result()
|
||||
|
||||
print('Evaluation history:', history)
|
||||
print("Evaluation history:", history)
|
||||
# returned prediction is always a dask array.
|
||||
assert isinstance(prediction, da.Array)
|
||||
return bst # returning the trained model
|
||||
return bst # returning the trained model
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
# With dask cuda, one can scale up XGBoost to arbitrary GPU clusters.
|
||||
# `LocalCUDACluster` used here is only for demonstration purpose.
|
||||
with LocalCUDACluster() as cluster:
|
||||
|
||||
@@ -71,7 +71,8 @@ def custom_callback():
|
||||
{
|
||||
'objective': 'binary:logistic',
|
||||
'eval_metric': ['error', 'rmse'],
|
||||
'tree_method': 'gpu_hist'
|
||||
'tree_method': 'hist',
|
||||
"device": "cuda",
|
||||
},
|
||||
D_train,
|
||||
evals=[(D_train, 'Train'), (D_valid, 'Valid')],
|
||||
|
||||
@@ -63,7 +63,8 @@ def load_cat_in_the_dat() -> tuple[pd.DataFrame, pd.Series]:
|
||||
|
||||
|
||||
params = {
|
||||
"tree_method": "gpu_hist",
|
||||
"tree_method": "hist",
|
||||
"device": "cuda",
|
||||
"n_estimators": 32,
|
||||
"colsample_bylevel": 0.7,
|
||||
}
|
||||
|
||||
@@ -58,13 +58,13 @@ def main() -> None:
|
||||
# Specify `enable_categorical` to True, also we use onehot encoding based split
|
||||
# here for demonstration. For details see the document of `max_cat_to_onehot`.
|
||||
reg = xgb.XGBRegressor(
|
||||
tree_method="gpu_hist", enable_categorical=True, max_cat_to_onehot=5
|
||||
tree_method="hist", enable_categorical=True, max_cat_to_onehot=5, device="cuda"
|
||||
)
|
||||
reg.fit(X, y, eval_set=[(X, y)])
|
||||
|
||||
# Pass in already encoded data
|
||||
X_enc, y_enc = make_categorical(100, 10, 4, True)
|
||||
reg_enc = xgb.XGBRegressor(tree_method="gpu_hist")
|
||||
reg_enc = xgb.XGBRegressor(tree_method="hist", device="cuda")
|
||||
reg_enc.fit(X_enc, y_enc, eval_set=[(X_enc, y_enc)])
|
||||
|
||||
reg_results = np.array(reg.evals_result()["validation_0"]["rmse"])
|
||||
|
||||
@@ -82,8 +82,9 @@ def main(tmpdir: str) -> xgboost.Booster:
|
||||
missing = np.NaN
|
||||
Xy = xgboost.DMatrix(it, missing=missing, enable_categorical=False)
|
||||
|
||||
# Other tree methods including ``approx``, and ``gpu_hist`` are supported. GPU
|
||||
# behaves differently than CPU tree methods. See tutorial in doc for details.
|
||||
# ``approx`` is also supported, but less efficient due to sketching. GPU behaves
|
||||
# differently than CPU tree methods as it uses a hybrid approach. See tutorial in
|
||||
# doc for details.
|
||||
booster = xgboost.train(
|
||||
{"tree_method": "hist", "max_depth": 4},
|
||||
Xy,
|
||||
|
||||
@@ -104,7 +104,8 @@ def ranking_demo(args: argparse.Namespace) -> None:
|
||||
qid_test = qid_test[sorted_idx]
|
||||
|
||||
ranker = xgb.XGBRanker(
|
||||
tree_method="gpu_hist",
|
||||
tree_method="hist",
|
||||
device="cuda",
|
||||
lambdarank_pair_method="topk",
|
||||
lambdarank_num_pair_per_sample=13,
|
||||
eval_metric=["ndcg@1", "ndcg@8"],
|
||||
@@ -161,7 +162,8 @@ def click_data_demo(args: argparse.Namespace) -> None:
|
||||
|
||||
ranker = xgb.XGBRanker(
|
||||
n_estimators=512,
|
||||
tree_method="gpu_hist",
|
||||
tree_method="hist",
|
||||
device="cuda",
|
||||
learning_rate=0.01,
|
||||
reg_lambda=1.5,
|
||||
subsample=0.8,
|
||||
|
||||
@@ -23,22 +23,23 @@ import numpy
|
||||
import xgboost
|
||||
|
||||
COLS = 64
|
||||
ROWS_PER_BATCH = 1000 # data is splited by rows
|
||||
ROWS_PER_BATCH = 1000 # data is splited by rows
|
||||
BATCHES = 32
|
||||
|
||||
|
||||
class IterForDMatrixDemo(xgboost.core.DataIter):
|
||||
'''A data iterator for XGBoost DMatrix.
|
||||
"""A data iterator for XGBoost DMatrix.
|
||||
|
||||
`reset` and `next` are required for any data iterator, other functions here
|
||||
are utilites for demonstration's purpose.
|
||||
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
'''Generate some random data for demostration.
|
||||
"""Generate some random data for demostration.
|
||||
|
||||
Actual data can be anything that is currently supported by XGBoost.
|
||||
'''
|
||||
"""
|
||||
self.rows = ROWS_PER_BATCH
|
||||
self.cols = COLS
|
||||
rng = cupy.random.RandomState(1994)
|
||||
@@ -46,7 +47,7 @@ class IterForDMatrixDemo(xgboost.core.DataIter):
|
||||
self._labels = [rng.randn(self.rows)] * BATCHES
|
||||
self._weights = [rng.uniform(size=self.rows)] * BATCHES
|
||||
|
||||
self.it = 0 # set iterator to 0
|
||||
self.it = 0 # set iterator to 0
|
||||
super().__init__()
|
||||
|
||||
def as_array(self):
|
||||
@@ -59,27 +60,26 @@ class IterForDMatrixDemo(xgboost.core.DataIter):
|
||||
return cupy.concatenate(self._weights)
|
||||
|
||||
def data(self):
|
||||
'''Utility function for obtaining current batch of data.'''
|
||||
"""Utility function for obtaining current batch of data."""
|
||||
return self._data[self.it]
|
||||
|
||||
def labels(self):
|
||||
'''Utility function for obtaining current batch of label.'''
|
||||
"""Utility function for obtaining current batch of label."""
|
||||
return self._labels[self.it]
|
||||
|
||||
def weights(self):
|
||||
return self._weights[self.it]
|
||||
|
||||
def reset(self):
|
||||
'''Reset the iterator'''
|
||||
"""Reset the iterator"""
|
||||
self.it = 0
|
||||
|
||||
def next(self, input_data):
|
||||
'''Yield next batch of data.'''
|
||||
"""Yield next batch of data."""
|
||||
if self.it == len(self._data):
|
||||
# Return 0 when there's no more batch.
|
||||
return 0
|
||||
input_data(data=self.data(), label=self.labels(),
|
||||
weight=self.weights())
|
||||
input_data(data=self.data(), label=self.labels(), weight=self.weights())
|
||||
self.it += 1
|
||||
return 1
|
||||
|
||||
@@ -103,18 +103,19 @@ def main():
|
||||
|
||||
assert m_with_it.num_col() == m.num_col()
|
||||
assert m_with_it.num_row() == m.num_row()
|
||||
# Tree meethod must be one of the `hist` or `gpu_hist`. We use `gpu_hist` for GPU
|
||||
# input here.
|
||||
# Tree meethod must be `hist`.
|
||||
reg_with_it = xgboost.train(
|
||||
{"tree_method": "gpu_hist"}, m_with_it, num_boost_round=rounds
|
||||
{"tree_method": "hist", "device": "cuda"}, m_with_it, num_boost_round=rounds
|
||||
)
|
||||
predict_with_it = reg_with_it.predict(m_with_it)
|
||||
|
||||
reg = xgboost.train({"tree_method": "gpu_hist"}, m, num_boost_round=rounds)
|
||||
reg = xgboost.train(
|
||||
{"tree_method": "hist", "device": "cuda"}, m, num_boost_round=rounds
|
||||
)
|
||||
predict = reg.predict(m)
|
||||
|
||||
numpy.testing.assert_allclose(predict_with_it, predict, rtol=1e6)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
@@ -24,7 +24,7 @@ def main():
|
||||
Xy = xgb.DMatrix(X_train, y_train)
|
||||
evals_result: xgb.callback.EvaluationMonitor.EvalsLog = {}
|
||||
booster = xgb.train(
|
||||
{"tree_method": "gpu_hist", "max_depth": 6},
|
||||
{"tree_method": "hist", "max_depth": 6, "device": "cuda"},
|
||||
Xy,
|
||||
num_boost_round=n_rounds,
|
||||
evals=[(Xy, "Train")],
|
||||
@@ -33,8 +33,8 @@ def main():
|
||||
SHAP = booster.predict(Xy, pred_contribs=True)
|
||||
|
||||
# Refresh the leaf value and tree statistic
|
||||
X_refresh = X[X.shape[0] // 2:]
|
||||
y_refresh = y[y.shape[0] // 2:]
|
||||
X_refresh = X[X.shape[0] // 2 :]
|
||||
y_refresh = y[y.shape[0] // 2 :]
|
||||
Xy_refresh = xgb.DMatrix(X_refresh, y_refresh)
|
||||
# The model will adapt to other half of the data by changing leaf value (no change in
|
||||
# split condition) with refresh_leaf set to True.
|
||||
@@ -87,7 +87,7 @@ def main():
|
||||
np.testing.assert_allclose(
|
||||
np.array(prune_result["Original"]["rmse"]),
|
||||
np.array(prune_result["Train"]["rmse"]),
|
||||
atol=1e-5
|
||||
atol=1e-5,
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user