[dask] Update dask demo for using the new dask backend. (#10347)
This commit is contained in:
parent
e6eefea5e2
commit
c2e3d4f3cd
@ -3,7 +3,7 @@ Example of training with Dask on GPU
|
||||
====================================
|
||||
"""
|
||||
|
||||
import cupy as cp
|
||||
import dask
|
||||
import dask_cudf
|
||||
from dask import array as da
|
||||
from dask import dataframe as dd
|
||||
@ -24,12 +24,8 @@ def using_dask_matrix(client: Client, X: da.Array, y: da.Array) -> da.Array:
|
||||
# history obtained from evaluation metrics.
|
||||
output = dxgb.train(
|
||||
client,
|
||||
{
|
||||
"verbosity": 2,
|
||||
"tree_method": "hist",
|
||||
# Golden line for GPU training
|
||||
"device": "cuda",
|
||||
},
|
||||
# Make sure the device is set to CUDA.
|
||||
{"tree_method": "hist", "device": "cuda"},
|
||||
dtrain,
|
||||
num_boost_round=4,
|
||||
evals=[(dtrain, "train")],
|
||||
@ -50,18 +46,17 @@ def using_quantile_device_dmatrix(client: Client, X: da.Array, y: da.Array) -> d
|
||||
.. versionadded:: 1.2.0
|
||||
|
||||
"""
|
||||
X = dask_cudf.from_dask_dataframe(dd.from_dask_array(X))
|
||||
y = dask_cudf.from_dask_dataframe(dd.from_dask_array(y))
|
||||
|
||||
# `DaskQuantileDMatrix` is used instead of `DaskDMatrix`. Be careful: it cannot be
|
||||
# used for anything other than training unless a reference is specified. See
|
||||
# the `ref` argument of `DaskQuantileDMatrix`.
|
||||
dtrain = dxgb.DaskQuantileDMatrix(client, X, y)
|
||||
output = dxgb.train(
|
||||
client,
|
||||
{"verbosity": 2, "tree_method": "hist", "device": "cuda"},
|
||||
# Make sure the device is set to CUDA.
|
||||
{"tree_method": "hist", "device": "cuda"},
|
||||
dtrain,
|
||||
num_boost_round=4,
|
||||
evals=[(dtrain, "train")],
|
||||
)
|
||||
|
||||
prediction = dxgb.predict(client, output, X)
|
||||
@ -72,15 +67,23 @@ if __name__ == "__main__":
|
||||
# `LocalCUDACluster` is used for assigning GPUs to XGBoost processes. Here,
|
||||
# `n_workers` represents the number of GPUs since we use one GPU per worker process.
|
||||
with LocalCUDACluster(n_workers=2, threads_per_worker=4) as cluster:
|
||||
with Client(cluster) as client:
|
||||
# generate some random data for demonstration
|
||||
# Create client from cluster, set the backend to GPU array (cupy).
|
||||
with Client(cluster) as client, dask.config.set({"array.backend": "cupy"}):
|
||||
# Generate some random data for demonstration
|
||||
rng = da.random.default_rng(1)
|
||||
|
||||
m = 100000
|
||||
m = 2**18
|
||||
n = 100
|
||||
X = rng.normal(size=(m, n))
|
||||
X = rng.uniform(size=(m, n), chunks=(128**2, -1))
|
||||
y = X.sum(axis=1)
|
||||
|
||||
X = dd.from_dask_array(X)
|
||||
y = dd.from_dask_array(y)
|
||||
# XGBoost can take arrays. This is to show that DataFrame uses the GPU
|
||||
# backend as well.
|
||||
assert isinstance(X, dask_cudf.DataFrame)
|
||||
assert isinstance(y, dask_cudf.Series)
|
||||
|
||||
print("Using DaskQuantileDMatrix")
|
||||
from_ddqdm = using_quantile_device_dmatrix(client, X, y)
|
||||
print("Using DMatrix")
|
||||
|
||||
@ -3,6 +3,7 @@ Use scikit-learn regressor interface with GPU histogram tree method
|
||||
===================================================================
|
||||
"""
|
||||
|
||||
import dask
|
||||
from dask import array as da
|
||||
from dask.distributed import Client
|
||||
|
||||
@ -13,17 +14,18 @@ from xgboost import dask as dxgb
|
||||
|
||||
|
||||
def main(client: Client) -> dxgb.Booster:
|
||||
# generate some random data for demonstration
|
||||
# Generate some random data for demonstration
|
||||
rng = da.random.default_rng(1)
|
||||
|
||||
m = 2**18
|
||||
n = 100
|
||||
m = 1000000
|
||||
partition_size = 10000
|
||||
X = da.random.random((m, n), partition_size)
|
||||
y = da.random.random(m, partition_size)
|
||||
X = rng.uniform(size=(m, n), chunks=(128**2, -1))
|
||||
y = X.sum(axis=1)
|
||||
|
||||
regressor = dxgb.DaskXGBRegressor(verbosity=1)
|
||||
# set the device to CUDA
|
||||
# Set the device to CUDA
|
||||
regressor.set_params(tree_method="hist", device="cuda")
|
||||
# assigning client here is optional
|
||||
# Assigning client here is optional
|
||||
regressor.client = client
|
||||
|
||||
regressor.fit(X, y, eval_set=[(X, y)])
|
||||
@ -42,5 +44,6 @@ if __name__ == "__main__":
|
||||
# With dask cuda, one can scale up XGBoost to arbitrary GPU clusters.
|
||||
# `LocalCUDACluster` used here is only for demonstration purposes.
|
||||
with LocalCUDACluster() as cluster:
|
||||
with Client(cluster) as client:
|
||||
# Create client from cluster, set the backend to GPU array (cupy).
|
||||
with Client(cluster) as client, dask.config.set({"array.backend": "cupy"}):
|
||||
main(client)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user