[dask] Update dask demo for using the new dask backend. (#10347)
This commit is contained in:
parent
e6eefea5e2
commit
c2e3d4f3cd
@ -3,7 +3,7 @@ Example of training with Dask on GPU
|
|||||||
====================================
|
====================================
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import cupy as cp
|
import dask
|
||||||
import dask_cudf
|
import dask_cudf
|
||||||
from dask import array as da
|
from dask import array as da
|
||||||
from dask import dataframe as dd
|
from dask import dataframe as dd
|
||||||
@ -24,12 +24,8 @@ def using_dask_matrix(client: Client, X: da.Array, y: da.Array) -> da.Array:
|
|||||||
# history obtained from evaluation metrics.
|
# history obtained from evaluation metrics.
|
||||||
output = dxgb.train(
|
output = dxgb.train(
|
||||||
client,
|
client,
|
||||||
{
|
# Make sure the device is set to CUDA.
|
||||||
"verbosity": 2,
|
{"tree_method": "hist", "device": "cuda"},
|
||||||
"tree_method": "hist",
|
|
||||||
# Golden line for GPU training
|
|
||||||
"device": "cuda",
|
|
||||||
},
|
|
||||||
dtrain,
|
dtrain,
|
||||||
num_boost_round=4,
|
num_boost_round=4,
|
||||||
evals=[(dtrain, "train")],
|
evals=[(dtrain, "train")],
|
||||||
@ -50,18 +46,17 @@ def using_quantile_device_dmatrix(client: Client, X: da.Array, y: da.Array) -> d
|
|||||||
.. versionadded:: 1.2.0
|
.. versionadded:: 1.2.0
|
||||||
|
|
||||||
"""
|
"""
|
||||||
X = dask_cudf.from_dask_dataframe(dd.from_dask_array(X))
|
|
||||||
y = dask_cudf.from_dask_dataframe(dd.from_dask_array(y))
|
|
||||||
|
|
||||||
# `DaskQuantileDMatrix` is used instead of `DaskDMatrix`. Be careful: it cannot
|
# `DaskQuantileDMatrix` is used instead of `DaskDMatrix`. Be careful: it cannot
|
||||||
# be used for anything other than training unless a reference is specified. See
|
# be used for anything other than training unless a reference is specified. See
|
||||||
# the `ref` argument of `DaskQuantileDMatrix`.
|
# the `ref` argument of `DaskQuantileDMatrix`.
|
||||||
dtrain = dxgb.DaskQuantileDMatrix(client, X, y)
|
dtrain = dxgb.DaskQuantileDMatrix(client, X, y)
|
||||||
output = dxgb.train(
|
output = dxgb.train(
|
||||||
client,
|
client,
|
||||||
{"verbosity": 2, "tree_method": "hist", "device": "cuda"},
|
# Make sure the device is set to CUDA.
|
||||||
|
{"tree_method": "hist", "device": "cuda"},
|
||||||
dtrain,
|
dtrain,
|
||||||
num_boost_round=4,
|
num_boost_round=4,
|
||||||
|
evals=[(dtrain, "train")],
|
||||||
)
|
)
|
||||||
|
|
||||||
prediction = dxgb.predict(client, output, X)
|
prediction = dxgb.predict(client, output, X)
|
||||||
@ -72,15 +67,23 @@ if __name__ == "__main__":
|
|||||||
# `LocalCUDACluster` is used for assigning GPUs to XGBoost processes. Here
|
# `LocalCUDACluster` is used for assigning GPUs to XGBoost processes. Here
|
||||||
# `n_workers` represents the number of GPUs, since we use one GPU per worker process.
|
# `n_workers` represents the number of GPUs, since we use one GPU per worker process.
|
||||||
with LocalCUDACluster(n_workers=2, threads_per_worker=4) as cluster:
|
with LocalCUDACluster(n_workers=2, threads_per_worker=4) as cluster:
|
||||||
with Client(cluster) as client:
|
# Create client from cluster, set the backend to GPU array (cupy).
|
||||||
# generate some random data for demonstration
|
with Client(cluster) as client, dask.config.set({"array.backend": "cupy"}):
|
||||||
|
# Generate some random data for demonstration
|
||||||
rng = da.random.default_rng(1)
|
rng = da.random.default_rng(1)
|
||||||
|
|
||||||
m = 100000
|
m = 2**18
|
||||||
n = 100
|
n = 100
|
||||||
X = rng.normal(size=(m, n))
|
X = rng.uniform(size=(m, n), chunks=(128**2, -1))
|
||||||
y = X.sum(axis=1)
|
y = X.sum(axis=1)
|
||||||
|
|
||||||
|
X = dd.from_dask_array(X)
|
||||||
|
y = dd.from_dask_array(y)
|
||||||
|
# XGBoost can take arrays. This is to show that DataFrame uses the GPU
|
||||||
|
# backend as well.
|
||||||
|
assert isinstance(X, dask_cudf.DataFrame)
|
||||||
|
assert isinstance(y, dask_cudf.Series)
|
||||||
|
|
||||||
print("Using DaskQuantileDMatrix")
|
print("Using DaskQuantileDMatrix")
|
||||||
from_ddqdm = using_quantile_device_dmatrix(client, X, y)
|
from_ddqdm = using_quantile_device_dmatrix(client, X, y)
|
||||||
print("Using DMatrix")
|
print("Using DMatrix")
|
||||||
|
|||||||
@ -3,6 +3,7 @@ Use scikit-learn regressor interface with GPU histogram tree method
|
|||||||
===================================================================
|
===================================================================
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import dask
|
||||||
from dask import array as da
|
from dask import array as da
|
||||||
from dask.distributed import Client
|
from dask.distributed import Client
|
||||||
|
|
||||||
@ -13,17 +14,18 @@ from xgboost import dask as dxgb
|
|||||||
|
|
||||||
|
|
||||||
def main(client: Client) -> dxgb.Booster:
|
def main(client: Client) -> dxgb.Booster:
|
||||||
# generate some random data for demonstration
|
# Generate some random data for demonstration
|
||||||
|
rng = da.random.default_rng(1)
|
||||||
|
|
||||||
|
m = 2**18
|
||||||
n = 100
|
n = 100
|
||||||
m = 1000000
|
X = rng.uniform(size=(m, n), chunks=(128**2, -1))
|
||||||
partition_size = 10000
|
y = X.sum(axis=1)
|
||||||
X = da.random.random((m, n), partition_size)
|
|
||||||
y = da.random.random(m, partition_size)
|
|
||||||
|
|
||||||
regressor = dxgb.DaskXGBRegressor(verbosity=1)
|
regressor = dxgb.DaskXGBRegressor(verbosity=1)
|
||||||
# set the device to CUDA
|
# Set the device to CUDA
|
||||||
regressor.set_params(tree_method="hist", device="cuda")
|
regressor.set_params(tree_method="hist", device="cuda")
|
||||||
# assigning client here is optional
|
# Assigning client here is optional
|
||||||
regressor.client = client
|
regressor.client = client
|
||||||
|
|
||||||
regressor.fit(X, y, eval_set=[(X, y)])
|
regressor.fit(X, y, eval_set=[(X, y)])
|
||||||
@ -42,5 +44,6 @@ if __name__ == "__main__":
|
|||||||
# With dask cuda, one can scale up XGBoost to arbitrary GPU clusters.
|
# With dask cuda, one can scale up XGBoost to arbitrary GPU clusters.
|
||||||
# `LocalCUDACluster` used here is only for demonstration purposes.
|
# `LocalCUDACluster` used here is only for demonstration purposes.
|
||||||
with LocalCUDACluster() as cluster:
|
with LocalCUDACluster() as cluster:
|
||||||
with Client(cluster) as client:
|
# Create client from cluster, set the backend to GPU array (cupy).
|
||||||
|
with Client(cluster) as client, dask.config.set({"array.backend": "cupy"}):
|
||||||
main(client)
|
main(client)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user