[dask] Change document to avoid using default import. (#9742)

This aligns dask with pyspark; users need to explicitly call:

```
from xgboost.dask import DaskXGBClassifier
from xgboost import dask as dxgb
```

In future releases, we might stop using the default import and remove the lazy loader.
This commit is contained in:
Jiaming Yuan
2023-11-07 02:44:39 +08:00
committed by GitHub
parent 093b675838
commit 98238d63fa
7 changed files with 51 additions and 48 deletions

View File

@@ -9,7 +9,7 @@ import os
import dask.dataframe as dd
from dask.distributed import Client, LocalCluster
import xgboost as xgb
from xgboost import dask as dxgb
from xgboost.dask import DaskDMatrix
@@ -48,14 +48,14 @@ def main(client):
"lambda": 0.01,
"alpha": 0.02,
}
output = xgb.dask.train(
output = dxgb.train(
client, params, dtrain, num_boost_round=100, evals=[(dtrain, "train")]
)
bst = output["booster"]
history = output["history"]
# you can pass output directly into `predict` too.
prediction = xgb.dask.predict(client, bst, dtrain)
prediction = dxgb.predict(client, bst, dtrain)
print("Evaluation history: ", history)
# Uncomment the following line to save the model to the disk

View File

@@ -6,7 +6,7 @@ Example of training with Dask on CPU
from dask import array as da
from dask.distributed import Client, LocalCluster
import xgboost as xgb
from xgboost import dask as dxgb
from xgboost.dask import DaskDMatrix
@@ -25,7 +25,7 @@ def main(client):
# distributed version of train returns a dictionary containing the
# resulting booster and evaluation history obtained from
# evaluation metrics.
output = xgb.dask.train(
output = dxgb.train(
client,
{"verbosity": 1, "tree_method": "hist"},
dtrain,
@@ -36,7 +36,7 @@ def main(client):
history = output["history"]
# you can pass output directly into `predict` too.
prediction = xgb.dask.predict(client, bst, dtrain)
prediction = dxgb.predict(client, bst, dtrain)
print("Evaluation history:", history)
return prediction

View File

@@ -8,6 +8,7 @@ from dask_ml.datasets import make_regression
from dask_ml.model_selection import train_test_split
import xgboost as xgb
import xgboost.dask as dxgb
from xgboost.dask import DaskDMatrix
@@ -61,7 +62,7 @@ def main(client):
dtrain = DaskDMatrix(client, X_train, y_train)
dtest = DaskDMatrix(client, X_test, y_test)
output = xgb.dask.train(
output = dxgb.train(
client,
{
"verbosity": 1,

View File

@@ -8,7 +8,6 @@ from dask import dataframe as dd
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
import xgboost as xgb
from xgboost import dask as dxgb
from xgboost.dask import DaskDMatrix
@@ -21,7 +20,7 @@ def using_dask_matrix(client: Client, X: da.Array, y: da.Array) -> da.Array:
# Use train method from xgboost.dask instead of xgboost. This distributed version
# of train returns a dictionary containing the resulting booster and evaluation
# history obtained from evaluation metrics.
output = xgb.dask.train(
output = dxgb.train(
client,
{
"verbosity": 2,
@@ -37,7 +36,7 @@ def using_dask_matrix(client: Client, X: da.Array, y: da.Array) -> da.Array:
history = output["history"]
# you can pass output directly into `predict` too.
prediction = xgb.dask.predict(client, bst, dtrain)
prediction = dxgb.predict(client, bst, dtrain)
print("Evaluation history:", history)
return prediction
@@ -56,14 +55,14 @@ def using_quantile_device_dmatrix(client: Client, X: da.Array, y: da.Array) -> d
# be used for anything else other than training unless a reference is specified. See
# the `ref` argument of `DaskQuantileDMatrix`.
dtrain = dxgb.DaskQuantileDMatrix(client, X, y)
output = xgb.dask.train(
output = dxgb.train(
client,
{"verbosity": 2, "tree_method": "hist", "device": "cuda"},
dtrain,
num_boost_round=4,
)
prediction = xgb.dask.predict(client, output, X)
prediction = dxgb.predict(client, output, X)
return prediction

View File

@@ -5,7 +5,7 @@ Use scikit-learn regressor interface with CPU histogram tree method
from dask import array as da
from dask.distributed import Client, LocalCluster
import xgboost
from xgboost import dask as dxgb
def main(client):
@@ -16,7 +16,7 @@ def main(client):
X = da.random.random((m, n), partition_size)
y = da.random.random(m, partition_size)
regressor = xgboost.dask.DaskXGBRegressor(verbosity=1, n_estimators=2)
regressor = dxgb.DaskXGBRegressor(verbosity=1, n_estimators=2)
regressor.set_params(tree_method="hist")
# assigning client here is optional
regressor.client = client

View File

@@ -9,7 +9,7 @@ from dask.distributed import Client
# It's recommended to use dask_cuda for GPU assignment
from dask_cuda import LocalCUDACluster
import xgboost
from xgboost import dask as dxgb
def main(client):
@@ -20,7 +20,7 @@ def main(client):
X = da.random.random((m, n), partition_size)
y = da.random.random(m, partition_size)
regressor = xgboost.dask.DaskXGBRegressor(verbosity=1)
regressor = dxgb.DaskXGBRegressor(verbosity=1)
# set the device to CUDA
regressor.set_params(tree_method="hist", device="cuda")
# assigning client here is optional