merge latest changes

This commit is contained in:
Hui Liu
2023-12-13 21:06:28 -08:00
194 changed files with 4859 additions and 2838 deletions

View File

@@ -9,7 +9,7 @@ import os
import dask.dataframe as dd
from dask.distributed import Client, LocalCluster
import xgboost as xgb
from xgboost import dask as dxgb
from xgboost.dask import DaskDMatrix
@@ -48,14 +48,14 @@ def main(client):
"lambda": 0.01,
"alpha": 0.02,
}
output = xgb.dask.train(
output = dxgb.train(
client, params, dtrain, num_boost_round=100, evals=[(dtrain, "train")]
)
bst = output["booster"]
history = output["history"]
# you can pass output directly into `predict` too.
prediction = xgb.dask.predict(client, bst, dtrain)
prediction = dxgb.predict(client, bst, dtrain)
print("Evaluation history: ", history)
# Uncomment the following line to save the model to the disk

View File

@@ -6,7 +6,7 @@ Example of training with Dask on CPU
from dask import array as da
from dask.distributed import Client, LocalCluster
import xgboost as xgb
from xgboost import dask as dxgb
from xgboost.dask import DaskDMatrix
@@ -25,7 +25,7 @@ def main(client):
# distributed version of train returns a dictionary containing the
# resulting booster and evaluation history obtained from
# evaluation metrics.
output = xgb.dask.train(
output = dxgb.train(
client,
{"verbosity": 1, "tree_method": "hist"},
dtrain,
@@ -36,7 +36,7 @@ def main(client):
history = output["history"]
# you can pass output directly into `predict` too.
prediction = xgb.dask.predict(client, bst, dtrain)
prediction = dxgb.predict(client, bst, dtrain)
print("Evaluation history:", history)
return prediction

View File

@@ -8,6 +8,7 @@ from dask_ml.datasets import make_regression
from dask_ml.model_selection import train_test_split
import xgboost as xgb
import xgboost.dask as dxgb
from xgboost.dask import DaskDMatrix
@@ -61,7 +62,7 @@ def main(client):
dtrain = DaskDMatrix(client, X_train, y_train)
dtest = DaskDMatrix(client, X_test, y_test)
output = xgb.dask.train(
output = dxgb.train(
client,
{
"verbosity": 1,

View File

@@ -8,7 +8,6 @@ from dask import dataframe as dd
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
import xgboost as xgb
from xgboost import dask as dxgb
from xgboost.dask import DaskDMatrix
@@ -21,7 +20,7 @@ def using_dask_matrix(client: Client, X: da.Array, y: da.Array) -> da.Array:
# Use train method from xgboost.dask instead of xgboost. This distributed version
# of train returns a dictionary containing the resulting booster and evaluation
# history obtained from evaluation metrics.
output = xgb.dask.train(
output = dxgb.train(
client,
{
"verbosity": 2,
@@ -37,7 +36,7 @@ def using_dask_matrix(client: Client, X: da.Array, y: da.Array) -> da.Array:
history = output["history"]
# you can pass output directly into `predict` too.
prediction = xgb.dask.predict(client, bst, dtrain)
prediction = dxgb.predict(client, bst, dtrain)
print("Evaluation history:", history)
return prediction
@@ -56,14 +55,14 @@ def using_quantile_device_dmatrix(client: Client, X: da.Array, y: da.Array) -> d
# be used for anything else other than training unless a reference is specified. See
# the `ref` argument of `DaskQuantileDMatrix`.
dtrain = dxgb.DaskQuantileDMatrix(client, X, y)
output = xgb.dask.train(
output = dxgb.train(
client,
{"verbosity": 2, "tree_method": "hist", "device": "cuda"},
dtrain,
num_boost_round=4,
)
prediction = xgb.dask.predict(client, output, X)
prediction = dxgb.predict(client, output, X)
return prediction

View File

@@ -5,7 +5,7 @@ Use scikit-learn regressor interface with CPU histogram tree method
from dask import array as da
from dask.distributed import Client, LocalCluster
import xgboost
from xgboost import dask as dxgb
def main(client):
@@ -16,7 +16,7 @@ def main(client):
X = da.random.random((m, n), partition_size)
y = da.random.random(m, partition_size)
regressor = xgboost.dask.DaskXGBRegressor(verbosity=1, n_estimators=2)
regressor = dxgb.DaskXGBRegressor(verbosity=1, n_estimators=2)
regressor.set_params(tree_method="hist")
# assigning client here is optional
regressor.client = client

View File

@@ -9,7 +9,7 @@ from dask.distributed import Client
# It's recommended to use dask_cuda for GPU assignment
from dask_cuda import LocalCUDACluster
import xgboost
from xgboost import dask as dxgb
def main(client):
@@ -20,7 +20,7 @@ def main(client):
X = da.random.random((m, n), partition_size)
y = da.random.random(m, partition_size)
regressor = xgboost.dask.DaskXGBRegressor(verbosity=1)
regressor = dxgb.DaskXGBRegressor(verbosity=1)
# set the device to CUDA
regressor.set_params(tree_method="hist", device="cuda")
# assigning client here is optional