sync upstream code

This commit is contained in:
Hui Liu
2024-03-20 16:14:38 -07:00
75 changed files with 754 additions and 312 deletions

View File

@@ -252,7 +252,7 @@ class TestDistributedGPU:
X_onehot, _ = make_categorical(local_cuda_client, 10000, 30, 13, True)
X_onehot = dask_cudf.from_dask_dataframe(X_onehot)
run_categorical(local_cuda_client, "gpu_hist", X, X_onehot, y)
run_categorical(local_cuda_client, "hist", "cuda", X, X_onehot, y)
@given(
params=hist_parameter_strategy,

View File

@@ -315,8 +315,15 @@ def test_dask_sparse(client: "Client") -> None:
)
def run_categorical(client: "Client", tree_method: str, X, X_onehot, y) -> None:
parameters = {"tree_method": tree_method, "max_cat_to_onehot": 9999} # force onehot
def run_categorical(
client: "Client", tree_method: str, device: str, X, X_onehot, y
) -> None:
# Force onehot
parameters = {
"tree_method": tree_method,
"device": device,
"max_cat_to_onehot": 9999,
}
rounds = 10
m = xgb.dask.DaskDMatrix(client, X_onehot, y, enable_categorical=True)
by_etl_results = xgb.dask.train(
@@ -364,6 +371,7 @@ def run_categorical(client: "Client", tree_method: str, X, X_onehot, y) -> None:
enable_categorical=True,
n_estimators=10,
tree_method=tree_method,
device=device,
# force onehot
max_cat_to_onehot=9999,
)
@@ -378,7 +386,10 @@ def run_categorical(client: "Client", tree_method: str, X, X_onehot, y) -> None:
reg.fit(X, y)
# check partition based
reg = xgb.dask.DaskXGBRegressor(
enable_categorical=True, n_estimators=10, tree_method=tree_method
enable_categorical=True,
n_estimators=10,
tree_method=tree_method,
device=device,
)
reg.fit(X, y, eval_set=[(X, y)])
assert tm.non_increasing(reg.evals_result()["validation_0"]["rmse"])
@@ -398,8 +409,8 @@ def run_categorical(client: "Client", tree_method: str, X, X_onehot, y) -> None:
def test_categorical(client: "Client") -> None:
X, y = make_categorical(client, 10000, 30, 13)
X_onehot, _ = make_categorical(client, 10000, 30, 13, True)
run_categorical(client, "approx", X, X_onehot, y)
run_categorical(client, "hist", X, X_onehot, y)
run_categorical(client, "approx", "cpu", X, X_onehot, y)
run_categorical(client, "hist", "cpu", X, X_onehot, y)
ft = ["c"] * X.shape[1]
reg = xgb.dask.DaskXGBRegressor(