Disable dense optimization in hist for distributed training. (#9272)

Jiaming Yuan authored 2023-06-10 02:31:34 +08:00, committed by GitHub
commit ea0deeca68
parent 8c1065f645
5 changed files with 44 additions and 10 deletions


@@ -1,6 +1,8 @@
"""Tests for dask shared by different test modules."""
import numpy as np
import pandas as pd
from dask import array as da
from dask import dataframe as dd
from distributed import Client
import xgboost as xgb
@@ -52,3 +54,22 @@ def check_init_estimation(tree_method: str, client: Client) -> None:
"""Test init estimation."""
check_init_estimation_reg(tree_method, client)
check_init_estimation_clf(tree_method, client)


def check_uneven_nan(client: Client, tree_method: str, n_workers: int) -> None:
    """Issue #9271: not every worker has missing values."""
    assert n_workers >= 2

    with client.as_current():
        clf = xgb.dask.DaskXGBClassifier(tree_method=tree_method)
        X = pd.DataFrame({"a": range(10000), "b": range(10000, 0, -1)})
        y = pd.Series([*[0] * 5000, *[1] * 5000])
X["a"][:3000:1000] = np.NaN
client.wait_for_workers(n_workers=n_workers)
clf.fit(
dd.from_pandas(X, npartitions=n_workers),
dd.from_pandas(y, npartitions=n_workers),
)
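
A minimal sketch of how a distributed test module might invoke this helper against a local Dask cluster. The test name, the parametrization over tree methods, the LocalCluster setup, and the xgboost.testing.dask import path are assumptions for illustration, not part of this diff:

# Hypothetical usage sketch; assumes the helper is importable from xgboost.testing.dask.
import pytest
from distributed import Client, LocalCluster

from xgboost.testing.dask import check_uneven_nan


@pytest.mark.parametrize("tree_method", ["hist", "approx"])
def test_uneven_nan(tree_method: str) -> None:
    n_workers = 2
    with LocalCluster(n_workers=n_workers) as cluster:
        with Client(cluster) as client:
            check_uneven_nan(client, tree_method, n_workers)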