[breaking] Remove deprecated parameters in the skl interface. (#9986)

Author: Jiaming Yuan
Date: 2024-01-15 20:40:05 +08:00
Committed by: GitHub
Parent: 2de85d3241
Commit: 0798e36d73

16 changed files with 418 additions and 462 deletions
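All of the diffs below follow the same migration: training parameters that the scikit-learn interface previously accepted through fit() (eval_metric, early_stopping_rounds, callbacks) are removed from fit() and must now be passed to the estimator constructor. A minimal before/after sketch on synthetic data (the dataset and split are illustrative, not from the test suite):

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

X, y = make_classification(n_samples=200, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=0)

# Before this commit, the metric and early stopping could be given to fit():
#   clf = XGBClassifier(n_estimators=100)
#   clf.fit(X_train, y_train, eval_set=[(X_val, y_val)],
#           eval_metric="logloss", early_stopping_rounds=5)

# After it, both are constructor arguments; fit() keeps only the data-side
# parameters such as eval_set and sample weights.
clf = XGBClassifier(n_estimators=100, eval_metric="logloss", early_stopping_rounds=5)
clf.fit(X_train, y_train, eval_set=[(X_val, y_val)])
print(clf.best_iteration)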


@@ -363,12 +363,12 @@ class TestDistributedGPU:
             device="cuda",
             eval_metric="error",
             n_estimators=100,
+            early_stopping_rounds=early_stopping_rounds,
         )
         cls.client = local_cuda_client
         cls.fit(
             X,
             y,
-            early_stopping_rounds=early_stopping_rounds,
             eval_set=[(valid_X, valid_y)],
         )
         booster = cls.get_booster()

@@ -937,8 +937,10 @@ def run_empty_dmatrix_auc(client: "Client", device: str, n_workers: int) -> None
     valid_X = dd.from_array(valid_X_, chunksize=n_samples)
     valid_y = dd.from_array(valid_y_, chunksize=n_samples)
-    cls = xgb.dask.DaskXGBClassifier(device=device, n_estimators=2)
-    cls.fit(X, y, eval_metric=["auc", "aucpr"], eval_set=[(valid_X, valid_y)])
+    cls = xgb.dask.DaskXGBClassifier(
+        device=device, n_estimators=2, eval_metric=["auc", "aucpr"]
+    )
+    cls.fit(X, y, eval_set=[(valid_X, valid_y)])

     # multiclass
     X_, y_ = make_classification(
@@ -966,8 +968,10 @@ def run_empty_dmatrix_auc(client: "Client", device: str, n_workers: int) -> None
     valid_X = dd.from_array(valid_X_, chunksize=n_samples)
     valid_y = dd.from_array(valid_y_, chunksize=n_samples)
-    cls = xgb.dask.DaskXGBClassifier(device=device, n_estimators=2)
-    cls.fit(X, y, eval_metric=["auc", "aucpr"], eval_set=[(valid_X, valid_y)])
+    cls = xgb.dask.DaskXGBClassifier(
+        device=device, n_estimators=2, eval_metric=["auc", "aucpr"]
+    )
+    cls.fit(X, y, eval_set=[(valid_X, valid_y)])


 def test_empty_dmatrix_auc() -> None:
@@ -994,11 +998,11 @@ def run_auc(client: "Client", device: str) -> None:
     valid_X = dd.from_array(valid_X_, chunksize=10)
     valid_y = dd.from_array(valid_y_, chunksize=10)
-    cls = xgb.XGBClassifier(device=device, n_estimators=2)
-    cls.fit(X_, y_, eval_metric="auc", eval_set=[(valid_X_, valid_y_)])
+    cls = xgb.XGBClassifier(device=device, n_estimators=2, eval_metric="auc")
+    cls.fit(X_, y_, eval_set=[(valid_X_, valid_y_)])

-    dcls = xgb.dask.DaskXGBClassifier(device=device, n_estimators=2)
-    dcls.fit(X, y, eval_metric="auc", eval_set=[(valid_X, valid_y)])
+    dcls = xgb.dask.DaskXGBClassifier(device=device, n_estimators=2, eval_metric="auc")
+    dcls.fit(X, y, eval_set=[(valid_X, valid_y)])

     approx = dcls.evals_result()["validation_0"]["auc"]
     exact = cls.evals_result()["validation_0"]["auc"]
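Metric lists move the same way, and each metric keeps its own key in the recorded history, which is what the approx/exact comparison above reads back. A small single-node sketch (data is synthetic):

import xgboost as xgb
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, random_state=1)
X_tr, X_va, y_tr, y_va = train_test_split(X, y, random_state=1)

clf = xgb.XGBClassifier(n_estimators=5, eval_metric=["auc", "aucpr"])
clf.fit(X_tr, y_tr, eval_set=[(X_va, y_va)])

history = clf.evals_result()
print(history["validation_0"]["auc"])
print(history["validation_0"]["aucpr"])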
@@ -1267,16 +1271,16 @@ def test_dask_ranking(client: "Client") -> None:
     qid_valid = qid_valid.astype(np.uint32)
     qid_test = qid_test.astype(np.uint32)
-    rank = xgb.dask.DaskXGBRanker(n_estimators=2500)
+    rank = xgb.dask.DaskXGBRanker(
+        n_estimators=2500, eval_metric=["ndcg"], early_stopping_rounds=10
+    )
     rank.fit(
         x_train,
         y_train,
         qid=qid_train,
         eval_set=[(x_test, y_test), (x_train, y_train)],
         eval_qid=[qid_test, qid_train],
-        eval_metric=["ndcg"],
         verbose=True,
-        early_stopping_rounds=10,
     )
     assert rank.n_features_in_ == 46
     assert rank.best_score > 0.98
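For rankers, only the query-group arguments (qid, eval_qid) stay in fit(); the metric and early stopping follow the estimator. A single-node sketch with random data standing in for the learning-to-rank dataset used by the test:

import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X = rng.random((100, 10))
y = rng.integers(0, 5, size=100)  # graded relevance labels
qid = np.sort(rng.integers(0, 10, size=100)).astype(np.uint32)  # sorted by query

ranker = xgb.XGBRanker(n_estimators=50, eval_metric=["ndcg"], early_stopping_rounds=5)
ranker.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid], verbose=False)
print(ranker.best_score)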
@@ -2150,13 +2154,15 @@ class TestDaskCallbacks:
         valid_X, valid_y = load_breast_cancer(return_X_y=True)
         valid_X, valid_y = da.from_array(valid_X), da.from_array(valid_y)
         cls = xgb.dask.DaskXGBClassifier(
-            objective="binary:logistic", tree_method="hist", n_estimators=1000
+            objective="binary:logistic",
+            tree_method="hist",
+            n_estimators=1000,
+            early_stopping_rounds=early_stopping_rounds,
         )
         cls.client = client
         cls.fit(
             X,
             y,
-            early_stopping_rounds=early_stopping_rounds,
             eval_set=[(valid_X, valid_y)],
         )
         booster = cls.get_booster()
@@ -2165,15 +2171,17 @@ class TestDaskCallbacks:
         # Specify the metric
         cls = xgb.dask.DaskXGBClassifier(
-            objective="binary:logistic", tree_method="hist", n_estimators=1000
+            objective="binary:logistic",
+            tree_method="hist",
+            n_estimators=1000,
+            early_stopping_rounds=early_stopping_rounds,
+            eval_metric="error",
         )
         cls.client = client
         cls.fit(
             X,
             y,
-            early_stopping_rounds=early_stopping_rounds,
             eval_set=[(valid_X, valid_y)],
-            eval_metric="error",
         )
         assert tm.non_increasing(cls.evals_result()["validation_0"]["error"])
         booster = cls.get_booster()
@@ -2215,12 +2223,12 @@ class TestDaskCallbacks:
             tree_method="hist",
             n_estimators=1000,
             eval_metric=tm.eval_error_metric_skl,
+            early_stopping_rounds=early_stopping_rounds,
         )
         cls.client = client
         cls.fit(
             X,
             y,
-            early_stopping_rounds=early_stopping_rounds,
             eval_set=[(valid_X, valid_y)],
         )
         booster = cls.get_booster()
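Custom metric callables follow the same route. A sketch using a hand-rolled error rate standing in for tm.eval_error_metric_skl from XGBoost's test utilities (that helper's exact definition is not shown here):

import numpy as np
import xgboost as xgb
from sklearn.datasets import load_breast_cancer


def error_rate(y_true: np.ndarray, predt: np.ndarray) -> float:
    # predt holds predicted probabilities for binary classification.
    return float(np.mean((predt > 0.5) != y_true))


X, y = load_breast_cancer(return_X_y=True)
clf = xgb.XGBClassifier(
    n_estimators=20, eval_metric=error_rate, early_stopping_rounds=5
)
clf.fit(X, y, eval_set=[(X, y)])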
@@ -2234,21 +2242,22 @@ class TestDaskCallbacks:
         X, y = load_breast_cancer(return_X_y=True)
         X, y = da.from_array(X), da.from_array(y)
-        cls = xgb.dask.DaskXGBClassifier(
-            objective="binary:logistic", tree_method="hist", n_estimators=10
-        )
-        cls.client = client
         with tempfile.TemporaryDirectory() as tmpdir:
-            cls.fit(
-                X,
-                y,
+            cls = xgb.dask.DaskXGBClassifier(
+                objective="binary:logistic",
+                tree_method="hist",
+                n_estimators=10,
                 callbacks=[
                     xgb.callback.TrainingCheckPoint(
                         directory=Path(tmpdir), interval=1, name="model"
                     )
                 ],
             )
+            cls.client = client
+            cls.fit(
+                X,
+                y,
+            )
             for i in range(1, 10):
                 assert os.path.exists(
                     os.path.join(
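Callbacks are constructor-only as well. A minimal single-node sketch of the checkpointing pattern above (file naming is handled by the callback; the listing is just to show that checkpoints were written):

import os
import tempfile
from pathlib import Path

import xgboost as xgb
from sklearn.datasets import load_breast_cancer

X, y = load_breast_cancer(return_X_y=True)

with tempfile.TemporaryDirectory() as tmpdir:
    clf = xgb.XGBClassifier(
        n_estimators=10,
        callbacks=[
            xgb.callback.TrainingCheckPoint(
                directory=Path(tmpdir), interval=1, name="model"
            )
        ],
    )
    clf.fit(X, y)
    print(sorted(os.listdir(tmpdir)))  # one checkpoint per iteration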


@@ -311,24 +311,20 @@ def clf_with_weight(
     y_val = np.array([0, 1])
     w_train = np.array([1.0, 2.0])
     w_val = np.array([1.0, 2.0])
-    cls2 = XGBClassifier()
+    cls2 = XGBClassifier(eval_metric="logloss", early_stopping_rounds=1)
     cls2.fit(
         X_train,
         y_train,
         eval_set=[(X_val, y_val)],
-        early_stopping_rounds=1,
-        eval_metric="logloss",
     )
-    cls3 = XGBClassifier()
+    cls3 = XGBClassifier(eval_metric="logloss", early_stopping_rounds=1)
     cls3.fit(
         X_train,
         y_train,
         sample_weight=w_train,
         eval_set=[(X_val, y_val)],
         sample_weight_eval_set=[w_val],
-        early_stopping_rounds=1,
-        eval_metric="logloss",
     )
     cls_df_train_with_eval_weight = spark.createDataFrame(
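The weighted fixture keeps sample_weight and sample_weight_eval_set in fit(); only the metric and early stopping move. A standalone sketch with hypothetical two-row arrays standing in for the fixture's data:

import numpy as np
from xgboost import XGBClassifier

X_train = np.array([[1.0, 2.0], [2.0, 3.0]])  # hypothetical stand-in data
y_train = np.array([0, 1])
X_val, y_val = X_train, y_train
w_train = np.array([1.0, 2.0])
w_val = np.array([1.0, 2.0])

clf = XGBClassifier(eval_metric="logloss", early_stopping_rounds=1)
clf.fit(
    X_train,
    y_train,
    sample_weight=w_train,
    eval_set=[(X_val, y_val)],
    sample_weight_eval_set=[w_val],
)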