[breaking] Remove deprecated parameters in the skl interface. (#9986)

2024-01-15 20:40:05 +08:00
parent 2de85d3241
commit 0798e36d73
16 changed files with 418 additions and 462 deletions
--- a/tests/python/test_callback.py
+++ b/tests/python/test_callback.py
@@ -16,13 +16,14 @@ class TestCallbacks:
    @classmethod
    def setup_class(cls):
        from sklearn.datasets import load_breast_cancer
+
        X, y = load_breast_cancer(return_X_y=True)
        cls.X = X
        cls.y = y

-        split = int(X.shape[0]*0.8)
-        cls.X_train = X[: split, ...]
-        cls.y_train = y[: split, ...]
+        split = int(X.shape[0] * 0.8)
+        cls.X_train = X[:split, ...]
+        cls.y_train = y[:split, ...]
        cls.X_valid = X[split:, ...]
        cls.y_valid = y[split:, ...]

@@ -31,31 +32,32 @@ class TestCallbacks:
        D_train: xgb.DMatrix,
        D_valid: xgb.DMatrix,
        rounds: int,
-        verbose_eval: Union[bool, int]
+        verbose_eval: Union[bool, int],
    ):
        def check_output(output: str) -> None:
            if int(verbose_eval) == 1:
                # Should print each iteration info
-                assert len(output.split('\n')) == rounds
+                assert len(output.split("\n")) == rounds
            elif int(verbose_eval) > rounds:
                # Should print first and latest iteration info
-                assert len(output.split('\n')) == 2
+                assert len(output.split("\n")) == 2
            else:
                # Should print info by each period additionally to first and latest
                # iteration
                num_periods = rounds // int(verbose_eval)
                # Extra information is required for latest iteration
                is_extra_info_required = num_periods * int(verbose_eval) < (rounds - 1)
-                assert len(output.split('\n')) == (
+                assert len(output.split("\n")) == (
                    1 + num_periods + int(is_extra_info_required)
                )

        evals_result: xgb.callback.TrainingCallback.EvalsLog = {}
-        params = {'objective': 'binary:logistic', 'eval_metric': 'error'}
+        params = {"objective": "binary:logistic", "eval_metric": "error"}
        with tm.captured_output() as (out, err):
            xgb.train(
-                params, D_train,
-                evals=[(D_train, 'Train'), (D_valid, 'Valid')],
+                params,
+                D_train,
+                evals=[(D_train, "Train"), (D_valid, "Valid")],
                num_boost_round=rounds,
                evals_result=evals_result,
                verbose_eval=verbose_eval,
@@ -73,14 +75,16 @@ class TestCallbacks:
        D_valid = xgb.DMatrix(self.X_valid, self.y_valid)
        evals_result = {}
        rounds = 10
-        xgb.train({'objective': 'binary:logistic',
-                   'eval_metric': 'error'}, D_train,
-                  evals=[(D_train, 'Train'), (D_valid, 'Valid')],
-                  num_boost_round=rounds,
-                  evals_result=evals_result,
-                  verbose_eval=True)
-        assert len(evals_result['Train']['error']) == rounds
-        assert len(evals_result['Valid']['error']) == rounds
+        xgb.train(
+            {"objective": "binary:logistic", "eval_metric": "error"},
+            D_train,
+            evals=[(D_train, "Train"), (D_valid, "Valid")],
+            num_boost_round=rounds,
+            evals_result=evals_result,
+            verbose_eval=True,
+        )
+        assert len(evals_result["Train"]["error"]) == rounds
+        assert len(evals_result["Valid"]["error"]) == rounds

        self.run_evaluation_monitor(D_train, D_valid, rounds, True)
        self.run_evaluation_monitor(D_train, D_valid, rounds, 2)
@@ -93,72 +97,83 @@ class TestCallbacks:
        evals_result = {}
        rounds = 30
        early_stopping_rounds = 5
-        booster = xgb.train({'objective': 'binary:logistic',
-                             'eval_metric': 'error'}, D_train,
-                            evals=[(D_train, 'Train'), (D_valid, 'Valid')],
-                            num_boost_round=rounds,
-                            evals_result=evals_result,
-                            verbose_eval=True,
-                            early_stopping_rounds=early_stopping_rounds)
-        dump = booster.get_dump(dump_format='json')
+        booster = xgb.train(
+            {"objective": "binary:logistic", "eval_metric": "error"},
+            D_train,
+            evals=[(D_train, "Train"), (D_valid, "Valid")],
+            num_boost_round=rounds,
+            evals_result=evals_result,
+            verbose_eval=True,
+            early_stopping_rounds=early_stopping_rounds,
+        )
+        dump = booster.get_dump(dump_format="json")
        assert len(dump) - booster.best_iteration == early_stopping_rounds + 1

    def test_early_stopping_custom_eval(self):
        D_train = xgb.DMatrix(self.X_train, self.y_train)
        D_valid = xgb.DMatrix(self.X_valid, self.y_valid)
        early_stopping_rounds = 5
-        booster = xgb.train({'objective': 'binary:logistic',
-                             'eval_metric': 'error',
-                             'tree_method': 'hist'}, D_train,
-                            evals=[(D_train, 'Train'), (D_valid, 'Valid')],
-                            feval=tm.eval_error_metric,
-                            num_boost_round=1000,
-                            early_stopping_rounds=early_stopping_rounds,
-                            verbose_eval=False)
-        dump = booster.get_dump(dump_format='json')
+        booster = xgb.train(
+            {
+                "objective": "binary:logistic",
+                "eval_metric": "error",
+                "tree_method": "hist",
+            },
+            D_train,
+            evals=[(D_train, "Train"), (D_valid, "Valid")],
+            feval=tm.eval_error_metric,
+            num_boost_round=1000,
+            early_stopping_rounds=early_stopping_rounds,
+            verbose_eval=False,
+        )
+        dump = booster.get_dump(dump_format="json")
        assert len(dump) - booster.best_iteration == early_stopping_rounds + 1

    def test_early_stopping_customize(self):
        D_train = xgb.DMatrix(self.X_train, self.y_train)
        D_valid = xgb.DMatrix(self.X_valid, self.y_valid)
        early_stopping_rounds = 5
-        early_stop = xgb.callback.EarlyStopping(rounds=early_stopping_rounds,
-                                                metric_name='CustomErr',
-                                                data_name='Train')
+        early_stop = xgb.callback.EarlyStopping(
+            rounds=early_stopping_rounds, metric_name="CustomErr", data_name="Train"
+        )
        # Specify which dataset and which metric should be used for early stopping.
        booster = xgb.train(
-            {'objective': 'binary:logistic',
-             'eval_metric': ['error', 'rmse'],
-             'tree_method': 'hist'}, D_train,
-            evals=[(D_train, 'Train'), (D_valid, 'Valid')],
+            {
+                "objective": "binary:logistic",
+                "eval_metric": ["error", "rmse"],
+                "tree_method": "hist",
+            },
+            D_train,
+            evals=[(D_train, "Train"), (D_valid, "Valid")],
            feval=tm.eval_error_metric,
            num_boost_round=1000,
            callbacks=[early_stop],
-            verbose_eval=False)
-        dump = booster.get_dump(dump_format='json')
+            verbose_eval=False,
+        )
+        dump = booster.get_dump(dump_format="json")
        assert len(dump) - booster.best_iteration == early_stopping_rounds + 1
-        assert len(early_stop.stopping_history['Train']['CustomErr']) == len(dump)
+        assert len(early_stop.stopping_history["Train"]["CustomErr"]) == len(dump)

        rounds = 100
        early_stop = xgb.callback.EarlyStopping(
            rounds=early_stopping_rounds,
-            metric_name='CustomErr',
-            data_name='Train',
+            metric_name="CustomErr",
+            data_name="Train",
            min_delta=100,
            save_best=True,
        )
        booster = xgb.train(
            {
-                'objective': 'binary:logistic',
-                'eval_metric': ['error', 'rmse'],
-                'tree_method': 'hist'
+                "objective": "binary:logistic",
+                "eval_metric": ["error", "rmse"],
+                "tree_method": "hist",
            },
            D_train,
-            evals=[(D_train, 'Train'), (D_valid, 'Valid')],
+            evals=[(D_train, "Train"), (D_valid, "Valid")],
            feval=tm.eval_error_metric,
            num_boost_round=rounds,
            callbacks=[early_stop],
-            verbose_eval=False
+            verbose_eval=False,
        )
        # No iteration can be made with min_delta == 100
        assert booster.best_iteration == 0
@@ -166,18 +181,20 @@ class TestCallbacks:

    def test_early_stopping_skl(self):
        from sklearn.datasets import load_breast_cancer
+
        X, y = load_breast_cancer(return_X_y=True)
        early_stopping_rounds = 5
        cls = xgb.XGBClassifier(
-            early_stopping_rounds=early_stopping_rounds, eval_metric='error'
+            early_stopping_rounds=early_stopping_rounds, eval_metric="error"
        )
        cls.fit(X, y, eval_set=[(X, y)])
        booster = cls.get_booster()
-        dump = booster.get_dump(dump_format='json')
+        dump = booster.get_dump(dump_format="json")
        assert len(dump) - booster.best_iteration == early_stopping_rounds + 1

    def test_early_stopping_custom_eval_skl(self):
        from sklearn.datasets import load_breast_cancer
+
        X, y = load_breast_cancer(return_X_y=True)
        early_stopping_rounds = 5
        early_stop = xgb.callback.EarlyStopping(rounds=early_stopping_rounds)
@@ -186,11 +203,12 @@ class TestCallbacks:
        )
        cls.fit(X, y, eval_set=[(X, y)])
        booster = cls.get_booster()
-        dump = booster.get_dump(dump_format='json')
+        dump = booster.get_dump(dump_format="json")
        assert len(dump) - booster.best_iteration == early_stopping_rounds + 1

    def test_early_stopping_save_best_model(self):
        from sklearn.datasets import load_breast_cancer
+
        X, y = load_breast_cancer(return_X_y=True)
        n_estimators = 100
        early_stopping_rounds = 5
@@ -200,11 +218,11 @@ class TestCallbacks:
        cls = xgb.XGBClassifier(
            n_estimators=n_estimators,
            eval_metric=tm.eval_error_metric_skl,
-            callbacks=[early_stop]
+            callbacks=[early_stop],
        )
        cls.fit(X, y, eval_set=[(X, y)])
        booster = cls.get_booster()
-        dump = booster.get_dump(dump_format='json')
+        dump = booster.get_dump(dump_format="json")
        assert len(dump) == booster.best_iteration + 1

        early_stop = xgb.callback.EarlyStopping(
@@ -220,8 +238,9 @@ class TestCallbacks:
            cls.fit(X, y, eval_set=[(X, y)])

        # No error
-        early_stop = xgb.callback.EarlyStopping(rounds=early_stopping_rounds,
-                                                save_best=False)
+        early_stop = xgb.callback.EarlyStopping(
+            rounds=early_stopping_rounds, save_best=False
+        )
        xgb.XGBClassifier(
            booster="gblinear",
            n_estimators=10,
@@ -231,14 +250,17 @@ class TestCallbacks:

    def test_early_stopping_continuation(self):
        from sklearn.datasets import load_breast_cancer
+
        X, y = load_breast_cancer(return_X_y=True)
-        cls = xgb.XGBClassifier(eval_metric=tm.eval_error_metric_skl)
+
        early_stopping_rounds = 5
        early_stop = xgb.callback.EarlyStopping(
            rounds=early_stopping_rounds, save_best=True
        )
-        with pytest.warns(UserWarning):
-            cls.fit(X, y, eval_set=[(X, y)], callbacks=[early_stop])
+        cls = xgb.XGBClassifier(
+            eval_metric=tm.eval_error_metric_skl, callbacks=[early_stop]
+        )
+        cls.fit(X, y, eval_set=[(X, y)])

        booster = cls.get_booster()
        assert booster.num_boosted_rounds() == booster.best_iteration + 1
@@ -256,21 +278,10 @@ class TestCallbacks:
            )
            cls.fit(X, y, eval_set=[(X, y)])
            booster = cls.get_booster()
-            assert booster.num_boosted_rounds() == \
-                booster.best_iteration + early_stopping_rounds + 1
-
-    def test_deprecated(self):
-        from sklearn.datasets import load_breast_cancer
-        X, y = load_breast_cancer(return_X_y=True)
-        early_stopping_rounds = 5
-        early_stop = xgb.callback.EarlyStopping(
-            rounds=early_stopping_rounds, save_best=True
-        )
-        clf = xgb.XGBClassifier(
-            eval_metric=tm.eval_error_metric_skl, callbacks=[early_stop]
-        )
-        with pytest.raises(ValueError, match=r".*set_params.*"):
-            clf.fit(X, y, eval_set=[(X, y)], callbacks=[early_stop])
+            assert (
+                booster.num_boosted_rounds()
+                == booster.best_iteration + early_stopping_rounds + 1
+            )

    def run_eta_decay(self, tree_method):
        """Test learning rate scheduler, used by both CPU and GPU tests."""
@@ -343,7 +354,7 @@ class TestCallbacks:
            callbacks=[scheduler([0, 0, 0, 0])],
            evals_result=evals_result,
        )
-        eval_errors_2 = list(map(float, evals_result['eval']['error']))
+        eval_errors_2 = list(map(float, evals_result["eval"]["error"]))
        assert isinstance(bst, xgb.core.Booster)
        # validation error should not decrease, if eta/learning_rate = 0
        assert eval_errors_2[0] == eval_errors_2[-1]
@@ -361,7 +372,7 @@ class TestCallbacks:
            callbacks=[scheduler(eta_decay)],
            evals_result=evals_result,
        )
-        eval_errors_3 = list(map(float, evals_result['eval']['error']))
+        eval_errors_3 = list(map(float, evals_result["eval"]["error"]))

        assert isinstance(bst, xgb.core.Booster)