[Breaking] Remove Scikit-Learn default parameters (#5130)

* Simplify Scikit-Learn parameter management. * Copy base class for removing duplicated parameter signatures. * Set all parameters to None. * Handle None in set_param. * Extract the doc. Co-authored-by: Jiaming Yuan <jm.yuan@outlook.com>
2020-01-23 13:25:20 +01:00
parent aa9a68010b
commit b4f952bd22
6 changed files with 270 additions and 293 deletions
--- a/tests/python/test_early_stopping.py
+++ b/tests/python/test_early_stopping.py
@@ -22,17 +22,17 @@ class TestEarlyStopping(unittest.TestCase):
        y = digits['target']
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            random_state=0)
-        clf1 = xgb.XGBClassifier()
+        clf1 = xgb.XGBClassifier(learning_rate=0.1)
        clf1.fit(X_train, y_train, early_stopping_rounds=5, eval_metric="auc",
                 eval_set=[(X_test, y_test)])
-        clf2 = xgb.XGBClassifier()
+        clf2 = xgb.XGBClassifier(learning_rate=0.1)
        clf2.fit(X_train, y_train, early_stopping_rounds=4, eval_metric="auc",
                 eval_set=[(X_test, y_test)])
        # should be the same
        assert clf1.best_score == clf2.best_score
        assert clf1.best_score != 1
        # check overfit
-        clf3 = xgb.XGBClassifier()
+        clf3 = xgb.XGBClassifier(learning_rate=0.1)
        clf3.fit(X_train, y_train, early_stopping_rounds=10, eval_metric="auc",
                 eval_set=[(X_test, y_test)])
        assert clf3.best_score == 1
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -6,6 +6,7 @@ import os
 import shutil
 import pytest
 import unittest
+import json

 rng = np.random.RandomState(1994)

@@ -117,9 +118,10 @@ def test_feature_importances_weight():
    digits = load_digits(2)
    y = digits['target']
    X = digits['data']
-    xgb_model = xgb.XGBClassifier(
-        random_state=0, tree_method="exact", importance_type="weight").fit(X, y)
-
+    xgb_model = xgb.XGBClassifier(random_state=0,
+                                  tree_method="exact",
+                                  learning_rate=0.1,
+                                  importance_type="weight").fit(X, y)
    exp = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.00833333, 0.,
                    0., 0., 0., 0., 0., 0., 0., 0.025, 0.14166667, 0., 0., 0.,
                    0., 0., 0., 0.00833333, 0.25833333, 0., 0., 0., 0.,
@@ -134,12 +136,16 @@ def test_feature_importances_weight():
    import pandas as pd
    y = pd.Series(digits['target'])
    X = pd.DataFrame(digits['data'])
-    xgb_model = xgb.XGBClassifier(
-        random_state=0, tree_method="exact", importance_type="weight").fit(X, y)
+    xgb_model = xgb.XGBClassifier(random_state=0,
+                                  tree_method="exact",
+                                  learning_rate=0.1,
+                                  importance_type="weight").fit(X, y)
    np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)

-    xgb_model = xgb.XGBClassifier(
-        random_state=0, tree_method="exact", importance_type="weight").fit(X, y)
+    xgb_model = xgb.XGBClassifier(random_state=0,
+                                  tree_method="exact",
+                                  learning_rate=0.1,
+                                  importance_type="weight").fit(X, y)
    np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)


@@ -151,7 +157,9 @@ def test_feature_importances_gain():
    y = digits['target']
    X = digits['data']
    xgb_model = xgb.XGBClassifier(
-        random_state=0, tree_method="exact", importance_type="gain").fit(X, y)
+        random_state=0, tree_method="exact",
+        learning_rate=0.1,
+        importance_type="gain").fit(X, y)

    exp = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
                    0.00326159, 0., 0., 0., 0., 0., 0., 0., 0.,
@@ -169,11 +177,15 @@ def test_feature_importances_gain():
    y = pd.Series(digits['target'])
    X = pd.DataFrame(digits['data'])
    xgb_model = xgb.XGBClassifier(
-        random_state=0, tree_method="exact", importance_type="gain").fit(X, y)
+        random_state=0, tree_method="exact",
+        learning_rate=0.1,
+        importance_type="gain").fit(X, y)
    np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)

    xgb_model = xgb.XGBClassifier(
-        random_state=0, tree_method="exact", importance_type="gain").fit(X, y)
+        random_state=0, tree_method="exact",
+        learning_rate=0.1,
+        importance_type="gain").fit(X, y)
    np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)


@@ -191,6 +203,10 @@ def test_num_parallel_tree():
    dump = bst.get_booster().get_dump(dump_format='json')
    assert len(dump) == 4

+    config = json.loads(bst.get_booster().save_config())
+    assert int(config['learner']['gradient_booster']['gbtree_train_param'][
+        'num_parallel_tree']) == 4
+

 def test_boston_housing_regression():
    from sklearn.metrics import mean_squared_error
@@ -244,7 +260,7 @@ def test_parameter_tuning():
    boston = load_boston()
    y = boston['target']
    X = boston['data']
-    xgb_model = xgb.XGBRegressor()
+    xgb_model = xgb.XGBRegressor(learning_rate=0.1)
    clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6],
                                   'n_estimators': [50, 100, 200]},
                       cv=3, verbose=1, iid=True)