[Breaking] Remove Scikit-Learn default parameters (#5130)

* Simplify Scikit-Learn parameter management.

* Copy base class for removing duplicated parameter signatures.
* Set all parameters to None.
* Handle None in set_param.
* Extract the doc.

Co-authored-by: Jiaming Yuan <jm.yuan@outlook.com>
This commit is contained in:
OrdoAbChao
2020-01-23 13:25:20 +01:00
committed by Jiaming Yuan
parent aa9a68010b
commit b4f952bd22
6 changed files with 270 additions and 293 deletions

View File

@@ -22,17 +22,17 @@ class TestEarlyStopping(unittest.TestCase):
y = digits['target']
X_train, X_test, y_train, y_test = train_test_split(X, y,
random_state=0)
clf1 = xgb.XGBClassifier()
clf1 = xgb.XGBClassifier(learning_rate=0.1)
clf1.fit(X_train, y_train, early_stopping_rounds=5, eval_metric="auc",
eval_set=[(X_test, y_test)])
clf2 = xgb.XGBClassifier()
clf2 = xgb.XGBClassifier(learning_rate=0.1)
clf2.fit(X_train, y_train, early_stopping_rounds=4, eval_metric="auc",
eval_set=[(X_test, y_test)])
# should be the same
assert clf1.best_score == clf2.best_score
assert clf1.best_score != 1
# check overfit
clf3 = xgb.XGBClassifier()
clf3 = xgb.XGBClassifier(learning_rate=0.1)
clf3.fit(X_train, y_train, early_stopping_rounds=10, eval_metric="auc",
eval_set=[(X_test, y_test)])
assert clf3.best_score == 1

View File

@@ -6,6 +6,7 @@ import os
import shutil
import pytest
import unittest
import json
rng = np.random.RandomState(1994)
@@ -117,9 +118,10 @@ def test_feature_importances_weight():
digits = load_digits(2)
y = digits['target']
X = digits['data']
xgb_model = xgb.XGBClassifier(
random_state=0, tree_method="exact", importance_type="weight").fit(X, y)
xgb_model = xgb.XGBClassifier(random_state=0,
tree_method="exact",
learning_rate=0.1,
importance_type="weight").fit(X, y)
exp = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.00833333, 0.,
0., 0., 0., 0., 0., 0., 0., 0.025, 0.14166667, 0., 0., 0.,
0., 0., 0., 0.00833333, 0.25833333, 0., 0., 0., 0.,
@@ -134,12 +136,16 @@ def test_feature_importances_weight():
import pandas as pd
y = pd.Series(digits['target'])
X = pd.DataFrame(digits['data'])
xgb_model = xgb.XGBClassifier(
random_state=0, tree_method="exact", importance_type="weight").fit(X, y)
xgb_model = xgb.XGBClassifier(random_state=0,
tree_method="exact",
learning_rate=0.1,
importance_type="weight").fit(X, y)
np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)
xgb_model = xgb.XGBClassifier(
random_state=0, tree_method="exact", importance_type="weight").fit(X, y)
xgb_model = xgb.XGBClassifier(random_state=0,
tree_method="exact",
learning_rate=0.1,
importance_type="weight").fit(X, y)
np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)
@@ -151,7 +157,9 @@ def test_feature_importances_gain():
y = digits['target']
X = digits['data']
xgb_model = xgb.XGBClassifier(
random_state=0, tree_method="exact", importance_type="gain").fit(X, y)
random_state=0, tree_method="exact",
learning_rate=0.1,
importance_type="gain").fit(X, y)
exp = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0.00326159, 0., 0., 0., 0., 0., 0., 0., 0.,
@@ -169,11 +177,15 @@ def test_feature_importances_gain():
y = pd.Series(digits['target'])
X = pd.DataFrame(digits['data'])
xgb_model = xgb.XGBClassifier(
random_state=0, tree_method="exact", importance_type="gain").fit(X, y)
random_state=0, tree_method="exact",
learning_rate=0.1,
importance_type="gain").fit(X, y)
np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)
xgb_model = xgb.XGBClassifier(
random_state=0, tree_method="exact", importance_type="gain").fit(X, y)
random_state=0, tree_method="exact",
learning_rate=0.1,
importance_type="gain").fit(X, y)
np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)
@@ -191,6 +203,10 @@ def test_num_parallel_tree():
dump = bst.get_booster().get_dump(dump_format='json')
assert len(dump) == 4
config = json.loads(bst.get_booster().save_config())
assert int(config['learner']['gradient_booster']['gbtree_train_param'][
'num_parallel_tree']) == 4
def test_boston_housing_regression():
from sklearn.metrics import mean_squared_error
@@ -244,7 +260,7 @@ def test_parameter_tuning():
boston = load_boston()
y = boston['target']
X = boston['data']
xgb_model = xgb.XGBRegressor()
xgb_model = xgb.XGBRegressor(learning_rate=0.1)
clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6],
'n_estimators': [50, 100, 200]},
cv=3, verbose=1, iid=True)