[Breaking] Remove Scikit-Learn default parameters (#5130)
* Simplify Scikit-Learn parameter management.
* Copy the base class to remove duplicated parameter signatures.
* Set all parameters to None.
* Handle None in set_param.
* Extract the doc.

Co-authored-by: Jiaming Yuan <jm.yuan@outlook.com>
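The mechanism behind "set all parameters to None" is worth spelling out. Below is a minimal sketch of the pattern, not the actual xgboost source: the wrapper's constructor stores None placeholders, get_params() reports them verbatim for scikit-learn, and the dict handed to the native booster drops None entries so the native defaults apply. The class name is invented for illustration; get_xgb_params does exist on the real wrapper, but this body is a simplification.

# Hypothetical sketch of the None-default pattern this commit adopts.
class MiniEstimator:
    def __init__(self, learning_rate=None, max_depth=None, n_estimators=100):
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.n_estimators = n_estimators

    def get_params(self, deep=True):
        # scikit-learn style: report constructor arguments verbatim,
        # including the None placeholders.
        return {'learning_rate': self.learning_rate,
                'max_depth': self.max_depth,
                'n_estimators': self.n_estimators}

    def get_xgb_params(self):
        # Drop None entries so the native XGBoost defaults take effect
        # instead of wrapper-side duplicates of those defaults.
        return {k: v for k, v in self.get_params().items() if v is not None}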
@@ -22,17 +22,17 @@ class TestEarlyStopping(unittest.TestCase):
         y = digits['target']
         X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                             random_state=0)
-        clf1 = xgb.XGBClassifier()
+        clf1 = xgb.XGBClassifier(learning_rate=0.1)
         clf1.fit(X_train, y_train, early_stopping_rounds=5, eval_metric="auc",
                  eval_set=[(X_test, y_test)])
-        clf2 = xgb.XGBClassifier()
+        clf2 = xgb.XGBClassifier(learning_rate=0.1)
         clf2.fit(X_train, y_train, early_stopping_rounds=4, eval_metric="auc",
                  eval_set=[(X_test, y_test)])
         # should be the same
         assert clf1.best_score == clf2.best_score
         assert clf1.best_score != 1
         # check overfit
-        clf3 = xgb.XGBClassifier()
+        clf3 = xgb.XGBClassifier(learning_rate=0.1)
         clf3.fit(X_train, y_train, early_stopping_rounds=10, eval_metric="auc",
                  eval_set=[(X_test, y_test)])
         assert clf3.best_score == 1
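Note on the hunk above: the wrapper used to default learning_rate to 0.1; with the wrapper defaults removed, the native eta default (0.3) applies instead, so each test now pins learning_rate=0.1 explicitly to keep the expected best_score values valid. The same pinning recurs in the feature-importance hunks below, whose hard-coded arrays depend on the trained ensemble. A quick, hedged check of the new constructor behavior (assumes a build that includes this commit):

import xgboost as xgb

clf = xgb.XGBClassifier()
# The wrapper no longer supplies a value; None defers to the native default.
assert clf.get_params()['learning_rate'] is None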
@@ -6,6 +6,7 @@ import os
 import shutil
 import pytest
 import unittest
+import json
 
 rng = np.random.RandomState(1994)
 
@@ -117,9 +118,10 @@ def test_feature_importances_weight():
     digits = load_digits(2)
     y = digits['target']
     X = digits['data']
-    xgb_model = xgb.XGBClassifier(
-        random_state=0, tree_method="exact", importance_type="weight").fit(X, y)
-
+    xgb_model = xgb.XGBClassifier(random_state=0,
+                                  tree_method="exact",
+                                  learning_rate=0.1,
+                                  importance_type="weight").fit(X, y)
     exp = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.00833333, 0.,
                     0., 0., 0., 0., 0., 0., 0., 0.025, 0.14166667, 0., 0., 0.,
                     0., 0., 0., 0.00833333, 0.25833333, 0., 0., 0., 0.,
@@ -134,12 +136,16 @@ def test_feature_importances_weight():
     import pandas as pd
     y = pd.Series(digits['target'])
     X = pd.DataFrame(digits['data'])
-    xgb_model = xgb.XGBClassifier(
-        random_state=0, tree_method="exact", importance_type="weight").fit(X, y)
+    xgb_model = xgb.XGBClassifier(random_state=0,
+                                  tree_method="exact",
+                                  learning_rate=0.1,
+                                  importance_type="weight").fit(X, y)
     np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)
 
-    xgb_model = xgb.XGBClassifier(
-        random_state=0, tree_method="exact", importance_type="weight").fit(X, y)
+    xgb_model = xgb.XGBClassifier(random_state=0,
+                                  tree_method="exact",
+                                  learning_rate=0.1,
+                                  importance_type="weight").fit(X, y)
     np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)
 
 
@@ -151,7 +157,9 @@ def test_feature_importances_gain():
     y = digits['target']
     X = digits['data']
     xgb_model = xgb.XGBClassifier(
-        random_state=0, tree_method="exact", importance_type="gain").fit(X, y)
+        random_state=0, tree_method="exact",
+        learning_rate=0.1,
+        importance_type="gain").fit(X, y)
 
     exp = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
                     0.00326159, 0., 0., 0., 0., 0., 0., 0., 0.,
@@ -169,11 +177,15 @@ def test_feature_importances_gain():
     y = pd.Series(digits['target'])
     X = pd.DataFrame(digits['data'])
     xgb_model = xgb.XGBClassifier(
-        random_state=0, tree_method="exact", importance_type="gain").fit(X, y)
+        random_state=0, tree_method="exact",
+        learning_rate=0.1,
+        importance_type="gain").fit(X, y)
     np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)
 
     xgb_model = xgb.XGBClassifier(
-        random_state=0, tree_method="exact", importance_type="gain").fit(X, y)
+        random_state=0, tree_method="exact",
+        learning_rate=0.1,
+        importance_type="gain").fit(X, y)
     np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)
 
 
@@ -191,6 +203,10 @@ def test_num_parallel_tree():
     dump = bst.get_booster().get_dump(dump_format='json')
     assert len(dump) == 4
 
+    config = json.loads(bst.get_booster().save_config())
+    assert int(config['learner']['gradient_booster']['gbtree_train_param'][
+        'num_parallel_tree']) == 4
+
 
 def test_boston_housing_regression():
     from sklearn.metrics import mean_squared_error
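The new assertion above round-trips the booster configuration through save_config(). A small helper along the same lines, hedged: the JSON path is taken verbatim from the hunk; anything beyond it is an assumption.

import json

def gbtree_train_param(booster, name):
    # Serialize the trained booster's configuration and pull one
    # gbtree training parameter out of the resulting JSON document.
    config = json.loads(booster.save_config())
    return config['learner']['gradient_booster']['gbtree_train_param'][name]

# In the test above: gbtree_train_param(bst.get_booster(), 'num_parallel_tree')
# returns '4' (values come back as strings, hence the int(...) in the test).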
@@ -244,7 +260,7 @@ def test_parameter_tuning():
     boston = load_boston()
     y = boston['target']
     X = boston['data']
-    xgb_model = xgb.XGBRegressor()
+    xgb_model = xgb.XGBRegressor(learning_rate=0.1)
     clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6],
                                    'n_estimators': [50, 100, 200]},
                        cv=3, verbose=1, iid=True)
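The grid-search hunk is presumably why "Handle None in set_param" matters: GridSearchCV clones the estimator through the get_params()/set_params() round trip, so the None placeholders must survive cloning unchanged. A minimal illustration (the parameter names are real XGBRegressor parameters; the values are examples only):

from sklearn.base import clone
import xgboost as xgb

reg = xgb.XGBRegressor(learning_rate=0.1)
reg.set_params(max_depth=4)  # an explicit value replaces the None placeholder
cloned = clone(reg)
assert cloned.get_params()['max_depth'] == 4      # explicit value round-trips
assert cloned.get_params()['subsample'] is None   # untouched -> stays None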