[breaking] Remove deprecated parameters in the skl interface. (#9986)
This commit is contained in:
@@ -16,14 +16,14 @@ def training_continuation(tmpdir: str, use_pickle: bool) -> None:
|
||||
"""Basic training continuation."""
|
||||
# Train 128 iterations in 1 session
|
||||
X, y = load_breast_cancer(return_X_y=True)
|
||||
clf = xgboost.XGBClassifier(n_estimators=128)
|
||||
clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss")
|
||||
clf = xgboost.XGBClassifier(n_estimators=128, eval_metric="logloss")
|
||||
clf.fit(X, y, eval_set=[(X, y)])
|
||||
print("Total boosted rounds:", clf.get_booster().num_boosted_rounds())
|
||||
|
||||
# Train 128 iterations in 2 sessions, with the first one running for 32 iterations
|
||||
# and the second one running for 96 iterations
|
||||
clf = xgboost.XGBClassifier(n_estimators=32)
|
||||
clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss")
|
||||
clf = xgboost.XGBClassifier(n_estimators=32, eval_metric="logloss")
|
||||
clf.fit(X, y, eval_set=[(X, y)])
|
||||
assert clf.get_booster().num_boosted_rounds() == 32
|
||||
|
||||
# load back the model, this could be a checkpoint
|
||||
@@ -39,8 +39,8 @@ def training_continuation(tmpdir: str, use_pickle: bool) -> None:
|
||||
loaded = xgboost.XGBClassifier()
|
||||
loaded.load_model(path)
|
||||
|
||||
clf = xgboost.XGBClassifier(n_estimators=128 - 32)
|
||||
clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss", xgb_model=loaded)
|
||||
clf = xgboost.XGBClassifier(n_estimators=128 - 32, eval_metric="logloss")
|
||||
clf.fit(X, y, eval_set=[(X, y)], xgb_model=loaded)
|
||||
|
||||
print("Total boosted rounds:", clf.get_booster().num_boosted_rounds())
|
||||
|
||||
@@ -56,19 +56,24 @@ def training_continuation_early_stop(tmpdir: str, use_pickle: bool) -> None:
|
||||
n_estimators = 512
|
||||
|
||||
X, y = load_breast_cancer(return_X_y=True)
|
||||
clf = xgboost.XGBClassifier(n_estimators=n_estimators)
|
||||
clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss", callbacks=[early_stop])
|
||||
clf = xgboost.XGBClassifier(
|
||||
n_estimators=n_estimators, eval_metric="logloss", callbacks=[early_stop]
|
||||
)
|
||||
clf.fit(X, y, eval_set=[(X, y)])
|
||||
print("Total boosted rounds:", clf.get_booster().num_boosted_rounds())
|
||||
best = clf.best_iteration
|
||||
|
||||
# Train 512 iterations in 2 sessions, with the first one running for 128 iterations
|
||||
# and the second one running until early stopping is triggered.
|
||||
clf = xgboost.XGBClassifier(n_estimators=128)
|
||||
clf = xgboost.XGBClassifier(
|
||||
n_estimators=128, eval_metric="logloss", callbacks=[early_stop]
|
||||
)
|
||||
# Reinitialize the early stop callback
|
||||
early_stop = xgboost.callback.EarlyStopping(
|
||||
rounds=early_stopping_rounds, save_best=True
|
||||
)
|
||||
clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss", callbacks=[early_stop])
|
||||
clf.set_params(callbacks=[early_stop])
|
||||
clf.fit(X, y, eval_set=[(X, y)])
|
||||
assert clf.get_booster().num_boosted_rounds() == 128
|
||||
|
||||
# load back the model, this could be a checkpoint
|
||||
@@ -87,13 +92,13 @@ def training_continuation_early_stop(tmpdir: str, use_pickle: bool) -> None:
|
||||
early_stop = xgboost.callback.EarlyStopping(
|
||||
rounds=early_stopping_rounds, save_best=True
|
||||
)
|
||||
clf = xgboost.XGBClassifier(n_estimators=n_estimators - 128)
|
||||
clf = xgboost.XGBClassifier(
|
||||
n_estimators=n_estimators - 128, eval_metric="logloss", callbacks=[early_stop]
|
||||
)
|
||||
clf.fit(
|
||||
X,
|
||||
y,
|
||||
eval_set=[(X, y)],
|
||||
eval_metric="logloss",
|
||||
callbacks=[early_stop],
|
||||
xgb_model=loaded,
|
||||
)
|
||||
|
||||
|
||||
@@ -16,30 +16,35 @@ labels, y = np.unique(y, return_inverse=True)
|
||||
X_train, X_test = X[:1600], X[1600:]
|
||||
y_train, y_test = y[:1600], y[1600:]
|
||||
|
||||
param_dist = {'objective':'binary:logistic', 'n_estimators':2}
|
||||
param_dist = {"objective": "binary:logistic", "n_estimators": 2}
|
||||
|
||||
clf = xgb.XGBModel(**param_dist)
|
||||
clf = xgb.XGBModel(
|
||||
**param_dist,
|
||||
eval_metric="logloss",
|
||||
)
|
||||
# Or you can use: clf = xgb.XGBClassifier(**param_dist)
|
||||
|
||||
clf.fit(X_train, y_train,
|
||||
eval_set=[(X_train, y_train), (X_test, y_test)],
|
||||
eval_metric='logloss',
|
||||
verbose=True)
|
||||
clf.fit(
|
||||
X_train,
|
||||
y_train,
|
||||
eval_set=[(X_train, y_train), (X_test, y_test)],
|
||||
verbose=True,
|
||||
)
|
||||
|
||||
# Load evals result by calling the evals_result() function
|
||||
evals_result = clf.evals_result()
|
||||
|
||||
print('Access logloss metric directly from validation_0:')
|
||||
print(evals_result['validation_0']['logloss'])
|
||||
print("Access logloss metric directly from validation_0:")
|
||||
print(evals_result["validation_0"]["logloss"])
|
||||
|
||||
print('')
|
||||
print('Access metrics through a loop:')
|
||||
print("")
|
||||
print("Access metrics through a loop:")
|
||||
for e_name, e_mtrs in evals_result.items():
|
||||
print('- {}'.format(e_name))
|
||||
print("- {}".format(e_name))
|
||||
for e_mtr_name, e_mtr_vals in e_mtrs.items():
|
||||
print(' - {}'.format(e_mtr_name))
|
||||
print(' - {}'.format(e_mtr_vals))
|
||||
print(" - {}".format(e_mtr_name))
|
||||
print(" - {}".format(e_mtr_vals))
|
||||
|
||||
print('')
|
||||
print('Access complete dict:')
|
||||
print("")
|
||||
print("Access complete dict:")
|
||||
print(evals_result)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
'''
|
||||
"""
|
||||
Collection of examples for using sklearn interface
|
||||
==================================================
|
||||
|
||||
@@ -8,7 +8,7 @@ For an introduction to XGBoost's scikit-learn estimator interface, see
|
||||
Created on 1 Apr 2015
|
||||
|
||||
@author: Jamie Hall
|
||||
'''
|
||||
"""
|
||||
import pickle
|
||||
|
||||
import numpy as np
|
||||
@@ -22,8 +22,8 @@ rng = np.random.RandomState(31337)
|
||||
|
||||
print("Zeros and Ones from the Digits dataset: binary classification")
|
||||
digits = load_digits(n_class=2)
|
||||
y = digits['target']
|
||||
X = digits['data']
|
||||
y = digits["target"]
|
||||
X = digits["data"]
|
||||
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
|
||||
for train_index, test_index in kf.split(X):
|
||||
xgb_model = xgb.XGBClassifier(n_jobs=1).fit(X[train_index], y[train_index])
|
||||
@@ -33,8 +33,8 @@ for train_index, test_index in kf.split(X):
|
||||
|
||||
print("Iris: multiclass classification")
|
||||
iris = load_iris()
|
||||
y = iris['target']
|
||||
X = iris['data']
|
||||
y = iris["target"]
|
||||
X = iris["data"]
|
||||
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
|
||||
for train_index, test_index in kf.split(X):
|
||||
xgb_model = xgb.XGBClassifier(n_jobs=1).fit(X[train_index], y[train_index])
|
||||
@@ -53,9 +53,13 @@ for train_index, test_index in kf.split(X):
|
||||
|
||||
print("Parameter optimization")
|
||||
xgb_model = xgb.XGBRegressor(n_jobs=1)
|
||||
clf = GridSearchCV(xgb_model,
|
||||
{'max_depth': [2, 4],
|
||||
'n_estimators': [50, 100]}, verbose=1, n_jobs=1, cv=3)
|
||||
clf = GridSearchCV(
|
||||
xgb_model,
|
||||
{"max_depth": [2, 4], "n_estimators": [50, 100]},
|
||||
verbose=1,
|
||||
n_jobs=1,
|
||||
cv=3,
|
||||
)
|
||||
clf.fit(X, y)
|
||||
print(clf.best_score_)
|
||||
print(clf.best_params_)
|
||||
@@ -69,9 +73,8 @@ print(np.allclose(clf.predict(X), clf2.predict(X)))
|
||||
|
||||
# Early-stopping
|
||||
|
||||
X = digits['data']
|
||||
y = digits['target']
|
||||
X = digits["data"]
|
||||
y = digits["target"]
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
|
||||
clf = xgb.XGBClassifier(n_jobs=1)
|
||||
clf.fit(X_train, y_train, early_stopping_rounds=10, eval_metric="auc",
|
||||
eval_set=[(X_test, y_test)])
|
||||
clf = xgb.XGBClassifier(n_jobs=1, early_stopping_rounds=10, eval_metric="auc")
|
||||
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
|
||||
|
||||
@@ -12,6 +12,7 @@ import xgboost as xgb
|
||||
if __name__ == "__main__":
|
||||
print("Parallel Parameter optimization")
|
||||
X, y = fetch_california_housing(return_X_y=True)
|
||||
# Make sure the number of threads is balanced.
|
||||
xgb_model = xgb.XGBRegressor(
|
||||
n_jobs=multiprocessing.cpu_count() // 2, tree_method="hist"
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user