Move skl eval_metric and early_stopping_rounds to model params. (#6751)

A new parameter `custom_metric` is added to `train` and `cv` to distinguish its behaviour from the old `feval`, which is now deprecated.  The new `custom_metric` receives the transformed prediction when a built-in objective is used.  This enables XGBoost to use cost functions from other libraries like scikit-learn directly, without going through the definition of the link function.

`eval_metric` and `early_stopping_rounds` in the sklearn interface are moved from `fit` to `__init__` and are now saved as part of the scikit-learn model.  The old ones in the `fit` function are now deprecated. The new `eval_metric` in `__init__` has the same new behaviour as `custom_metric`.

Added more detailed documentation for the behaviour of custom objective and metric.
This commit is contained in:
Jiaming Yuan
2021-10-28 17:20:20 +08:00
committed by GitHub
parent 6b074add66
commit 45aef75cca
13 changed files with 685 additions and 190 deletions

View File

@@ -1271,3 +1271,76 @@ def test_prediction_config():
reg.set_params(booster="gblinear")
assert reg._can_use_inplace_predict() is False
def test_evaluation_metric():
    """Exercise callable ``eval_metric`` handling in the scikit-learn wrappers.

    Four scenarios are covered:

    1. A scikit-learn metric passed to the estimator constructor is evaluated
       once per boosting round and reported under the metric's own name.
    2. Passing ``eval_metric`` to ``fit`` still works but raises a
       ``UserWarning``.
    3. A hand-written multi-class error metric given to the constructor
       produces the same history as the built-in ``"merror"`` metric.
    4. With a custom objective the hand-written metric's size check fails,
       surfacing as an ``AssertionError`` from ``fit``.
    """
    from sklearn.datasets import load_diabetes, load_digits
    from sklearn.metrics import mean_absolute_error

    X, y = load_diabetes(return_X_y=True)
    n_estimators = 16

    # 1. Constructor-level callable metric: one log line per boosting round.
    with tm.captured_output() as (out, _err):
        reg = xgb.XGBRegressor(
            tree_method="hist",
            eval_metric=mean_absolute_error,
            n_estimators=n_estimators,
        )
        reg.fit(X, y, eval_set=[(X, y)])
        lines = out.getvalue().strip().split("\n")

    assert len(lines) == n_estimators
    for line in lines:
        assert "mean_absolute_error" in line

    # 2. Old-style metric (prediction, DMatrix) passed to `fit`: must warn.
    def metric(predt: np.ndarray, Xy: xgb.DMatrix):
        """Old-style metric returning a ``(name, value)`` pair."""
        labels = Xy.get_label()
        return "m", np.abs(predt - labels).sum()

    with pytest.warns(UserWarning):
        reg = xgb.XGBRegressor(
            tree_method="hist",
            n_estimators=1,
        )
        reg.fit(X, y, eval_set=[(X, y)], eval_metric=metric)

    # 3. Custom multi-class error must agree with the built-in "merror".
    def merror(y_true: np.ndarray, predt: np.ndarray):
        """Return the fraction of samples whose prediction differs from the label.

        Asserts that ``predt`` holds exactly one value per sample.
        """
        n_samples = y_true.shape[0]
        assert n_samples == predt.size
        # Compare against the labels handed to the metric, not the enclosing
        # scope's `y`, so the helper does not depend on closure state.
        return np.sum(y_true != predt) / n_samples

    X, y = load_digits(n_class=10, return_X_y=True)

    clf = xgb.XGBClassifier(
        use_label_encoder=False,
        tree_method="hist",
        eval_metric=merror,
        n_estimators=16,
        objective="multi:softmax",
    )
    clf.fit(X, y, eval_set=[(X, y)])
    custom = clf.evals_result()

    clf = xgb.XGBClassifier(
        use_label_encoder=False,
        tree_method="hist",
        eval_metric="merror",
        n_estimators=16,
        objective="multi:softmax",
    )
    clf.fit(X, y, eval_set=[(X, y)])
    internal = clf.evals_result()

    np.testing.assert_allclose(
        custom["validation_0"]["merror"], internal["validation_0"]["merror"]
    )

    # 4. Custom objective: the size assertion in `merror` is expected to fail.
    # NOTE(review): presumably the metric then receives one score per class
    # rather than a single class index per sample -- confirm.
    clf = xgb.XGBRFClassifier(
        use_label_encoder=False,
        tree_method="hist",
        n_estimators=16,
        objective=tm.softprob_obj(10),
        eval_metric=merror,
    )
    with pytest.raises(AssertionError):
        # shape check inside the `merror` function
        clf.fit(X, y, eval_set=[(X, y)])