[breaking] Remove deprecated parameters in the skl interface. (#9986)
This commit is contained in:
@@ -9,37 +9,41 @@ rng = np.random.RandomState(1337)
|
||||
|
||||
|
||||
class TestEvalMetrics:
|
||||
xgb_params_01 = {'nthread': 1, 'eval_metric': 'error'}
|
||||
xgb_params_01 = {"nthread": 1, "eval_metric": "error"}
|
||||
|
||||
xgb_params_02 = {'nthread': 1, 'eval_metric': ['error']}
|
||||
xgb_params_02 = {"nthread": 1, "eval_metric": ["error"]}
|
||||
|
||||
xgb_params_03 = {'nthread': 1, 'eval_metric': ['rmse', 'error']}
|
||||
xgb_params_03 = {"nthread": 1, "eval_metric": ["rmse", "error"]}
|
||||
|
||||
xgb_params_04 = {'nthread': 1, 'eval_metric': ['error', 'rmse']}
|
||||
xgb_params_04 = {"nthread": 1, "eval_metric": ["error", "rmse"]}
|
||||
|
||||
def evalerror_01(self, preds, dtrain):
|
||||
labels = dtrain.get_label()
|
||||
return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
|
||||
return "error", float(sum(labels != (preds > 0.0))) / len(labels)
|
||||
|
||||
def evalerror_02(self, preds, dtrain):
|
||||
labels = dtrain.get_label()
|
||||
return [('error', float(sum(labels != (preds > 0.0))) / len(labels))]
|
||||
return [("error", float(sum(labels != (preds > 0.0))) / len(labels))]
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def evalerror_03(self, preds, dtrain):
|
||||
from sklearn.metrics import mean_squared_error
|
||||
|
||||
labels = dtrain.get_label()
|
||||
return [('rmse', mean_squared_error(labels, preds)),
|
||||
('error', float(sum(labels != (preds > 0.0))) / len(labels))]
|
||||
return [
|
||||
("rmse", mean_squared_error(labels, preds)),
|
||||
("error", float(sum(labels != (preds > 0.0))) / len(labels)),
|
||||
]
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def evalerror_04(self, preds, dtrain):
|
||||
from sklearn.metrics import mean_squared_error
|
||||
|
||||
labels = dtrain.get_label()
|
||||
return [('error', float(sum(labels != (preds > 0.0))) / len(labels)),
|
||||
('rmse', mean_squared_error(labels, preds))]
|
||||
return [
|
||||
("error", float(sum(labels != (preds > 0.0))) / len(labels)),
|
||||
("rmse", mean_squared_error(labels, preds)),
|
||||
]
|
||||
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_eval_metrics(self):
|
||||
@@ -50,15 +54,15 @@ class TestEvalMetrics:
|
||||
from sklearn.datasets import load_digits
|
||||
|
||||
digits = load_digits(n_class=2)
|
||||
X = digits['data']
|
||||
y = digits['target']
|
||||
X = digits["data"]
|
||||
y = digits["target"]
|
||||
|
||||
Xt, Xv, yt, yv = train_test_split(X, y, test_size=0.2, random_state=0)
|
||||
|
||||
dtrain = xgb.DMatrix(Xt, label=yt)
|
||||
dvalid = xgb.DMatrix(Xv, label=yv)
|
||||
|
||||
watchlist = [(dtrain, 'train'), (dvalid, 'val')]
|
||||
watchlist = [(dtrain, "train"), (dvalid, "val")]
|
||||
|
||||
gbdt_01 = xgb.train(self.xgb_params_01, dtrain, num_boost_round=10)
|
||||
gbdt_02 = xgb.train(self.xgb_params_02, dtrain, num_boost_round=10)
|
||||
@@ -66,26 +70,54 @@ class TestEvalMetrics:
|
||||
assert gbdt_01.predict(dvalid)[0] == gbdt_02.predict(dvalid)[0]
|
||||
assert gbdt_01.predict(dvalid)[0] == gbdt_03.predict(dvalid)[0]
|
||||
|
||||
gbdt_01 = xgb.train(self.xgb_params_01, dtrain, 10, watchlist,
|
||||
early_stopping_rounds=2)
|
||||
gbdt_02 = xgb.train(self.xgb_params_02, dtrain, 10, watchlist,
|
||||
early_stopping_rounds=2)
|
||||
gbdt_03 = xgb.train(self.xgb_params_03, dtrain, 10, watchlist,
|
||||
early_stopping_rounds=2)
|
||||
gbdt_04 = xgb.train(self.xgb_params_04, dtrain, 10, watchlist,
|
||||
early_stopping_rounds=2)
|
||||
gbdt_01 = xgb.train(
|
||||
self.xgb_params_01, dtrain, 10, watchlist, early_stopping_rounds=2
|
||||
)
|
||||
gbdt_02 = xgb.train(
|
||||
self.xgb_params_02, dtrain, 10, watchlist, early_stopping_rounds=2
|
||||
)
|
||||
gbdt_03 = xgb.train(
|
||||
self.xgb_params_03, dtrain, 10, watchlist, early_stopping_rounds=2
|
||||
)
|
||||
gbdt_04 = xgb.train(
|
||||
self.xgb_params_04, dtrain, 10, watchlist, early_stopping_rounds=2
|
||||
)
|
||||
assert gbdt_01.predict(dvalid)[0] == gbdt_02.predict(dvalid)[0]
|
||||
assert gbdt_01.predict(dvalid)[0] == gbdt_03.predict(dvalid)[0]
|
||||
assert gbdt_03.predict(dvalid)[0] != gbdt_04.predict(dvalid)[0]
|
||||
|
||||
gbdt_01 = xgb.train(self.xgb_params_01, dtrain, 10, watchlist,
|
||||
early_stopping_rounds=2, feval=self.evalerror_01)
|
||||
gbdt_02 = xgb.train(self.xgb_params_02, dtrain, 10, watchlist,
|
||||
early_stopping_rounds=2, feval=self.evalerror_02)
|
||||
gbdt_03 = xgb.train(self.xgb_params_03, dtrain, 10, watchlist,
|
||||
early_stopping_rounds=2, feval=self.evalerror_03)
|
||||
gbdt_04 = xgb.train(self.xgb_params_04, dtrain, 10, watchlist,
|
||||
early_stopping_rounds=2, feval=self.evalerror_04)
|
||||
gbdt_01 = xgb.train(
|
||||
self.xgb_params_01,
|
||||
dtrain,
|
||||
10,
|
||||
watchlist,
|
||||
early_stopping_rounds=2,
|
||||
feval=self.evalerror_01,
|
||||
)
|
||||
gbdt_02 = xgb.train(
|
||||
self.xgb_params_02,
|
||||
dtrain,
|
||||
10,
|
||||
watchlist,
|
||||
early_stopping_rounds=2,
|
||||
feval=self.evalerror_02,
|
||||
)
|
||||
gbdt_03 = xgb.train(
|
||||
self.xgb_params_03,
|
||||
dtrain,
|
||||
10,
|
||||
watchlist,
|
||||
early_stopping_rounds=2,
|
||||
feval=self.evalerror_03,
|
||||
)
|
||||
gbdt_04 = xgb.train(
|
||||
self.xgb_params_04,
|
||||
dtrain,
|
||||
10,
|
||||
watchlist,
|
||||
early_stopping_rounds=2,
|
||||
feval=self.evalerror_04,
|
||||
)
|
||||
assert gbdt_01.predict(dvalid)[0] == gbdt_02.predict(dvalid)[0]
|
||||
assert gbdt_01.predict(dvalid)[0] == gbdt_03.predict(dvalid)[0]
|
||||
assert gbdt_03.predict(dvalid)[0] != gbdt_04.predict(dvalid)[0]
|
||||
@@ -93,6 +125,7 @@ class TestEvalMetrics:
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_gamma_deviance(self):
|
||||
from sklearn.metrics import mean_gamma_deviance
|
||||
|
||||
rng = np.random.RandomState(1994)
|
||||
n_samples = 100
|
||||
n_features = 30
|
||||
@@ -101,8 +134,13 @@ class TestEvalMetrics:
|
||||
y = rng.randn(n_samples)
|
||||
y = y - y.min() * 100
|
||||
|
||||
reg = xgb.XGBRegressor(tree_method="hist", objective="reg:gamma", n_estimators=10)
|
||||
reg.fit(X, y, eval_metric="gamma-deviance")
|
||||
reg = xgb.XGBRegressor(
|
||||
tree_method="hist",
|
||||
objective="reg:gamma",
|
||||
n_estimators=10,
|
||||
eval_metric="gamma-deviance",
|
||||
)
|
||||
reg.fit(X, y)
|
||||
|
||||
booster = reg.get_booster()
|
||||
score = reg.predict(X)
|
||||
@@ -113,16 +151,26 @@ class TestEvalMetrics:
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_gamma_lik(self) -> None:
|
||||
import scipy.stats as stats
|
||||
|
||||
rng = np.random.default_rng(1994)
|
||||
n_samples = 32
|
||||
n_features = 10
|
||||
|
||||
X = rng.normal(0, 1, size=n_samples * n_features).reshape((n_samples, n_features))
|
||||
X = rng.normal(0, 1, size=n_samples * n_features).reshape(
|
||||
(n_samples, n_features)
|
||||
)
|
||||
|
||||
alpha, loc, beta = 5.0, 11.1, 22
|
||||
y = stats.gamma.rvs(alpha, loc=loc, scale=beta, size=n_samples, random_state=rng)
|
||||
reg = xgb.XGBRegressor(tree_method="hist", objective="reg:gamma", n_estimators=64)
|
||||
reg.fit(X, y, eval_metric="gamma-nloglik", eval_set=[(X, y)])
|
||||
y = stats.gamma.rvs(
|
||||
alpha, loc=loc, scale=beta, size=n_samples, random_state=rng
|
||||
)
|
||||
reg = xgb.XGBRegressor(
|
||||
tree_method="hist",
|
||||
objective="reg:gamma",
|
||||
n_estimators=64,
|
||||
eval_metric="gamma-nloglik",
|
||||
)
|
||||
reg.fit(X, y, eval_set=[(X, y)])
|
||||
|
||||
score = reg.predict(X)
|
||||
|
||||
@@ -134,7 +182,7 @@ class TestEvalMetrics:
|
||||
# XGBoost uses the canonical link function of gamma in evaluation function.
|
||||
# so \theta = - (1.0 / y)
|
||||
# dispersion is hardcoded as 1.0, so shape (a in scipy parameter) is also 1.0
|
||||
beta = - (1.0 / (- (1.0 / y))) # == y
|
||||
beta = -(1.0 / (-(1.0 / y))) # == y
|
||||
nloglik_stats = -stats.gamma.logpdf(score, a=1.0, scale=beta)
|
||||
|
||||
np.testing.assert_allclose(nloglik, np.mean(nloglik_stats), rtol=1e-3)
|
||||
@@ -153,7 +201,7 @@ class TestEvalMetrics:
|
||||
n_features,
|
||||
n_informative=n_features,
|
||||
n_redundant=0,
|
||||
random_state=rng
|
||||
random_state=rng,
|
||||
)
|
||||
Xy = xgb.DMatrix(X, y)
|
||||
booster = xgb.train(
|
||||
@@ -197,7 +245,7 @@ class TestEvalMetrics:
|
||||
n_informative=n_features,
|
||||
n_redundant=0,
|
||||
n_classes=n_classes,
|
||||
random_state=rng
|
||||
random_state=rng,
|
||||
)
|
||||
if weighted:
|
||||
weights = rng.randn(n_samples)
|
||||
@@ -242,20 +290,25 @@ class TestEvalMetrics:
|
||||
def run_pr_auc_binary(self, tree_method):
|
||||
from sklearn.datasets import make_classification
|
||||
from sklearn.metrics import auc, precision_recall_curve
|
||||
|
||||
X, y = make_classification(128, 4, n_classes=2, random_state=1994)
|
||||
clf = xgb.XGBClassifier(tree_method=tree_method, n_estimators=1)
|
||||
clf.fit(X, y, eval_metric="aucpr", eval_set=[(X, y)])
|
||||
clf = xgb.XGBClassifier(
|
||||
tree_method=tree_method, n_estimators=1, eval_metric="aucpr"
|
||||
)
|
||||
clf.fit(X, y, eval_set=[(X, y)])
|
||||
evals_result = clf.evals_result()["validation_0"]["aucpr"][-1]
|
||||
|
||||
y_score = clf.predict_proba(X)[:, 1] # get the positive column
|
||||
precision, recall, _ = precision_recall_curve(y, y_score)
|
||||
prauc = auc(recall, precision)
|
||||
# Interpolation results are slightly different from sklearn, but overall should be
|
||||
# similar.
|
||||
# Interpolation results are slightly different from sklearn, but overall should
|
||||
# be similar.
|
||||
np.testing.assert_allclose(prauc, evals_result, rtol=1e-2)
|
||||
|
||||
clf = xgb.XGBClassifier(tree_method=tree_method, n_estimators=10)
|
||||
clf.fit(X, y, eval_metric="aucpr", eval_set=[(X, y)])
|
||||
clf = xgb.XGBClassifier(
|
||||
tree_method=tree_method, n_estimators=10, eval_metric="aucpr"
|
||||
)
|
||||
clf.fit(X, y, eval_set=[(X, y)])
|
||||
evals_result = clf.evals_result()["validation_0"]["aucpr"][-1]
|
||||
np.testing.assert_allclose(0.99, evals_result, rtol=1e-2)
|
||||
|
||||
@@ -264,16 +317,21 @@ class TestEvalMetrics:
|
||||
|
||||
def run_pr_auc_multi(self, tree_method):
|
||||
from sklearn.datasets import make_classification
|
||||
|
||||
X, y = make_classification(
|
||||
64, 16, n_informative=8, n_classes=3, random_state=1994
|
||||
)
|
||||
clf = xgb.XGBClassifier(tree_method=tree_method, n_estimators=1)
|
||||
clf.fit(X, y, eval_metric="aucpr", eval_set=[(X, y)])
|
||||
clf = xgb.XGBClassifier(
|
||||
tree_method=tree_method, n_estimators=1, eval_metric="aucpr"
|
||||
)
|
||||
clf.fit(X, y, eval_set=[(X, y)])
|
||||
evals_result = clf.evals_result()["validation_0"]["aucpr"][-1]
|
||||
# No available implementation for comparison, just check that XGBoost converges to
|
||||
# 1.0
|
||||
clf = xgb.XGBClassifier(tree_method=tree_method, n_estimators=10)
|
||||
clf.fit(X, y, eval_metric="aucpr", eval_set=[(X, y)])
|
||||
# No available implementation for comparison, just check that XGBoost converges
|
||||
# to 1.0
|
||||
clf = xgb.XGBClassifier(
|
||||
tree_method=tree_method, n_estimators=10, eval_metric="aucpr"
|
||||
)
|
||||
clf.fit(X, y, eval_set=[(X, y)])
|
||||
evals_result = clf.evals_result()["validation_0"]["aucpr"][-1]
|
||||
np.testing.assert_allclose(1.0, evals_result, rtol=1e-2)
|
||||
|
||||
@@ -282,9 +340,13 @@ class TestEvalMetrics:
|
||||
|
||||
def run_pr_auc_ltr(self, tree_method):
|
||||
from sklearn.datasets import make_classification
|
||||
|
||||
X, y = make_classification(128, 4, n_classes=2, random_state=1994)
|
||||
ltr = xgb.XGBRanker(
|
||||
tree_method=tree_method, n_estimators=16, objective="rank:pairwise"
|
||||
tree_method=tree_method,
|
||||
n_estimators=16,
|
||||
objective="rank:pairwise",
|
||||
eval_metric="aucpr",
|
||||
)
|
||||
groups = np.array([32, 32, 64])
|
||||
ltr.fit(
|
||||
@@ -293,7 +355,6 @@ class TestEvalMetrics:
|
||||
group=groups,
|
||||
eval_set=[(X, y)],
|
||||
eval_group=[groups],
|
||||
eval_metric="aucpr",
|
||||
)
|
||||
results = ltr.evals_result()["validation_0"]["aucpr"]
|
||||
assert results[-1] >= 0.99
|
||||
|
||||
Reference in New Issue
Block a user