Init estimation for regression. (#8272)
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
from xgboost.testing.updater import get_basescore
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
@@ -11,16 +12,12 @@ class TestEarlyStopping:
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_early_stopping_nonparallel(self):
|
||||
from sklearn.datasets import load_digits
|
||||
try:
|
||||
from sklearn.model_selection import train_test_split
|
||||
except ImportError:
|
||||
from sklearn.cross_validation import train_test_split
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
digits = load_digits(n_class=2)
|
||||
X = digits['data']
|
||||
y = digits['target']
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y,
|
||||
random_state=0)
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
|
||||
clf1 = xgb.XGBClassifier(learning_rate=0.1)
|
||||
clf1.fit(X_train, y_train, early_stopping_rounds=5, eval_metric="auc",
|
||||
eval_set=[(X_test, y_test)])
|
||||
@@ -31,9 +28,23 @@ class TestEarlyStopping:
|
||||
assert clf1.best_score == clf2.best_score
|
||||
assert clf1.best_score != 1
|
||||
# check overfit
|
||||
clf3 = xgb.XGBClassifier(learning_rate=0.1)
|
||||
clf3.fit(X_train, y_train, early_stopping_rounds=10, eval_metric="auc",
|
||||
eval_set=[(X_test, y_test)])
|
||||
clf3 = xgb.XGBClassifier(
|
||||
learning_rate=0.1,
|
||||
eval_metric="auc",
|
||||
early_stopping_rounds=10
|
||||
)
|
||||
clf3.fit(X_train, y_train, eval_set=[(X_test, y_test)])
|
||||
base_score = get_basescore(clf3)
|
||||
assert 0.53 > base_score > 0.5
|
||||
|
||||
clf3 = xgb.XGBClassifier(
|
||||
learning_rate=0.1,
|
||||
base_score=.5,
|
||||
eval_metric="auc",
|
||||
early_stopping_rounds=10
|
||||
)
|
||||
clf3.fit(X_train, y_train, eval_set=[(X_test, y_test)])
|
||||
|
||||
assert clf3.best_score == 1
|
||||
|
||||
def evalerror(self, preds, dtrain):
|
||||
|
||||
@@ -9,11 +9,13 @@ train_data = xgb.DMatrix(np.array([[1]]), label=np.array([1]))
|
||||
class TestTreeRegularization:
|
||||
def test_alpha(self):
|
||||
params = {
|
||||
'tree_method': 'exact', 'verbosity': 0,
|
||||
'objective': 'reg:squarederror',
|
||||
'eta': 1,
|
||||
'lambda': 0,
|
||||
'alpha': 0.1
|
||||
"tree_method": "exact",
|
||||
"verbosity": 0,
|
||||
"objective": "reg:squarederror",
|
||||
"eta": 1,
|
||||
"lambda": 0,
|
||||
"alpha": 0.1,
|
||||
"base_score": 0.5,
|
||||
}
|
||||
|
||||
model = xgb.train(params, train_data, 1)
|
||||
@@ -27,11 +29,13 @@ class TestTreeRegularization:
|
||||
|
||||
def test_lambda(self):
|
||||
params = {
|
||||
'tree_method': 'exact', 'verbosity': 0,
|
||||
'objective': 'reg:squarederror',
|
||||
'eta': 1,
|
||||
'lambda': 1,
|
||||
'alpha': 0
|
||||
"tree_method": "exact",
|
||||
"verbosity": 0,
|
||||
"objective": "reg:squarederror",
|
||||
"eta": 1,
|
||||
"lambda": 1,
|
||||
"alpha": 0,
|
||||
"base_score": 0.5,
|
||||
}
|
||||
|
||||
model = xgb.train(params, train_data, 1)
|
||||
@@ -45,11 +49,13 @@ class TestTreeRegularization:
|
||||
|
||||
def test_alpha_and_lambda(self):
|
||||
params = {
|
||||
'tree_method': 'exact', 'verbosity': 1,
|
||||
'objective': 'reg:squarederror',
|
||||
'eta': 1,
|
||||
'lambda': 1,
|
||||
'alpha': 0.1
|
||||
"tree_method": "exact",
|
||||
"verbosity": 1,
|
||||
"objective": "reg:squarederror",
|
||||
"eta": 1,
|
||||
"lambda": 1,
|
||||
"alpha": 0.1,
|
||||
"base_score": 0.5,
|
||||
}
|
||||
|
||||
model = xgb.train(params, train_data, 1)
|
||||
|
||||
@@ -10,6 +10,7 @@ from xgboost.testing.params import (
|
||||
exact_parameter_strategy,
|
||||
hist_parameter_strategy,
|
||||
)
|
||||
from xgboost.testing.updater import check_init_estimation
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
@@ -449,3 +450,6 @@ class TestTreeMethod:
|
||||
)
|
||||
def test_adaptive(self, tree_method, weighted) -> None:
|
||||
self.run_adaptive(tree_method, weighted)
|
||||
|
||||
def test_init_estimation(self) -> None:
|
||||
check_init_estimation("hist")
|
||||
|
||||
@@ -9,6 +9,7 @@ except Exception:
|
||||
shap = None
|
||||
pass
|
||||
|
||||
|
||||
pytestmark = pytest.mark.skipif(shap is None, reason="Requires shap package")
|
||||
|
||||
|
||||
@@ -16,11 +17,16 @@ pytestmark = pytest.mark.skipif(shap is None, reason="Requires shap package")
|
||||
# Changes in binary format may cause problems
|
||||
def test_with_shap():
|
||||
from sklearn.datasets import fetch_california_housing
|
||||
|
||||
X, y = fetch_california_housing(return_X_y=True)
|
||||
dtrain = xgb.DMatrix(X, label=y)
|
||||
model = xgb.train({"learning_rate": 0.01}, dtrain, 10)
|
||||
explainer = shap.TreeExplainer(model)
|
||||
shap_values = explainer.shap_values(X)
|
||||
margin = model.predict(dtrain, output_margin=True)
|
||||
assert np.allclose(np.sum(shap_values, axis=len(shap_values.shape) - 1),
|
||||
margin - explainer.expected_value, 1e-3, 1e-3)
|
||||
assert np.allclose(
|
||||
np.sum(shap_values, axis=len(shap_values.shape) - 1),
|
||||
margin - explainer.expected_value,
|
||||
1e-3,
|
||||
1e-3,
|
||||
)
|
||||
|
||||
@@ -9,6 +9,7 @@ import numpy as np
|
||||
import pytest
|
||||
from sklearn.utils.estimator_checks import parametrize_with_checks
|
||||
from xgboost.testing.shared import get_feature_weights, validate_data_initialization
|
||||
from xgboost.testing.updater import get_basescore
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
@@ -196,19 +197,22 @@ def test_stacking_classification():
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
|
||||
clf.fit(X_train, y_train).score(X_test, y_test)
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_pandas())
|
||||
def test_feature_importances_weight():
|
||||
from sklearn.datasets import load_digits
|
||||
|
||||
digits = load_digits(n_class=2)
|
||||
y = digits['target']
|
||||
X = digits['data']
|
||||
y = digits["target"]
|
||||
X = digits["data"]
|
||||
|
||||
xgb_model = xgb.XGBClassifier(
|
||||
random_state=0,
|
||||
tree_method="exact",
|
||||
learning_rate=0.1,
|
||||
importance_type="weight",
|
||||
base_score=0.5,
|
||||
).fit(X, y)
|
||||
|
||||
xgb_model = xgb.XGBClassifier(random_state=0,
|
||||
tree_method="exact",
|
||||
learning_rate=0.1,
|
||||
importance_type="weight").fit(X, y)
|
||||
exp = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.00833333, 0.,
|
||||
0., 0., 0., 0., 0., 0., 0., 0.025, 0.14166667, 0., 0., 0.,
|
||||
0., 0., 0., 0.00833333, 0.25833333, 0., 0., 0., 0.,
|
||||
@@ -223,16 +227,22 @@ def test_feature_importances_weight():
|
||||
import pandas as pd
|
||||
y = pd.Series(digits['target'])
|
||||
X = pd.DataFrame(digits['data'])
|
||||
xgb_model = xgb.XGBClassifier(random_state=0,
|
||||
tree_method="exact",
|
||||
learning_rate=0.1,
|
||||
importance_type="weight").fit(X, y)
|
||||
xgb_model = xgb.XGBClassifier(
|
||||
random_state=0,
|
||||
tree_method="exact",
|
||||
learning_rate=0.1,
|
||||
base_score=.5,
|
||||
importance_type="weight"
|
||||
).fit(X, y)
|
||||
np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)
|
||||
|
||||
xgb_model = xgb.XGBClassifier(random_state=0,
|
||||
tree_method="exact",
|
||||
learning_rate=0.1,
|
||||
importance_type="weight").fit(X, y)
|
||||
xgb_model = xgb.XGBClassifier(
|
||||
random_state=0,
|
||||
tree_method="exact",
|
||||
learning_rate=0.1,
|
||||
importance_type="weight",
|
||||
base_score=.5,
|
||||
).fit(X, y)
|
||||
np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
@@ -274,6 +284,7 @@ def test_feature_importances_gain():
|
||||
random_state=0, tree_method="exact",
|
||||
learning_rate=0.1,
|
||||
importance_type="gain",
|
||||
base_score=0.5,
|
||||
).fit(X, y)
|
||||
|
||||
exp = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
|
||||
@@ -296,6 +307,7 @@ def test_feature_importances_gain():
|
||||
tree_method="exact",
|
||||
learning_rate=0.1,
|
||||
importance_type="gain",
|
||||
base_score=0.5,
|
||||
).fit(X, y)
|
||||
np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)
|
||||
|
||||
@@ -304,6 +316,7 @@ def test_feature_importances_gain():
|
||||
tree_method="exact",
|
||||
learning_rate=0.1,
|
||||
importance_type="gain",
|
||||
base_score=0.5,
|
||||
).fit(X, y)
|
||||
np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)
|
||||
|
||||
@@ -593,18 +606,21 @@ def test_split_value_histograms():
|
||||
|
||||
digits_2class = load_digits(n_class=2)
|
||||
|
||||
X = digits_2class['data']
|
||||
y = digits_2class['target']
|
||||
X = digits_2class["data"]
|
||||
y = digits_2class["target"]
|
||||
|
||||
dm = xgb.DMatrix(X, label=y)
|
||||
params = {'max_depth': 6, 'eta': 0.01, 'verbosity': 0,
|
||||
'objective': 'binary:logistic'}
|
||||
params = {
|
||||
"max_depth": 6,
|
||||
"eta": 0.01,
|
||||
"verbosity": 0,
|
||||
"objective": "binary:logistic",
|
||||
"base_score": 0.5,
|
||||
}
|
||||
|
||||
gbdt = xgb.train(params, dm, num_boost_round=10)
|
||||
assert gbdt.get_split_value_histogram("not_there",
|
||||
as_pandas=True).shape[0] == 0
|
||||
assert gbdt.get_split_value_histogram("not_there",
|
||||
as_pandas=False).shape[0] == 0
|
||||
assert gbdt.get_split_value_histogram("not_there", as_pandas=True).shape[0] == 0
|
||||
assert gbdt.get_split_value_histogram("not_there", as_pandas=False).shape[0] == 0
|
||||
assert gbdt.get_split_value_histogram("f28", bins=0).shape[0] == 1
|
||||
assert gbdt.get_split_value_histogram("f28", bins=1).shape[0] == 1
|
||||
assert gbdt.get_split_value_histogram("f28", bins=2).shape[0] == 2
|
||||
@@ -748,11 +764,7 @@ def test_sklearn_get_default_params():
|
||||
cls = xgb.XGBClassifier()
|
||||
assert cls.get_params()["base_score"] is None
|
||||
cls.fit(X[:4, ...], y[:4, ...])
|
||||
base_score = float(
|
||||
json.loads(cls.get_booster().save_config())["learner"]["learner_model_param"][
|
||||
"base_score"
|
||||
]
|
||||
)
|
||||
base_score = get_basescore(cls)
|
||||
np.testing.assert_equal(base_score, 0.5)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user