Support learning rate for zero-hessian objectives. (#8866)

This commit is contained in:
Jiaming Yuan
2023-03-06 20:33:28 +08:00
committed by GitHub
parent 173096a6a7
commit 228a46e8ad
34 changed files with 464 additions and 434 deletions

View File

@@ -1,3 +1,4 @@
import json
import os
import tempfile
from contextlib import nullcontext
@@ -355,47 +356,125 @@ class TestCallbacks:
with warning_check:
xgb.cv(param, dtrain, num_round, callbacks=[scheduler(eta_decay)])
@pytest.mark.parametrize("tree_method", ["hist", "approx", "exact"])
def run_eta_decay_leaf_output(self, tree_method: str, objective: str) -> None:
    """Check that a learning-rate schedule change shows up in the stored trees.

    Two boosters are trained on the agaricus data with identical parameters
    but different ``LearningRateScheduler`` schedules.  The schedules return
    the same value for ``i <= 1`` and different values for ``i > 1``.  The
    tree at index 2 must be identical in both models, while the tree at
    index 3 must differ.

    Parameters
    ----------
    tree_method :
        Value passed through as the ``tree_method`` training parameter.
    objective :
        Value passed through as the ``objective`` training parameter.  For
        ``reg:quantileerror`` a ``quantile_alpha`` of 0.3 is added.
    """
    num_round = 4

    data_root = tm.data_dir(__file__)
    dtrain = xgb.DMatrix(os.path.join(data_root, "agaricus.txt.train"))
    dtest = xgb.DMatrix(os.path.join(data_root, "agaricus.txt.test"))
    evals = [(dtest, "eval"), (dtrain, "train")]

    param = {
        "max_depth": 2,
        "objective": objective,
        "eval_metric": "error",
        "tree_method": tree_method,
    }
    if objective == "reg:quantileerror":
        param["quantile_alpha"] = 0.3

    def eta_decay_0(i):
        return num_round / (i + 1)

    def eta_decay_1(i: int) -> float:
        # Same as eta_decay_0 up to i == 1, then a constant that differs.
        return 5.0 if i > 1 else num_round / (i + 1)

    def fit_and_dump_trees(schedule):
        # Train one booster under `schedule` and return its list of tree
        # dictionaries as parsed from the JSON model dump.
        booster = xgb.train(
            param,
            dtrain,
            num_round,
            evals,
            callbacks=[xgb.callback.LearningRateScheduler(schedule)],
        )
        model = json.loads(booster.save_raw(raw_format="json"))
        return model["learner"]["gradient_booster"]["model"]["trees"]

    trees_0 = fit_and_dump_trees(eta_decay_0)
    trees_1 = fit_and_dump_trees(eta_decay_1)

    # NOTE(review): tree 2 is expected to match even though the schedules
    # already disagree at i == 2 -- presumably the value returned for round i
    # takes effect on the following round; confirm against the scheduler.
    assert trees_0[2]["base_weights"] == trees_1[2]["base_weights"]
    assert trees_0[2]["split_conditions"] == trees_1[2]["split_conditions"]

    assert trees_0[3]["base_weights"] != trees_1[3]["base_weights"]
    assert trees_0[3]["split_conditions"] != trees_1[3]["split_conditions"]
# NOTE(review): this list read ["hist", "approx", "approx"], so the third case
# only repeated the second.  The duplicate is dropped here.  The list used to
# end in "exact"; re-add it if that tree method is still expected to pass.
@pytest.mark.parametrize("tree_method", ["hist", "approx"])
def test_eta_decay(self, tree_method: str) -> None:
    """Run the shared eta-decay check once per listed tree method."""
    self.run_eta_decay(tree_method)
@pytest.mark.parametrize(
    "tree_method,objective",
    [
        (method, loss)
        for method in ("hist", "approx")
        for loss in ("binary:logistic", "reg:absoluteerror", "reg:quantileerror")
    ],
)
def test_eta_decay_leaf_output(self, tree_method: str, objective: str) -> None:
    """Run the leaf-output eta-decay check for each tree method/objective pair."""
    self.run_eta_decay_leaf_output(tree_method=tree_method, objective=objective)
def test_check_point(self):
    """Check that ``TrainingCheckPoint`` writes one file per boosting round.

    Trains for 10 rounds with ``iterations=1`` and asserts that
    ``model_1`` .. ``model_9`` exist in the temporary directory, first as
    ``.json`` files and then, with ``as_pickle=True``, as ``.pkl`` files.
    """
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True)
    m = xgb.DMatrix(X, y)

    with tempfile.TemporaryDirectory() as tmpdir:
        # Default output format: JSON model files.
        check_point = xgb.callback.TrainingCheckPoint(
            directory=tmpdir, iterations=1, name="model"
        )
        xgb.train(
            {"objective": "binary:logistic"},
            m,
            num_boost_round=10,
            verbose_eval=False,
            callbacks=[check_point],
        )
        for i in range(1, 10):
            assert os.path.exists(os.path.join(tmpdir, f"model_{i}.json"))

        # Same run again, this time asking for pickled checkpoints.
        check_point = xgb.callback.TrainingCheckPoint(
            directory=tmpdir, iterations=1, as_pickle=True, name="model"
        )
        xgb.train(
            {"objective": "binary:logistic"},
            m,
            num_boost_round=10,
            verbose_eval=False,
            callbacks=[check_point],
        )
        for i in range(1, 10):
            assert os.path.exists(os.path.join(tmpdir, f"model_{i}.pkl"))
def test_callback_list(self):
    """Check that repeated training does not grow the caller's callback list.

    The same one-element ``callbacks`` list is passed to ``xgb.train`` four
    times; afterwards it must still contain exactly one callback.
    """
    X, y = tm.get_california_housing()
    m = xgb.DMatrix(X, y)
    callbacks = [xgb.callback.EarlyStopping(rounds=10)]

    for _ in range(4):
        xgb.train(
            {"objective": "reg:squarederror", "eval_metric": "rmse"},
            m,
            evals=[(m, "Train")],
            num_boost_round=1,
            verbose_eval=True,
            callbacks=callbacks,
        )

    assert len(callbacks) == 1

View File

@@ -51,11 +51,8 @@ class TestPickling:
def test_model_pickling_json(self):
def check(config):
    """Assert that ``config`` records a subsample of 0.5.

    The value is read from
    ``config["learner"]["gradient_booster"]["tree_train_param"]["subsample"]``
    and converted with ``float`` before comparison, so a string such as
    ``"0.5"`` is accepted.
    """
    tree_param = config["learner"]["gradient_booster"]["tree_train_param"]
    subsample = tree_param["subsample"]
    assert float(subsample) == 0.5
params = {"nthread": 8, "tree_method": "hist", "subsample": 0.5}

View File

@@ -447,7 +447,8 @@ class TestTreeMethod:
{
"tree_method": tree_method,
"objective": "reg:absoluteerror",
"subsample": 0.8
"subsample": 0.8,
"eta": 1.0,
},
Xy,
num_boost_round=10,

View File

@@ -1018,14 +1018,18 @@ def test_XGBClassifier_resume():
def test_constraint_parameters():
    """Check that ``interaction_constraints`` is stored in the saved config.

    Fits an ``XGBRegressor`` built with a constraint string on random data,
    then asserts that the booster's saved config holds the same string under
    ``learner / gradient_booster / tree_train_param``.
    """
    constraints = "[[0, 1], [2, 3, 4]]"
    reg = xgb.XGBRegressor(interaction_constraints=constraints)
    X = np.random.randn(10, 10)
    y = np.random.randn(10)
    reg.fit(X, y)

    config = json.loads(reg.get_booster().save_config())
    tree_param = config["learner"]["gradient_booster"]["tree_train_param"]
    assert tree_param["interaction_constraints"] == constraints
def test_parameter_validation():