Initial support for quantile loss. (#8750)

- Add support for Python.
- Add objective.
This commit is contained in:
Jiaming Yuan
2023-02-16 02:30:18 +08:00
committed by GitHub
parent 282b1729da
commit cce4af4acf
26 changed files with 701 additions and 70 deletions

View File

@@ -1926,6 +1926,8 @@ class Booster:
elif isinstance(params, str) and value is not None:
params = [(params, value)]
for key, val in cast(Iterable[Tuple[str, str]], params):
if isinstance(val, np.ndarray):
val = val.tolist()
if val is not None:
_check_call(
_LIB.XGBoosterSetParam(self.handle, c_str(key), c_str(str(val)))

View File

@@ -1,7 +1,10 @@
"""Tests for updaters."""
import json
from functools import partial, update_wrapper
from typing import Dict
import numpy as np
import xgboost.testing as tm
import xgboost as xgb
@@ -68,3 +71,90 @@ def check_init_estimation(tree_method: str) -> None:
n_samples=4096, n_labels=3, n_classes=5, random_state=17
)
run_clf(X, y)
# pylint: disable=too-many-locals
def check_quantile_loss(tree_method: str, weighted: bool) -> None:
    """Test for quantile loss."""
    from sklearn.datasets import make_regression
    from sklearn.metrics import mean_pinball_loss
    from xgboost.sklearn import _metric_decorator

    n_samples = 4096
    n_features = 8
    n_estimators = 8
    # Keep the base score at zero: a non-zero value can introduce floating
    # point differences with the GPU predictor, and multi-target prediction
    # differs slightly from the single-target kernel.
    base_score = 0.0
    rng = np.random.RandomState(1994)
    # pylint: disable=unbalanced-tuple-unpacking
    X, y = make_regression(
        n_samples=n_samples,
        n_features=n_features,
        random_state=rng,
    )
    weight = rng.random(size=n_samples) if weighted else None
    dtrain = xgb.QuantileDMatrix(X, y, weight=weight)

    alpha = np.array([0.1, 0.5])
    evals_result: Dict[str, Dict] = {}
    # One booster trained over all quantiles at once.
    multi_booster = xgb.train(
        {
            "objective": "reg:quantileerror",
            "tree_method": tree_method,
            "quantile_alpha": alpha,
            "base_score": base_score,
        },
        dtrain,
        num_boost_round=n_estimators,
        evals=[(dtrain, "Train")],
        evals_result=evals_result,
    )
    multi_predt = multi_booster.predict(dtrain, strict_shape=True)
    assert tm.non_increasing(evals_result["Train"]["quantile"])
    assert evals_result["Train"]["quantile"][-1] < 20.0

    # check that there's a way to use custom metric and compare the results.
    pinball_metrics = [
        _metric_decorator(
            update_wrapper(
                partial(mean_pinball_loss, sample_weight=weight, alpha=q),
                mean_pinball_loss,
            )
        )
        for q in alpha
    ]

    single_predts = np.empty(multi_predt.shape)
    for i, q in enumerate(alpha):
        single_booster = xgb.train(
            {
                "objective": "reg:quantileerror",
                "tree_method": tree_method,
                "quantile_alpha": q,
                "base_score": base_score,
            },
            dtrain,
            num_boost_round=n_estimators,
            evals=[(dtrain, "Train")],
            custom_metric=pinball_metrics[i],
            evals_result=evals_result,
        )
        assert tm.non_increasing(evals_result["Train"]["quantile"])
        assert evals_result["Train"]["quantile"][-1] < 30.0
        # The built-in quantile metric should agree with sklearn's pinball
        # loss computed through the custom-metric path.
        np.testing.assert_allclose(
            np.array(evals_result["Train"]["quantile"]),
            np.array(evals_result["Train"]["mean_pinball_loss"]),
            atol=1e-6,
            rtol=1e-6,
        )
        single_predts[:, i] = single_booster.predict(dtrain)

    # Each column of the multi-quantile prediction must match the
    # corresponding single-quantile model.
    for i in range(alpha.shape[0]):
        np.testing.assert_allclose(single_predts[:, i], multi_predt[:, i])