Initial support for quantile loss. (#8750)

- Add support for Python.
- Add objective.
This commit is contained in:
Jiaming Yuan
2023-02-16 02:30:18 +08:00
committed by GitHub
parent 282b1729da
commit cce4af4acf
26 changed files with 701 additions and 70 deletions

View File

@@ -1926,6 +1926,8 @@ class Booster:
elif isinstance(params, str) and value is not None:
params = [(params, value)]
for key, val in cast(Iterable[Tuple[str, str]], params):
if isinstance(val, np.ndarray):
val = val.tolist()
if val is not None:
_check_call(
_LIB.XGBoosterSetParam(self.handle, c_str(key), c_str(str(val)))

View File

@@ -1,7 +1,10 @@
"""Tests for updaters."""
import json
from functools import partial, update_wrapper
from typing import Dict
import numpy as np
import xgboost.testing as tm
import xgboost as xgb
@@ -68,3 +71,90 @@ def check_init_estimation(tree_method: str) -> None:
n_samples=4096, n_labels=3, n_classes=5, random_state=17
)
run_clf(X, y)
# pylint: disable=too-many-locals
def check_quantile_loss(tree_method: str, weighted: bool) -> None:
    """Test for quantile loss."""
    from sklearn.datasets import make_regression
    from sklearn.metrics import mean_pinball_loss
    from xgboost.sklearn import _metric_decorator

    n_samples = 4096
    n_features = 8
    n_estimators = 8
    # Keep the base score at zero: a non-zero value can introduce floating
    # point differences with the GPU predictor, and multi-target prediction
    # differs slightly from the single-target kernel.
    base_score = 0.0
    rng = np.random.RandomState(1994)
    # pylint: disable=unbalanced-tuple-unpacking
    X, y = make_regression(
        n_samples=n_samples,
        n_features=n_features,
        random_state=rng,
    )
    weight = rng.random(size=n_samples) if weighted else None
    dtrain = xgb.QuantileDMatrix(X, y, weight=weight)

    alpha = np.array([0.1, 0.5])
    evals_result: Dict[str, Dict] = {}
    # One booster trained over all quantiles at once.
    multi_booster = xgb.train(
        {
            "objective": "reg:quantileerror",
            "tree_method": tree_method,
            "quantile_alpha": alpha,
            "base_score": base_score,
        },
        dtrain,
        num_boost_round=n_estimators,
        evals=[(dtrain, "Train")],
        evals_result=evals_result,
    )
    multi_predt = multi_booster.predict(dtrain, strict_shape=True)
    assert tm.non_increasing(evals_result["Train"]["quantile"])
    assert evals_result["Train"]["quantile"][-1] < 20.0

    # check that there's a way to use custom metric and compare the results.
    pinball_metrics = [
        _metric_decorator(
            update_wrapper(
                partial(mean_pinball_loss, sample_weight=weight, alpha=q),
                mean_pinball_loss,
            )
        )
        for q in alpha
    ]

    single_predts = np.empty(multi_predt.shape)
    for i, q in enumerate(alpha):
        single_booster = xgb.train(
            {
                "objective": "reg:quantileerror",
                "tree_method": tree_method,
                "quantile_alpha": q,
                "base_score": base_score,
            },
            dtrain,
            num_boost_round=n_estimators,
            evals=[(dtrain, "Train")],
            custom_metric=pinball_metrics[i],
            evals_result=evals_result,
        )
        assert tm.non_increasing(evals_result["Train"]["quantile"])
        assert evals_result["Train"]["quantile"][-1] < 30.0
        # The built-in quantile metric should agree with sklearn's pinball
        # loss computed through the custom-metric path.
        np.testing.assert_allclose(
            np.array(evals_result["Train"]["quantile"]),
            np.array(evals_result["Train"]["mean_pinball_loss"]),
            atol=1e-6,
            rtol=1e-6,
        )
        single_predts[:, i] = single_booster.predict(dtrain)

    # Each column of the multi-quantile prediction must match the
    # corresponding single-quantile model.
    for i in range(alpha.shape[0]):
        np.testing.assert_allclose(single_predts[:, i], multi_predt[:, i])