[dask, sklearn] Fix predict proba. (#6566)

* For sklearn:
  - Handle user-defined objective functions.
  - Handle `softmax`.
* For dask:
  - Use the implementation from sklearn; the previous implementation didn't perform any extra handling.
2021-01-05 08:29:06 +08:00
parent 516a93d25c
commit 60cfd14349
5 changed files with 74 additions and 9 deletions
--- a/tests/python/test_with_dask.py
+++ b/tests/python/test_with_dask.py
@@ -160,7 +160,7 @@ def test_boost_from_prediction(tree_method: str) -> None:
                tree_method=tree_method,
            )
            model_0.fit(X=X_, y=y_)
-            margin = model_0.predict_proba(X_, output_margin=True)
+            margin = model_0.predict(X_, output_margin=True)

            model_1 = xgb.dask.DaskXGBClassifier(
                learning_rate=0.3,
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -79,6 +79,18 @@ def test_multiclass_classification():
        check_pred(preds3, labels, output_margin=True)
        check_pred(preds4, labels, output_margin=False)

+    cls = xgb.XGBClassifier(n_estimators=4).fit(X, y)
+    assert cls.n_classes_ == 3
+    proba = cls.predict_proba(X)
+    assert proba.shape[0] == X.shape[0]
+    assert proba.shape[1] == cls.n_classes_
+
+    # Custom objective: the default is multi:softprob, so no transformation is required.
+    cls = xgb.XGBClassifier(n_estimators=4, objective=tm.softprob_obj(3)).fit(X, y)
+    proba = cls.predict_proba(X)
+    assert proba.shape[0] == X.shape[0]
+    assert proba.shape[1] == cls.n_classes_
+

 def test_ranking():
    # generate random data
@@ -788,6 +800,11 @@ def test_save_load_model():
        booster.save_model(model_path)
        cls = xgb.XGBClassifier()
        cls.load_model(model_path)
+
+        proba = cls.predict_proba(X)
+        assert proba.shape[0] == X.shape[0]
+        assert proba.shape[1] == 2  # binary
+
        predt_1 = cls.predict_proba(X)[:, 1]
        assert np.allclose(predt_0, predt_1)

--- a/tests/python/testing.py
+++ b/tests/python/testing.py
@@ -253,6 +253,34 @@ def eval_error_metric(predt, dtrain: xgb.DMatrix):
    return 'CustomErr', np.sum(r)


+def softmax(x):
+    e = np.exp(x)
+    return e / np.sum(e)
+
+
+def softprob_obj(classes):
+    def objective(labels, predt):
+        rows = labels.shape[0]
+        grad = np.zeros((rows, classes), dtype=float)
+        hess = np.zeros((rows, classes), dtype=float)
+        eps = 1e-6
+        for r in range(predt.shape[0]):
+            target = labels[r]
+            p = softmax(predt[r, :])
+            for c in range(predt.shape[1]):
+                assert target >= 0 or target <= classes
+                g = p[c] - 1.0 if c == target else p[c]
+                h = max((2.0 * p[c] * (1.0 - p[c])).item(), eps)
+                grad[r, c] = g
+                hess[r, c] = h
+
+        grad = grad.reshape((rows * classes, 1))
+        hess = hess.reshape((rows * classes, 1))
+        return grad, hess
+
+    return objective
+
+
 class DirectoryExcursion:
    def __init__(self, path: os.PathLike, cleanup=False):
        '''Change directory.  Change back and optionally cleaning up the directory when exit.