Use matrix for gradient. (#9508)
- Use `linalg::Matrix` for storing gradients.
- New API for the custom objective.
- A custom objective for multi-class/multi-target is now required to return gradients with the correct shape.
- A custom objective for Python can accept arrays with any strides (row-major or column-major).
This commit is contained in:
@@ -2053,12 +2053,14 @@ class Booster:
|
||||
else:
|
||||
pred = self.predict(dtrain, output_margin=True, training=True)
|
||||
grad, hess = fobj(pred, dtrain)
|
||||
self.boost(dtrain, grad, hess)
|
||||
self.boost(dtrain, iteration=iteration, grad=grad, hess=hess)
|
||||
|
||||
def boost(self, dtrain: DMatrix, grad: np.ndarray, hess: np.ndarray) -> None:
|
||||
"""Boost the booster for one iteration, with customized gradient
|
||||
statistics. Like :py:func:`xgboost.Booster.update`, this
|
||||
function should not be called directly by users.
|
||||
def boost(
|
||||
self, dtrain: DMatrix, iteration: int, grad: NumpyOrCupy, hess: NumpyOrCupy
|
||||
) -> None:
|
||||
"""Boost the booster for one iteration with customized gradient statistics.
|
||||
Like :py:func:`xgboost.Booster.update`, this function should not be called
|
||||
directly by users.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
@@ -2070,19 +2072,53 @@ class Booster:
|
||||
The second order of gradient.
|
||||
|
||||
"""
|
||||
if len(grad) != len(hess):
|
||||
raise ValueError(f"grad / hess length mismatch: {len(grad)} / {len(hess)}")
|
||||
if not isinstance(dtrain, DMatrix):
|
||||
raise TypeError(f"invalid training matrix: {type(dtrain).__name__}")
|
||||
from .data import (
|
||||
_array_interface,
|
||||
_cuda_array_interface,
|
||||
_ensure_np_dtype,
|
||||
_is_cupy_array,
|
||||
)
|
||||
|
||||
self._assign_dmatrix_features(dtrain)
|
||||
|
||||
def is_flatten(array: NumpyOrCupy) -> bool:
|
||||
return len(array.shape) == 1 or array.shape[1] == 1
|
||||
|
||||
def array_interface(array: NumpyOrCupy) -> bytes:
|
||||
# Can we check for __array_interface__ instead of a specific type instead?
|
||||
msg = (
|
||||
"Expecting `np.ndarray` or `cupy.ndarray` for gradient and hessian."
|
||||
f" Got: {type(array)}"
|
||||
)
|
||||
if not isinstance(array, np.ndarray) and not _is_cupy_array(array):
|
||||
raise TypeError(msg)
|
||||
|
||||
n_samples = dtrain.num_row()
|
||||
if array.shape[0] != n_samples and is_flatten(array):
|
||||
warnings.warn(
|
||||
"Since 2.1.0, the shape of the gradient and hessian is required to"
|
||||
" be (n_samples, n_targets) or (n_samples, n_classes).",
|
||||
FutureWarning,
|
||||
)
|
||||
array = array.reshape(n_samples, array.size // n_samples)
|
||||
|
||||
if isinstance(array, np.ndarray):
|
||||
array, _ = _ensure_np_dtype(array, array.dtype)
|
||||
interface = _array_interface(array)
|
||||
elif _is_cupy_array(array):
|
||||
interface = _cuda_array_interface(array)
|
||||
else:
|
||||
raise TypeError(msg)
|
||||
|
||||
return interface
|
||||
|
||||
_check_call(
|
||||
_LIB.XGBoosterBoostOneIter(
|
||||
_LIB.XGBoosterTrainOneIter(
|
||||
self.handle,
|
||||
dtrain.handle,
|
||||
c_array(ctypes.c_float, grad),
|
||||
c_array(ctypes.c_float, hess),
|
||||
c_bst_ulong(len(grad)),
|
||||
iteration,
|
||||
array_interface(grad),
|
||||
array_interface(hess),
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@@ -763,13 +763,31 @@ def softmax(x: np.ndarray) -> np.ndarray:
|
||||
return e / np.sum(e)
|
||||
|
||||
|
||||
def softprob_obj(classes: int) -> SklObjective:
|
||||
def softprob_obj(
|
||||
classes: int, use_cupy: bool = False, order: str = "C", gdtype: str = "float32"
|
||||
) -> SklObjective:
|
||||
"""Custom softprob objective for testing.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
use_cupy :
|
||||
Whether the objective should return cupy arrays.
|
||||
order :
|
||||
The order of gradient matrices. "C" or "F".
|
||||
gdtype :
|
||||
DType for gradient. Hessian is not set. This is for testing asymmetric types.
|
||||
"""
|
||||
if use_cupy:
|
||||
import cupy as backend
|
||||
else:
|
||||
backend = np
|
||||
|
||||
def objective(
|
||||
labels: np.ndarray, predt: np.ndarray
|
||||
) -> Tuple[np.ndarray, np.ndarray]:
|
||||
labels: backend.ndarray, predt: backend.ndarray
|
||||
) -> Tuple[backend.ndarray, backend.ndarray]:
|
||||
rows = labels.shape[0]
|
||||
grad = np.zeros((rows, classes), dtype=float)
|
||||
hess = np.zeros((rows, classes), dtype=float)
|
||||
grad = backend.zeros((rows, classes), dtype=np.float32)
|
||||
hess = backend.zeros((rows, classes), dtype=np.float32)
|
||||
eps = 1e-6
|
||||
for r in range(predt.shape[0]):
|
||||
target = labels[r]
|
||||
@@ -781,8 +799,10 @@ def softprob_obj(classes: int) -> SklObjective:
|
||||
grad[r, c] = g
|
||||
hess[r, c] = h
|
||||
|
||||
grad = grad.reshape((rows * classes, 1))
|
||||
hess = hess.reshape((rows * classes, 1))
|
||||
grad = grad.reshape((rows, classes))
|
||||
hess = hess.reshape((rows, classes))
|
||||
grad = backend.require(grad, requirements=order, dtype=gdtype)
|
||||
hess = backend.require(hess, requirements=order)
|
||||
return grad, hess
|
||||
|
||||
return objective
|
||||
|
||||
@@ -178,7 +178,7 @@ def train(
|
||||
for i in range(start_iteration, num_boost_round):
|
||||
if cb_container.before_iteration(bst, i, dtrain, evals):
|
||||
break
|
||||
bst.update(dtrain, i, obj)
|
||||
bst.update(dtrain, iteration=i, fobj=obj)
|
||||
if cb_container.after_iteration(bst, i, dtrain, evals):
|
||||
break
|
||||
|
||||
|
||||
Reference in New Issue
Block a user