Use matrix for gradient. (#9508)

- Use the `linalg::Matrix` for storing gradients.
- New API for the custom objective.
- Custom objective for multi-class/multi-target is now required to return the correct shape.
- Custom objectives for Python can use arrays with any strides (row-major or column-major).
This commit is contained in:
Jiaming Yuan
2023-08-24 05:29:52 +08:00
committed by GitHub
parent 6103dca0bb
commit 972730cde0
77 changed files with 1052 additions and 651 deletions

View File

@@ -2053,12 +2053,14 @@ class Booster:
else:
pred = self.predict(dtrain, output_margin=True, training=True)
grad, hess = fobj(pred, dtrain)
self.boost(dtrain, grad, hess)
self.boost(dtrain, iteration=iteration, grad=grad, hess=hess)
def boost(self, dtrain: DMatrix, grad: np.ndarray, hess: np.ndarray) -> None:
"""Boost the booster for one iteration, with customized gradient
statistics. Like :py:func:`xgboost.Booster.update`, this
function should not be called directly by users.
def boost(
self, dtrain: DMatrix, iteration: int, grad: NumpyOrCupy, hess: NumpyOrCupy
) -> None:
"""Boost the booster for one iteration with customized gradient statistics.
Like :py:func:`xgboost.Booster.update`, this function should not be called
directly by users.
Parameters
----------
@@ -2070,19 +2072,53 @@ class Booster:
The second order of gradient.
"""
if len(grad) != len(hess):
raise ValueError(f"grad / hess length mismatch: {len(grad)} / {len(hess)}")
if not isinstance(dtrain, DMatrix):
raise TypeError(f"invalid training matrix: {type(dtrain).__name__}")
from .data import (
_array_interface,
_cuda_array_interface,
_ensure_np_dtype,
_is_cupy_array,
)
self._assign_dmatrix_features(dtrain)
def is_flatten(array: NumpyOrCupy) -> bool:
    """Return True when ``array`` is 1-dimensional or a single-column vector."""
    if array.ndim == 1:
        return True
    return array.shape[1] == 1
def array_interface(array: NumpyOrCupy) -> bytes:
    """Serialize a gradient/hessian array into an array-interface JSON blob
    (bytes) for the XGBoost C API.

    Accepts a ``numpy.ndarray`` or a CuPy array; raises :class:`TypeError`
    for anything else.  Flat legacy-shaped input is reshaped to 2-dim with a
    :class:`FutureWarning`.
    """
    # NOTE(review): could we check for __array_interface__ instead of a
    # specific type?
    msg = (
        "Expecting `np.ndarray` or `cupy.ndarray` for gradient and hessian."
        f" Got: {type(array)}"
    )
    if not isinstance(array, np.ndarray) and not _is_cupy_array(array):
        raise TypeError(msg)

    n_samples = dtrain.num_row()
    # Presumably a legacy custom objective returned a flat
    # (n_samples * n_outputs,) vector; restore the expected 2-dim
    # (n_samples, n_outputs) layout and warn about the deprecation.
    if array.shape[0] != n_samples and is_flatten(array):
        warnings.warn(
            "Since 2.1.0, the shape of the gradient and hessian is required to"
            " be (n_samples, n_targets) or (n_samples, n_classes).",
            FutureWarning,
        )
        array = array.reshape(n_samples, array.size // n_samples)

    if isinstance(array, np.ndarray):
        # Coerce to a dtype the C API understands before serializing.
        array, _ = _ensure_np_dtype(array, array.dtype)
        interface = _array_interface(array)
    elif _is_cupy_array(array):
        interface = _cuda_array_interface(array)
    else:
        # Unreachable given the isinstance check above; kept as a defensive
        # guard.
        raise TypeError(msg)

    return interface
_check_call(
_LIB.XGBoosterBoostOneIter(
_LIB.XGBoosterTrainOneIter(
self.handle,
dtrain.handle,
c_array(ctypes.c_float, grad),
c_array(ctypes.c_float, hess),
c_bst_ulong(len(grad)),
iteration,
array_interface(grad),
array_interface(hess),
)
)

View File

@@ -763,13 +763,31 @@ def softmax(x: np.ndarray) -> np.ndarray:
return e / np.sum(e)
def softprob_obj(classes: int) -> SklObjective:
def softprob_obj(
classes: int, use_cupy: bool = False, order: str = "C", gdtype: str = "float32"
) -> SklObjective:
"""Custom softprob objective for testing.
Parameters
----------
use_cupy :
Whether the objective should return cupy arrays.
order :
The order of gradient matrices. "C" or "F".
gdtype :
DType for gradient. Hessian is not set. This is for testing asymmetric types.
"""
if use_cupy:
import cupy as backend
else:
backend = np
def objective(
labels: np.ndarray, predt: np.ndarray
) -> Tuple[np.ndarray, np.ndarray]:
labels: backend.ndarray, predt: backend.ndarray
) -> Tuple[backend.ndarray, backend.ndarray]:
rows = labels.shape[0]
grad = np.zeros((rows, classes), dtype=float)
hess = np.zeros((rows, classes), dtype=float)
grad = backend.zeros((rows, classes), dtype=np.float32)
hess = backend.zeros((rows, classes), dtype=np.float32)
eps = 1e-6
for r in range(predt.shape[0]):
target = labels[r]
@@ -781,8 +799,10 @@ def softprob_obj(classes: int) -> SklObjective:
grad[r, c] = g
hess[r, c] = h
grad = grad.reshape((rows * classes, 1))
hess = hess.reshape((rows * classes, 1))
grad = grad.reshape((rows, classes))
hess = hess.reshape((rows, classes))
grad = backend.require(grad, requirements=order, dtype=gdtype)
hess = backend.require(hess, requirements=order)
return grad, hess
return objective

View File

@@ -178,7 +178,7 @@ def train(
for i in range(start_iteration, num_boost_round):
if cb_container.before_iteration(bst, i, dtrain, evals):
break
bst.update(dtrain, i, obj)
bst.update(dtrain, iteration=i, fobj=obj)
if cb_container.after_iteration(bst, i, dtrain, evals):
break