Use matrix for gradient. (#9508)
- Use `linalg::Matrix` for storing gradients.
- New API for the custom objective.
- A custom objective for multi-class/multi-target is now required to return gradients with the correct shape.
- A custom objective for Python can accept arrays with any strides (row-major or column-major).
This commit is contained in:
@@ -2053,12 +2053,14 @@ class Booster:
|
||||
else:
|
||||
pred = self.predict(dtrain, output_margin=True, training=True)
|
||||
grad, hess = fobj(pred, dtrain)
|
||||
self.boost(dtrain, grad, hess)
|
||||
self.boost(dtrain, iteration=iteration, grad=grad, hess=hess)
|
||||
|
||||
def boost(self, dtrain: DMatrix, grad: np.ndarray, hess: np.ndarray) -> None:
|
||||
"""Boost the booster for one iteration, with customized gradient
|
||||
statistics. Like :py:func:`xgboost.Booster.update`, this
|
||||
function should not be called directly by users.
|
||||
def boost(
|
||||
self, dtrain: DMatrix, iteration: int, grad: NumpyOrCupy, hess: NumpyOrCupy
|
||||
) -> None:
|
||||
"""Boost the booster for one iteration with customized gradient statistics.
|
||||
Like :py:func:`xgboost.Booster.update`, this function should not be called
|
||||
directly by users.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
@@ -2070,19 +2072,53 @@ class Booster:
|
||||
The second order of gradient.
|
||||
|
||||
"""
|
||||
if len(grad) != len(hess):
|
||||
raise ValueError(f"grad / hess length mismatch: {len(grad)} / {len(hess)}")
|
||||
if not isinstance(dtrain, DMatrix):
|
||||
raise TypeError(f"invalid training matrix: {type(dtrain).__name__}")
|
||||
from .data import (
|
||||
_array_interface,
|
||||
_cuda_array_interface,
|
||||
_ensure_np_dtype,
|
||||
_is_cupy_array,
|
||||
)
|
||||
|
||||
self._assign_dmatrix_features(dtrain)
|
||||
|
||||
def is_flatten(array: NumpyOrCupy) -> bool:
|
||||
return len(array.shape) == 1 or array.shape[1] == 1
|
||||
|
||||
def array_interface(array: NumpyOrCupy) -> bytes:
|
||||
# Can we check for __array_interface__ instead of a specific type instead?
|
||||
msg = (
|
||||
"Expecting `np.ndarray` or `cupy.ndarray` for gradient and hessian."
|
||||
f" Got: {type(array)}"
|
||||
)
|
||||
if not isinstance(array, np.ndarray) and not _is_cupy_array(array):
|
||||
raise TypeError(msg)
|
||||
|
||||
n_samples = dtrain.num_row()
|
||||
if array.shape[0] != n_samples and is_flatten(array):
|
||||
warnings.warn(
|
||||
"Since 2.1.0, the shape of the gradient and hessian is required to"
|
||||
" be (n_samples, n_targets) or (n_samples, n_classes).",
|
||||
FutureWarning,
|
||||
)
|
||||
array = array.reshape(n_samples, array.size // n_samples)
|
||||
|
||||
if isinstance(array, np.ndarray):
|
||||
array, _ = _ensure_np_dtype(array, array.dtype)
|
||||
interface = _array_interface(array)
|
||||
elif _is_cupy_array(array):
|
||||
interface = _cuda_array_interface(array)
|
||||
else:
|
||||
raise TypeError(msg)
|
||||
|
||||
return interface
|
||||
|
||||
_check_call(
|
||||
_LIB.XGBoosterBoostOneIter(
|
||||
_LIB.XGBoosterTrainOneIter(
|
||||
self.handle,
|
||||
dtrain.handle,
|
||||
c_array(ctypes.c_float, grad),
|
||||
c_array(ctypes.c_float, hess),
|
||||
c_bst_ulong(len(grad)),
|
||||
iteration,
|
||||
array_interface(grad),
|
||||
array_interface(hess),
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@@ -763,13 +763,31 @@ def softmax(x: np.ndarray) -> np.ndarray:
|
||||
return e / np.sum(e)
|
||||
|
||||
|
||||
def softprob_obj(classes: int) -> SklObjective:
|
||||
def softprob_obj(
|
||||
classes: int, use_cupy: bool = False, order: str = "C", gdtype: str = "float32"
|
||||
) -> SklObjective:
|
||||
"""Custom softprob objective for testing.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
use_cupy :
|
||||
Whether the objective should return cupy arrays.
|
||||
order :
|
||||
The order of gradient matrices. "C" or "F".
|
||||
gdtype :
|
||||
DType for gradient. Hessian is not set. This is for testing asymmetric types.
|
||||
"""
|
||||
if use_cupy:
|
||||
import cupy as backend
|
||||
else:
|
||||
backend = np
|
||||
|
||||
def objective(
|
||||
labels: np.ndarray, predt: np.ndarray
|
||||
) -> Tuple[np.ndarray, np.ndarray]:
|
||||
labels: backend.ndarray, predt: backend.ndarray
|
||||
) -> Tuple[backend.ndarray, backend.ndarray]:
|
||||
rows = labels.shape[0]
|
||||
grad = np.zeros((rows, classes), dtype=float)
|
||||
hess = np.zeros((rows, classes), dtype=float)
|
||||
grad = backend.zeros((rows, classes), dtype=np.float32)
|
||||
hess = backend.zeros((rows, classes), dtype=np.float32)
|
||||
eps = 1e-6
|
||||
for r in range(predt.shape[0]):
|
||||
target = labels[r]
|
||||
@@ -781,8 +799,10 @@ def softprob_obj(classes: int) -> SklObjective:
|
||||
grad[r, c] = g
|
||||
hess[r, c] = h
|
||||
|
||||
grad = grad.reshape((rows * classes, 1))
|
||||
hess = hess.reshape((rows * classes, 1))
|
||||
grad = grad.reshape((rows, classes))
|
||||
hess = hess.reshape((rows, classes))
|
||||
grad = backend.require(grad, requirements=order, dtype=gdtype)
|
||||
hess = backend.require(hess, requirements=order)
|
||||
return grad, hess
|
||||
|
||||
return objective
|
||||
|
||||
@@ -178,7 +178,7 @@ def train(
|
||||
for i in range(start_iteration, num_boost_round):
|
||||
if cb_container.before_iteration(bst, i, dtrain, evals):
|
||||
break
|
||||
bst.update(dtrain, i, obj)
|
||||
bst.update(dtrain, iteration=i, fobj=obj)
|
||||
if cb_container.after_iteration(bst, i, dtrain, evals):
|
||||
break
|
||||
|
||||
|
||||
Reference in New Issue
Block a user