[breaking] Remove the predictor param, allow fallback to prediction using DMatrix. (#9129)

- A `DeviceOrd` struct is implemented to indicate the device. It will eventually replace the `gpu_id` parameter.
- The `predictor` parameter is removed.
- Fallback to `DMatrix` when `inplace_predict` is not available.
- The heuristic for choosing a predictor is only used during training.
This commit is contained in:
Jiaming Yuan
2023-07-03 19:23:54 +08:00
committed by GitHub
parent 3a0f787703
commit 39390cc2ee
54 changed files with 1049 additions and 778 deletions

View File

@@ -2187,20 +2187,25 @@ class Booster:
base_margin: Any = None,
strict_shape: bool = False,
) -> NumpyOrCupy:
"""Run prediction in-place, Unlike :py:meth:`predict` method, inplace prediction
does not cache the prediction result.
"""Run prediction in-place when possible, Unlike :py:meth:`predict` method,
inplace prediction does not cache the prediction result.
Calling only ``inplace_predict`` in multiple threads is safe and lock
free. But the safety does not hold when used in conjunction with other
methods. E.g. you can't train the booster in one thread and perform
prediction in the other.
.. note::
If the device ordinal of the input data doesn't match the one configured for
the booster, data will be copied to the booster device.
.. code-block:: python
booster.set_param({"predictor": "gpu_predictor"})
booster.set_param({"gpu_id": "0", "tree_method": "gpu_hist"})
booster.inplace_predict(cupy_array)
booster.set_param({"predictor": "cpu_predictor"})
booster.set_param({"gpu_id": "-1", "tree_method": "hist"})
booster.inplace_predict(numpy_array)
.. versionadded:: 1.1.0
@@ -2208,9 +2213,7 @@ class Booster:
Parameters
----------
data :
The input data, must not be a view for numpy array. Set
``predictor`` to ``gpu_predictor`` for running prediction on CuPy
array or CuDF DataFrame.
The input data.
iteration_range :
See :py:meth:`predict` for details.
predict_type :

View File

@@ -277,9 +277,6 @@ __model_doc = f"""
Device ordinal.
validate_parameters : Optional[bool]
Give warnings for unknown parameter.
predictor : Optional[str]
Force XGBoost to use specific predictor, available choices are [cpu_predictor,
gpu_predictor].
enable_categorical : bool
.. versionadded:: 1.5.0
@@ -652,7 +649,6 @@ class XGBModel(XGBModelBase):
importance_type: Optional[str] = None,
gpu_id: Optional[int] = None,
validate_parameters: Optional[bool] = None,
predictor: Optional[str] = None,
enable_categorical: bool = False,
feature_types: Optional[FeatureTypes] = None,
max_cat_to_onehot: Optional[int] = None,
@@ -699,7 +695,6 @@ class XGBModel(XGBModelBase):
self.importance_type = importance_type
self.gpu_id = gpu_id
self.validate_parameters = validate_parameters
self.predictor = predictor
self.enable_categorical = enable_categorical
self.feature_types = feature_types
self.max_cat_to_onehot = max_cat_to_onehot
@@ -1093,12 +1088,7 @@ class XGBModel(XGBModelBase):
return self
def _can_use_inplace_predict(self) -> bool:
# When predictor is explicitly set, using `inplace_predict` might result into
# error with incompatible data type.
# Inplace predict doesn't handle as many data types as DMatrix, but it's
# sufficient for dask interface where input is simpiler.
predictor = self.get_xgb_params().get("predictor", None)
if predictor in ("auto", None) and self.booster != "gblinear":
if self.booster != "gblinear":
return True
return False
@@ -1124,9 +1114,9 @@ class XGBModel(XGBModelBase):
iteration_range: Optional[Tuple[int, int]] = None,
) -> ArrayLike:
"""Predict with `X`. If the model is trained with early stopping, then
:py:attr:`best_iteration` is used automatically. For tree models, when data is
on GPU, like cupy array or cuDF dataframe and `predictor` is not specified, the
prediction is run on GPU automatically, otherwise it will run on CPU.
:py:attr:`best_iteration` is used automatically. The estimator uses
`inplace_predict` by default and falls back to using :py:class:`DMatrix` if
devices between the data and the estimator don't match.
.. note:: This function is only thread safe for `gbtree` and `dart`.
@@ -1588,7 +1578,9 @@ class XGBClassifier(XGBModel, XGBClassifierMixIn, XGBClassifierBase):
) -> np.ndarray:
"""Predict the probability of each `X` example being of a given class. If the
model is trained with early stopping, then :py:attr:`best_iteration` is used
automatically.
automatically. The estimator uses `inplace_predict` by default and falls back to
using :py:class:`DMatrix` if devices between the data and the estimator don't
match.
.. note:: This function is only thread safe for `gbtree` and `dart`.

View File

@@ -25,6 +25,7 @@ from typing import (
Set,
Tuple,
TypedDict,
TypeVar,
Union,
)
@@ -711,6 +712,27 @@ def predictor_equal(lhs: xgb.DMatrix, rhs: xgb.DMatrix) -> bool:
)
M = TypeVar("M", xgb.Booster, xgb.XGBModel)
def set_ordinal(ordinal: int, booster: M) -> M:
"""Temporary solution for setting the device ordinal until we move away from
`gpu_id`.
"""
if ordinal < 0:
params = {"gpu_id": -1, "tree_method": "hist"}
else:
params = {"gpu_id": ordinal, "tree_method": "gpu_hist"}
if isinstance(booster, xgb.Booster):
booster.set_param(params)
elif isinstance(booster, xgb.XGBModel):
booster.set_params(**params)
return booster
def eval_error_metric(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, np.float64]:
"""Evaluation metric for xgb.train"""
label = dtrain.get_label()