[breaking] Remove the predictor param, allow fallback to prediction using DMatrix. (#9129)
- A `DeviceOrd` struct is implemented to indicate the device. It will eventually replace the `gpu_id` parameter.
- The `predictor` parameter is removed.
- Fall back to `DMatrix` when `inplace_predict` is not available.
- The heuristic for choosing a predictor is only used during training.
This commit is contained in:
@@ -2187,20 +2187,25 @@ class Booster:
|
||||
base_margin: Any = None,
|
||||
strict_shape: bool = False,
|
||||
) -> NumpyOrCupy:
|
||||
"""Run prediction in-place, Unlike :py:meth:`predict` method, inplace prediction
|
||||
does not cache the prediction result.
|
||||
"""Run prediction in-place when possible, Unlike :py:meth:`predict` method,
|
||||
inplace prediction does not cache the prediction result.
|
||||
|
||||
Calling only ``inplace_predict`` in multiple threads is safe and lock
|
||||
free. But the safety does not hold when used in conjunction with other
|
||||
methods. E.g. you can't train the booster in one thread and perform
|
||||
prediction in the other.
|
||||
|
||||
.. note::
|
||||
|
||||
If the device ordinal of the input data doesn't match the one configured for
|
||||
the booster, data will be copied to the booster device.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
booster.set_param({"predictor": "gpu_predictor"})
|
||||
booster.set_param({"gpu_id": "0", "tree_method": "gpu_hist"})
|
||||
booster.inplace_predict(cupy_array)
|
||||
|
||||
booster.set_param({"predictor": "cpu_predictor"})
|
||||
booster.set_param({"gpu_id": "-1", "tree_method": "hist"})
|
||||
booster.inplace_predict(numpy_array)
|
||||
|
||||
.. versionadded:: 1.1.0
|
||||
@@ -2208,9 +2213,7 @@ class Booster:
|
||||
Parameters
|
||||
----------
|
||||
data :
|
||||
The input data, must not be a view for numpy array. Set
|
||||
``predictor`` to ``gpu_predictor`` for running prediction on CuPy
|
||||
array or CuDF DataFrame.
|
||||
The input data.
|
||||
iteration_range :
|
||||
See :py:meth:`predict` for details.
|
||||
predict_type :
|
||||
|
||||
@@ -277,9 +277,6 @@ __model_doc = f"""
|
||||
Device ordinal.
|
||||
validate_parameters : Optional[bool]
|
||||
Give warnings for unknown parameter.
|
||||
predictor : Optional[str]
|
||||
Force XGBoost to use specific predictor, available choices are [cpu_predictor,
|
||||
gpu_predictor].
|
||||
enable_categorical : bool
|
||||
|
||||
.. versionadded:: 1.5.0
|
||||
@@ -652,7 +649,6 @@ class XGBModel(XGBModelBase):
|
||||
importance_type: Optional[str] = None,
|
||||
gpu_id: Optional[int] = None,
|
||||
validate_parameters: Optional[bool] = None,
|
||||
predictor: Optional[str] = None,
|
||||
enable_categorical: bool = False,
|
||||
feature_types: Optional[FeatureTypes] = None,
|
||||
max_cat_to_onehot: Optional[int] = None,
|
||||
@@ -699,7 +695,6 @@ class XGBModel(XGBModelBase):
|
||||
self.importance_type = importance_type
|
||||
self.gpu_id = gpu_id
|
||||
self.validate_parameters = validate_parameters
|
||||
self.predictor = predictor
|
||||
self.enable_categorical = enable_categorical
|
||||
self.feature_types = feature_types
|
||||
self.max_cat_to_onehot = max_cat_to_onehot
|
||||
@@ -1093,12 +1088,7 @@ class XGBModel(XGBModelBase):
|
||||
return self
|
||||
|
||||
def _can_use_inplace_predict(self) -> bool:
|
||||
# When predictor is explicitly set, using `inplace_predict` might result into
|
||||
# error with incompatible data type.
|
||||
# Inplace predict doesn't handle as many data types as DMatrix, but it's
|
||||
# sufficient for dask interface where input is simpler.
|
||||
predictor = self.get_xgb_params().get("predictor", None)
|
||||
if predictor in ("auto", None) and self.booster != "gblinear":
|
||||
if self.booster != "gblinear":
|
||||
return True
|
||||
return False
|
||||
|
||||
@@ -1124,9 +1114,9 @@ class XGBModel(XGBModelBase):
|
||||
iteration_range: Optional[Tuple[int, int]] = None,
|
||||
) -> ArrayLike:
|
||||
"""Predict with `X`. If the model is trained with early stopping, then
|
||||
:py:attr:`best_iteration` is used automatically. For tree models, when data is
|
||||
on GPU, like cupy array or cuDF dataframe and `predictor` is not specified, the
|
||||
prediction is run on GPU automatically, otherwise it will run on CPU.
|
||||
:py:attr:`best_iteration` is used automatically. The estimator uses
|
||||
`inplace_predict` by default and falls back to using :py:class:`DMatrix` if
|
||||
devices between the data and the estimator don't match.
|
||||
|
||||
.. note:: This function is only thread safe for `gbtree` and `dart`.
|
||||
|
||||
@@ -1588,7 +1578,9 @@ class XGBClassifier(XGBModel, XGBClassifierMixIn, XGBClassifierBase):
|
||||
) -> np.ndarray:
|
||||
"""Predict the probability of each `X` example being of a given class. If the
|
||||
model is trained with early stopping, then :py:attr:`best_iteration` is used
|
||||
automatically.
|
||||
automatically. The estimator uses `inplace_predict` by default and falls back to
|
||||
using :py:class:`DMatrix` if devices between the data and the estimator don't
|
||||
match.
|
||||
|
||||
.. note:: This function is only thread safe for `gbtree` and `dart`.
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ from typing import (
|
||||
Set,
|
||||
Tuple,
|
||||
TypedDict,
|
||||
TypeVar,
|
||||
Union,
|
||||
)
|
||||
|
||||
@@ -711,6 +712,27 @@ def predictor_equal(lhs: xgb.DMatrix, rhs: xgb.DMatrix) -> bool:
|
||||
)
|
||||
|
||||
|
||||
M = TypeVar("M", xgb.Booster, xgb.XGBModel)
|
||||
|
||||
|
||||
def set_ordinal(ordinal: int, booster: M) -> M:
    """Configure the device ordinal on a booster or estimator.

    Stop-gap helper to be used until the `gpu_id` parameter is retired.

    Parameters
    ----------
    ordinal :
        Requested device ordinal. A negative value sets ``gpu_id`` to ``-1`` with
        the ``hist`` tree method; any other value is passed through as ``gpu_id``
        together with the ``gpu_hist`` tree method.
    booster :
        Either a :py:class:`xgb.Booster` (updated via ``set_param``) or a
        :py:class:`xgb.XGBModel` (updated via ``set_params``). An object of any
        other type is left untouched.

    Returns
    -------
    The same object that was passed in as ``booster``.

    """
    negative = ordinal < 0
    device_config = {
        "gpu_id": -1 if negative else ordinal,
        "tree_method": "hist" if negative else "gpu_hist",
    }

    # The native booster takes a mapping, the sklearn-style estimator takes kwargs.
    if isinstance(booster, xgb.Booster):
        booster.set_param(device_config)
        return booster
    if isinstance(booster, xgb.XGBModel):
        booster.set_params(**device_config)
    return booster
|
||||
|
||||
|
||||
def eval_error_metric(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, np.float64]:
|
||||
"""Evaluation metric for xgb.train"""
|
||||
label = dtrain.get_label()
|
||||
|
||||
Reference in New Issue
Block a user