[breaking] Remove the predictor param, allow fallback to prediction using DMatrix. (#9129)

- A `DeviceOrd` struct is implemented to indicate the device. It will eventually replace the `gpu_id` parameter. - The `predictor` parameter is removed. - Fallback to `DMatrix` when `inplace_predict` is not available. - The heuristic for choosing a predictor is only used during training.
2023-07-03 19:23:54 +08:00
parent 3a0f787703
commit 39390cc2ee
54 changed files with 1049 additions and 778 deletions
--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@@ -2187,20 +2187,25 @@ class Booster:
        base_margin: Any = None,
        strict_shape: bool = False,
    ) -> NumpyOrCupy:
-        """Run prediction in-place, Unlike :py:meth:`predict` method, inplace prediction
-        does not cache the prediction result.
+        """Run prediction in-place when possible, Unlike :py:meth:`predict` method,
+        inplace prediction does not cache the prediction result.

        Calling only ``inplace_predict`` in multiple threads is safe and lock
        free.  But the safety does not hold when used in conjunction with other
        methods. E.g. you can't train the booster in one thread and perform
        prediction in the other.

+        .. note::
+
+            If the device ordinal of the input data doesn't match the one configured for
+            the booster, data will be copied to the booster device.
+
        .. code-block:: python

-            booster.set_param({"predictor": "gpu_predictor"})
+            booster.set_param({"gpu_id": "0", "tree_method": "gpu_hist"})
            booster.inplace_predict(cupy_array)

-            booster.set_param({"predictor": "cpu_predictor"})
+            booster.set_param({"gpu_id": "-1", "tree_method": "hist"})
            booster.inplace_predict(numpy_array)

        .. versionadded:: 1.1.0
@@ -2208,9 +2213,7 @@ class Booster:
        Parameters
        ----------
        data :
-            The input data, must not be a view for numpy array.  Set
-            ``predictor`` to ``gpu_predictor`` for running prediction on CuPy
-            array or CuDF DataFrame.
+            The input data.
        iteration_range :
            See :py:meth:`predict` for details.
        predict_type :
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -277,9 +277,6 @@ __model_doc = f"""
        Device ordinal.
    validate_parameters : Optional[bool]
        Give warnings for unknown parameter.
-    predictor : Optional[str]
-        Force XGBoost to use specific predictor, available choices are [cpu_predictor,
-        gpu_predictor].
    enable_categorical : bool

        .. versionadded:: 1.5.0
@@ -652,7 +649,6 @@ class XGBModel(XGBModelBase):
        importance_type: Optional[str] = None,
        gpu_id: Optional[int] = None,
        validate_parameters: Optional[bool] = None,
-        predictor: Optional[str] = None,
        enable_categorical: bool = False,
        feature_types: Optional[FeatureTypes] = None,
        max_cat_to_onehot: Optional[int] = None,
@@ -699,7 +695,6 @@ class XGBModel(XGBModelBase):
        self.importance_type = importance_type
        self.gpu_id = gpu_id
        self.validate_parameters = validate_parameters
-        self.predictor = predictor
        self.enable_categorical = enable_categorical
        self.feature_types = feature_types
        self.max_cat_to_onehot = max_cat_to_onehot
@@ -1093,12 +1088,7 @@ class XGBModel(XGBModelBase):
            return self

    def _can_use_inplace_predict(self) -> bool:
-        # When predictor is explicitly set, using `inplace_predict` might result into
-        # error with incompatible data type.
-        # Inplace predict doesn't handle as many data types as DMatrix, but it's
-        # sufficient for dask interface where input is simpiler.
-        predictor = self.get_xgb_params().get("predictor", None)
-        if predictor in ("auto", None) and self.booster != "gblinear":
+        if self.booster != "gblinear":
            return True
        return False

@@ -1124,9 +1114,9 @@ class XGBModel(XGBModelBase):
        iteration_range: Optional[Tuple[int, int]] = None,
    ) -> ArrayLike:
        """Predict with `X`.  If the model is trained with early stopping, then
-        :py:attr:`best_iteration` is used automatically.  For tree models, when data is
-        on GPU, like cupy array or cuDF dataframe and `predictor` is not specified, the
-        prediction is run on GPU automatically, otherwise it will run on CPU.
+        :py:attr:`best_iteration` is used automatically. The estimator uses
+        `inplace_predict` by default and falls back to using :py:class:`DMatrix` if
+        devices between the data and the estimator don't match.

        .. note:: This function is only thread safe for `gbtree` and `dart`.

@@ -1588,7 +1578,9 @@ class XGBClassifier(XGBModel, XGBClassifierMixIn, XGBClassifierBase):
    ) -> np.ndarray:
        """Predict the probability of each `X` example being of a given class. If the
        model is trained with early stopping, then :py:attr:`best_iteration` is used
-        automatically.
+        automatically. The estimator uses `inplace_predict` by default and falls back to
+        using :py:class:`DMatrix` if devices between the data and the estimator don't
+        match.

        .. note:: This function is only thread safe for `gbtree` and `dart`.

--- a/python-package/xgboost/testing/init.py
+++ b/python-package/xgboost/testing/init.py
@@ -25,6 +25,7 @@ from typing import (
    Set,
    Tuple,
    TypedDict,
+    TypeVar,
    Union,
 )

@@ -711,6 +712,27 @@ def predictor_equal(lhs: xgb.DMatrix, rhs: xgb.DMatrix) -> bool:
    )


+M = TypeVar("M", xgb.Booster, xgb.XGBModel)
+
+
+def set_ordinal(ordinal: int, booster: M) -> M:
+    """Temporary solution for setting the device ordinal until we move away from
+    `gpu_id`.
+
+    """
+    if ordinal < 0:
+        params = {"gpu_id": -1, "tree_method": "hist"}
+    else:
+        params = {"gpu_id": ordinal, "tree_method": "gpu_hist"}
+
+    if isinstance(booster, xgb.Booster):
+        booster.set_param(params)
+    elif isinstance(booster, xgb.XGBModel):
+        booster.set_params(**params)
+
+    return booster
+
+
 def eval_error_metric(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, np.float64]:
    """Evaluation metric for xgb.train"""
    label = dtrain.get_label()