[breaking] Remove the predictor param, allow fallback to prediction using DMatrix. (#9129)

- A `DeviceOrd` struct is implemented to indicate the device. It will eventually replace the `gpu_id` parameter.
- The `predictor` parameter is removed.
- Fall back to `DMatrix` when `inplace_predict` is not available.
- The heuristic for choosing a predictor is only used during training.
2023-07-03 19:23:54 +08:00
parent 3a0f787703
commit 39390cc2ee
54 changed files with 1049 additions and 778 deletions
--- a/tests/python/test_predict.py
+++ b/tests/python/test_predict.py
@@ -28,7 +28,7 @@ def run_threaded_predict(X, rows, predict_func):
        assert f.result()


-def run_predict_leaf(predictor):
+def run_predict_leaf(gpu_id: int) -> np.ndarray:
    rows = 100
    cols = 4
    classes = 5
@@ -42,13 +42,13 @@ def run_predict_leaf(predictor):
        {
            "num_parallel_tree": num_parallel_tree,
            "num_class": classes,
-            "predictor": predictor,
            "tree_method": "hist",
        },
        m,
        num_boost_round=num_boost_round,
    )

+    booster = tm.set_ordinal(gpu_id, booster)
    empty = xgb.DMatrix(np.ones(shape=(0, cols)))
    empty_leaf = booster.predict(empty, pred_leaf=True)
    assert empty_leaf.shape[0] == 0
@@ -74,13 +74,14 @@ def run_predict_leaf(predictor):

    # When there's only 1 tree, the output is a 1 dim vector
    booster = xgb.train({"tree_method": "hist"}, num_boost_round=1, dtrain=m)
+    booster = tm.set_ordinal(gpu_id, booster)
    assert booster.predict(m, pred_leaf=True).shape == (rows,)

    return leaf


-def test_predict_leaf():
-    run_predict_leaf("cpu_predictor")
+def test_predict_leaf() -> None:
+    run_predict_leaf(-1)


 def test_predict_shape():
--- a/tests/python/test_updaters.py
+++ b/tests/python/test_updaters.py
@@ -274,7 +274,7 @@ class TestTreeMethod:
    ) -> None:
        parameters: Dict[str, Any] = {"tree_method": tree_method}
        cat, label = tm.make_categorical(
-            n_samples=rows, n_features=cols, n_categories=cats, onehot=False, sparsity=0.5
+            rows, n_features=cols, n_categories=cats, onehot=False, sparsity=0.5
        )
        Xy = xgb.DMatrix(cat, label, enable_categorical=True)

@@ -294,7 +294,9 @@ class TestTreeMethod:
            y_predt = booster.predict(Xy)

            rmse = tm.root_mean_square(label, y_predt)
-            np.testing.assert_allclose(rmse, evals_result["Train"]["rmse"][-1])
+            np.testing.assert_allclose(
+                rmse, evals_result["Train"]["rmse"][-1], rtol=2e-5
+            )

        # Test with OHE split
        run(self.USE_ONEHOT)
@@ -311,10 +313,8 @@ class TestTreeMethod:
        by_etl_results: Dict[str, Dict[str, List[float]]] = {}
        by_builtin_results: Dict[str, Dict[str, List[float]]] = {}

-        predictor = "gpu_predictor" if tree_method == "gpu_hist" else None
        parameters: Dict[str, Any] = {
            "tree_method": tree_method,
-            "predictor": predictor,
            # Use one-hot exclusively
            "max_cat_to_onehot": self.USE_ONEHOT
        }
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -1418,23 +1418,6 @@ def test_categorical():
    np.testing.assert_allclose(predt_cat, predt_enc)


-def test_prediction_config():
-    reg = xgb.XGBRegressor()
-    assert reg._can_use_inplace_predict() is True
-
-    reg.set_params(predictor="cpu_predictor")
-    assert reg._can_use_inplace_predict() is False
-
-    reg.set_params(predictor="auto")
-    assert reg._can_use_inplace_predict() is True
-
-    reg.set_params(predictor=None)
-    assert reg._can_use_inplace_predict() is True
-
-    reg.set_params(booster="gblinear")
-    assert reg._can_use_inplace_predict() is False
-
-
 def test_evaluation_metric():
    from sklearn.datasets import load_diabetes, load_digits
    from sklearn.metrics import mean_absolute_error