[breaking] Remove the predictor param, allow fallback to prediction using DMatrix. (#9129)

- A `DeviceOrd` struct is implemented to indicate the device. It will eventually replace the `gpu_id` parameter. - The `predictor` parameter is removed. - Fallback to `DMatrix` when `inplace_predict` is not available. - The heuristic for choosing a predictor is only used during training.
2023-07-03 19:23:54 +08:00
parent 3a0f787703
commit 39390cc2ee
54 changed files with 1049 additions and 778 deletions
--- a/tests/python-gpu/test_gpu_pickling.py
+++ b/tests/python-gpu/test_gpu_pickling.py
@@ -1,5 +1,4 @@
-'''Test model IO with pickle.'''
-import json
+"""Test model IO with pickle."""
 import os
 import pickle
 import subprocess
@@ -11,49 +10,48 @@ import xgboost as xgb
 from xgboost import XGBClassifier
 from xgboost import testing as tm

-model_path = './model.pkl'
+model_path = "./model.pkl"

 pytestmark = tm.timeout(30)


 def build_dataset():
    N = 10
-    x = np.linspace(0, N*N, N*N)
+    x = np.linspace(0, N * N, N * N)
    x = x.reshape((N, N))
    y = np.linspace(0, N, N)
    return x, y


 def save_pickle(bst, path):
-    with open(path, 'wb') as fd:
+    with open(path, "wb") as fd:
        pickle.dump(bst, fd)


 def load_pickle(path):
-    with open(path, 'rb') as fd:
+    with open(path, "rb") as fd:
        bst = pickle.load(fd)
    return bst


 class TestPickling:
-    args_template = [
-        "pytest",
-        "--verbose",
-        "-s",
-        "--fulltrace"]
+    args_template = ["pytest", "--verbose", "-s", "--fulltrace"]

    def run_pickling(self, bst) -> None:
        save_pickle(bst, model_path)
        args = [
-            "pytest", "--verbose", "-s", "--fulltrace",
-            "./tests/python-gpu/load_pickle.py::TestLoadPickle::test_load_pkl"
+            "pytest",
+            "--verbose",
+            "-s",
+            "--fulltrace",
+            "./tests/python-gpu/load_pickle.py::TestLoadPickle::test_load_pkl",
        ]
-        command = ''
+        command = ""
        for arg in args:
            command += arg
-            command += ' '
+            command += " "

-        cuda_environment = {'CUDA_VISIBLE_DEVICES': '-1'}
+        cuda_environment = {"CUDA_VISIBLE_DEVICES": "-1"}
        env = os.environ.copy()
        # Passing new_environment directly to `env' argument results
        # in failure on Windows:
@@ -72,7 +70,7 @@ class TestPickling:
        x, y = build_dataset()
        train_x = xgb.DMatrix(x, label=y)

-        param = {'tree_method': 'gpu_hist', "gpu_id": 0}
+        param = {"tree_method": "gpu_hist", "gpu_id": 0}
        bst = xgb.train(param, train_x)
        self.run_pickling(bst)

@@ -91,43 +89,46 @@ class TestPickling:
        X, y = build_dataset()
        dtrain = xgb.DMatrix(X, y)

-        bst = xgb.train({'tree_method': 'gpu_hist',
-                         'gpu_id': 1},
-                        dtrain, num_boost_round=6)
+        bst = xgb.train(
+            {"tree_method": "gpu_hist", "gpu_id": 1}, dtrain, num_boost_round=6
+        )

-        model_path = 'model.pkl'
+        model_path = "model.pkl"
        save_pickle(bst, model_path)
-        cuda_environment = {'CUDA_VISIBLE_DEVICES': '0'}
+        cuda_environment = {"CUDA_VISIBLE_DEVICES": "0"}
        env = os.environ.copy()
        env.update(cuda_environment)
        args = self.args_template.copy()
        args.append(
-            "./tests/python-gpu/"
-            "load_pickle.py::TestLoadPickle::test_wrap_gpu_id"
+            "./tests/python-gpu/" "load_pickle.py::TestLoadPickle::test_wrap_gpu_id"
        )
        status = subprocess.call(args, env=env)
        assert status == 0
        os.remove(model_path)

-    def test_pickled_predictor(self):
-        x, y = build_dataset()
+    def test_pickled_context(self):
+        x, y = tm.make_sparse_regression(10, 10, sparsity=0.8, as_dense=True)
        train_x = xgb.DMatrix(x, label=y)

-        param = {'tree_method': 'gpu_hist',
-                 'verbosity': 1, 'predictor': 'gpu_predictor'}
+        param = {"tree_method": "gpu_hist", "verbosity": 1}
        bst = xgb.train(param, train_x)
-        config = json.loads(bst.save_config())
-        assert config['learner']['gradient_booster']['gbtree_train_param'][
-            'predictor'] == 'gpu_predictor'
+
+        with tm.captured_output() as (out, err):
+            bst.inplace_predict(x)
+
+        # The warning is redirected to Python callback, so it's printed in stdout
+        # instead of stderr.
+        stdout = out.getvalue()
+        assert stdout.find("mismatched devices") != -1

        save_pickle(bst, model_path)

        args = self.args_template.copy()
-        args.append(
-            "./tests/python-gpu/"
-            "load_pickle.py::TestLoadPickle::test_predictor_type_is_auto")
+        root = tm.project_root(__file__)
+        path = os.path.join(root, "tests", "python-gpu", "load_pickle.py")
+        args.append(path + "::TestLoadPickle::test_context_is_removed")

-        cuda_environment = {'CUDA_VISIBLE_DEVICES': '-1'}
+        cuda_environment = {"CUDA_VISIBLE_DEVICES": "-1"}
        env = os.environ.copy()
        env.update(cuda_environment)

@@ -138,25 +139,29 @@ class TestPickling:
        args = self.args_template.copy()
        args.append(
            "./tests/python-gpu/"
-            "load_pickle.py::TestLoadPickle::test_predictor_type_is_gpu")
+            "load_pickle.py::TestLoadPickle::test_context_is_preserved"
+        )

        # Load in environment that has GPU.
        env = os.environ.copy()
-        assert 'CUDA_VISIBLE_DEVICES' not in env.keys()
+        assert "CUDA_VISIBLE_DEVICES" not in env.keys()
        status = subprocess.call(args, env=env)
        assert status == 0

        os.remove(model_path)

    @pytest.mark.skipif(**tm.no_sklearn())
-    def test_predict_sklearn_pickle(self):
+    def test_predict_sklearn_pickle(self) -> None:
        from sklearn.datasets import load_digits
+
        x, y = load_digits(return_X_y=True)

-        kwargs = {'tree_method': 'gpu_hist',
-                  'predictor': 'gpu_predictor',
-                  'objective': 'binary:logistic',
-                  'n_estimators': 10}
+        kwargs = {
+            "tree_method": "gpu_hist",
+            "objective": "binary:logistic",
+            "gpu_id": 0,
+            "n_estimators": 10,
+        }

        model = XGBClassifier(**kwargs)
        model.fit(x, y)
@@ -165,24 +170,25 @@ class TestPickling:
        del model

        # load model
-        model: xgb.XGBClassifier = load_pickle("model.pkl")
+        model = load_pickle("model.pkl")
        os.remove("model.pkl")

        gpu_pred = model.predict(x, output_margin=True)

        # Switch to CPU predictor
        bst = model.get_booster()
-        bst.set_param({'predictor': 'cpu_predictor'})
+        tm.set_ordinal(-1, bst)
        cpu_pred = model.predict(x, output_margin=True)
        np.testing.assert_allclose(cpu_pred, gpu_pred, rtol=1e-5)

    def test_training_on_cpu_only_env(self):
-        cuda_environment = {'CUDA_VISIBLE_DEVICES': '-1'}
+        cuda_environment = {"CUDA_VISIBLE_DEVICES": "-1"}
        env = os.environ.copy()
        env.update(cuda_environment)
        args = self.args_template.copy()
        args.append(
            "./tests/python-gpu/"
-            "load_pickle.py::TestLoadPickle::test_training_on_cpu_only_env")
+            "load_pickle.py::TestLoadPickle::test_training_on_cpu_only_env"
+        )
        status = subprocess.call(args, env=env)
        assert status == 0