Define the new device parameter. (#9362)

commit 04aff3af8e (parent 2d0cd2817e)
Author: Jiaming Yuan
Date: 2023-07-13 19:30:25 +08:00
Committed by: GitHub
63 changed files with 827 additions and 477 deletions

View File

@@ -39,7 +39,8 @@ predict_parameter_strategy = strategies.fixed_dictionaries(
}
)
pytestmark = tm.timeout(20)
# cupy nvrtc compilation can take a long time for the first run
pytestmark = tm.timeout(30)
class TestGPUPredict:
@@ -71,8 +72,8 @@ class TestGPUPredict:
param = {
"objective": "binary:logistic",
"eval_metric": "logloss",
"tree_method": "gpu_hist",
"gpu_id": 0,
"tree_method": "hist",
"device": "gpu:0",
"max_depth": 1,
}
bst = xgb.train(
@@ -84,7 +85,7 @@ class TestGPUPredict:
gpu_pred_test = bst.predict(dtest, output_margin=True)
gpu_pred_val = bst.predict(dval, output_margin=True)
bst.set_param({"gpu_id": -1, "tree_method": "hist"})
bst.set_param({"device": "cpu", "tree_method": "hist"})
bst_cpu = copy(bst)
cpu_pred_train = bst_cpu.predict(dtrain, output_margin=True)
cpu_pred_test = bst_cpu.predict(dtest, output_margin=True)
@@ -107,14 +108,15 @@ class TestGPUPredict:
dtrain = xgb.DMatrix(X_train, label=y_train)
params = {}
params["tree_method"] = "gpu_hist"
params["tree_method"] = "hist"
params["device"] = "cuda:0"
bst = xgb.train(params, dtrain)
tm.set_ordinal(0, bst)
bst.set_param({"device": "cuda:0"})
# Don't reuse the DMatrix for prediction, otherwise the result is cached.
predict_gpu_0 = bst.predict(xgb.DMatrix(X_test))
predict_gpu_1 = bst.predict(xgb.DMatrix(X_test))
tm.set_ordinal(-1, bst)
bst.set_param({"device": "cpu"})
predict_cpu = bst.predict(xgb.DMatrix(X_test))
assert np.allclose(predict_gpu_0, predict_gpu_1)
@@ -131,8 +133,8 @@ class TestGPUPredict:
X_test, y_test = X[tr_size:, :], y[tr_size:]
params = {
"tree_method": "gpu_hist",
"gpu_id": "0",
"tree_method": "hist",
"device": "cuda:0",
"n_jobs": -1,
"seed": 123,
}
@@ -141,13 +143,54 @@ class TestGPUPredict:
gpu_test_score = m.score(X_test, y_test)
# Now with cpu
m = tm.set_ordinal(-1, m)
m.set_params(device="cpu")
cpu_train_score = m.score(X_train, y_train)
cpu_test_score = m.score(X_test, y_test)
assert np.allclose(cpu_train_score, gpu_train_score)
assert np.allclose(cpu_test_score, gpu_test_score)
@pytest.mark.parametrize("device", ["cpu", "cuda"])
@pytest.mark.skipif(**tm.no_cupy())
def test_inplace_predict_device_type(self, device: str) -> None:
    """Test inplace predict with different device and data types.

    The sklearn interface uses inplace predict by default and gbtree fallbacks to
    DMatrix whenever device doesn't match. This test checks that XGBoost can handle
    different combinations of device and input data type.
    """
    import cudf
    import cupy as cp
    import pandas as pd
    from scipy.sparse import csr_matrix

    # Booster is trained on the parametrized device ("cpu" or "cuda").
    reg = xgb.XGBRegressor(tree_method="hist", device=device)
    n_samples = 4096
    n_features = 13
    # Training data is generated as cupy (device) arrays.
    X, y, w = tm.make_regression(n_samples, n_features, use_cupy=True)
    # NOTE(review): presumably zeros are replaced so the CSR input below (which
    # stores only non-zero entries) represents the same dense matrix as the
    # other inputs -- confirm against xgboost's missing-value handling.
    X[X == 0.0] = 1.0
    reg.fit(X, y, sample_weight=w)

    # Baseline prediction: same cupy array used for training.
    predt_0 = reg.predict(X)
    # Host numpy array with identical contents.
    X = cp.asnumpy(X)
    predt_1 = reg.predict(X)
    # Host (pandas) dataframe.
    df = pd.DataFrame(X)
    predt_2 = reg.predict(df)
    # Device (cudf) dataframe.
    df = cudf.DataFrame(X)
    predt_3 = reg.predict(df)
    # Host sparse (CSR) matrix.
    X_csr = csr_matrix(X)
    predt_4 = reg.predict(X_csr)

    # Every input representation must produce the same predictions as the
    # baseline, regardless of which device the booster is configured for.
    np.testing.assert_allclose(predt_0, predt_1)
    np.testing.assert_allclose(predt_0, predt_2)
    np.testing.assert_allclose(predt_0, predt_3)
    np.testing.assert_allclose(predt_0, predt_4)
def run_inplace_base_margin(self, booster, dtrain, X, base_margin):
import cupy as cp
@@ -175,7 +218,9 @@ class TestGPUPredict:
dtrain = xgb.DMatrix(X, y)
booster = xgb.train(
{"tree_method": "gpu_hist", "gpu_id": device}, dtrain, num_boost_round=10
{"tree_method": "hist", "device": f"cuda:{device}"},
dtrain,
num_boost_round=10,
)
test = xgb.DMatrix(X[:10, ...], missing=missing)
@@ -208,13 +253,13 @@ class TestGPUPredict:
missing_idx = [i for i in range(0, X.shape[1], 16)]
X[:, missing_idx] = missing
reg = xgb.XGBRegressor(
tree_method="gpu_hist", n_estimators=8, missing=missing, gpu_id=device
tree_method="hist", n_estimators=8, missing=missing, device=f"cuda:{device}"
)
reg.fit(X, y)
reg = tm.set_ordinal(device, reg)
reg.set_params(device=f"cuda:{device}")
gpu_predt = reg.predict(X)
reg = tm.set_ordinal(-1, reg)
reg = reg.set_params(device="cpu")
cpu_predt = reg.predict(cp.asnumpy(X))
np.testing.assert_allclose(gpu_predt, cpu_predt, atol=1e-6)
cp.cuda.runtime.setDevice(0)
@@ -250,7 +295,9 @@ class TestGPUPredict:
dtrain = xgb.DMatrix(X, y)
booster = xgb.train({"tree_method": "gpu_hist"}, dtrain, num_boost_round=10)
booster = xgb.train(
{"tree_method": "hist", "device": "cuda:0"}, dtrain, num_boost_round=10
)
test = xgb.DMatrix(X)
predt_from_array = booster.inplace_predict(X)
predt_from_dmatrix = booster.predict(test)
@@ -280,12 +327,12 @@ class TestGPUPredict:
def test_shap(self, num_rounds, dataset, param):
if dataset.name.endswith("-l1"): # not supported by the exact tree method
return
param.update({"tree_method": "gpu_hist", "gpu_id": 0})
param.update({"tree_method": "hist", "device": "gpu:0"})
param = dataset.set_params(param)
dmat = dataset.get_dmat()
bst = xgb.train(param, dmat, num_rounds)
test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin)
bst = tm.set_ordinal(0, bst)
bst.set_param({"device": "gpu:0"})
shap = bst.predict(test_dmat, pred_contribs=True)
margin = bst.predict(test_dmat, output_margin=True)
assume(len(dataset.y) > 0)
@@ -298,12 +345,12 @@ class TestGPUPredict:
def test_shap_interactions(self, num_rounds, dataset, param):
if dataset.name.endswith("-l1"): # not supported by the exact tree method
return
param.update({"tree_method": "hist", "gpu_id": 0})
param.update({"tree_method": "hist", "device": "cuda:0"})
param = dataset.set_params(param)
dmat = dataset.get_dmat()
bst = xgb.train(param, dmat, num_rounds)
test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin)
bst = tm.set_ordinal(0, bst)
bst.set_param({"device": "cuda:0"})
shap = bst.predict(test_dmat, pred_interactions=True)
margin = bst.predict(test_dmat, output_margin=True)
assume(len(dataset.y) > 0)
@@ -317,16 +364,18 @@ class TestGPUPredict:
def test_shap_categorical(self):
X, y = tm.make_categorical(100, 20, 7, False)
Xy = xgb.DMatrix(X, y, enable_categorical=True)
booster = xgb.train({"tree_method": "gpu_hist"}, Xy, num_boost_round=10)
booster = xgb.train(
{"tree_method": "hist", "device": "gpu:0"}, Xy, num_boost_round=10
)
booster = tm.set_ordinal(0, booster)
booster.set_param({"device": "cuda:0"})
shap = booster.predict(Xy, pred_contribs=True)
margin = booster.predict(Xy, output_margin=True)
np.testing.assert_allclose(
np.sum(shap, axis=len(shap.shape) - 1), margin, rtol=1e-3
)
booster = tm.set_ordinal(-1, booster)
booster.set_param({"device": "cpu"})
shap = booster.predict(Xy, pred_contribs=True)
margin = booster.predict(Xy, output_margin=True)
np.testing.assert_allclose(
@@ -334,8 +383,8 @@ class TestGPUPredict:
)
def test_predict_leaf_basic(self):
gpu_leaf = run_predict_leaf(0)
cpu_leaf = run_predict_leaf(-1)
gpu_leaf = run_predict_leaf("gpu:0")
cpu_leaf = run_predict_leaf("cpu")
np.testing.assert_equal(gpu_leaf, cpu_leaf)
def run_predict_leaf_booster(self, param, num_rounds, dataset):
@@ -344,23 +393,22 @@ class TestGPUPredict:
booster = xgb.train(
param, dtrain=dataset.get_dmat(), num_boost_round=num_rounds
)
booster = tm.set_ordinal(-1, booster)
booster.set_param({"device": "cpu"})
cpu_leaf = booster.predict(m, pred_leaf=True)
booster = tm.set_ordinal(0, booster)
booster.set_param({"device": "cuda:0"})
gpu_leaf = booster.predict(m, pred_leaf=True)
np.testing.assert_equal(cpu_leaf, gpu_leaf)
@given(predict_parameter_strategy, tm.make_dataset_strategy())
@settings(deadline=None, max_examples=20, print_blob=True)
def test_predict_leaf_gbtree(self, param, dataset):
def test_predict_leaf_gbtree(self, param: dict, dataset: tm.TestDataset) -> None:
# Unsupported for random forest
if param.get("num_parallel_tree", 1) > 1 and dataset.name.endswith("-l1"):
return
param["booster"] = "gbtree"
param["tree_method"] = "gpu_hist"
param.update({"booster": "gbtree", "tree_method": "hist", "device": "cuda:0"})
self.run_predict_leaf_booster(param, 10, dataset)
@given(predict_parameter_strategy, tm.make_dataset_strategy())
@@ -370,8 +418,7 @@ class TestGPUPredict:
if param.get("num_parallel_tree", 1) > 1 and dataset.name.endswith("-l1"):
return
param["booster"] = "dart"
param["tree_method"] = "gpu_hist"
param.update({"booster": "dart", "tree_method": "hist", "device": "cuda:0"})
self.run_predict_leaf_booster(param, 10, dataset)
@pytest.mark.skipif(**tm.no_sklearn())
@@ -395,12 +442,12 @@ class TestGPUPredict:
dtrain = xgb.DMatrix(df, label=y, enable_categorical=True)
params = {
"tree_method": "gpu_hist",
"tree_method": "hist",
"max_depth": 3,
"learning_rate": 1.0,
"base_score": 0.0,
"eval_metric": "rmse",
"gpu_id": "0",
"device": "cuda:0",
}
eval_history = {}
@@ -412,7 +459,7 @@ class TestGPUPredict:
verbose_eval=False,
evals_result=eval_history,
)
bst = tm.set_ordinal(0, bst)
bst.set_param({"device": "cuda:0"})
pred = bst.predict(dtrain)
rmse = mean_squared_error(y_true=y, y_pred=pred, squared=False)
np.testing.assert_almost_equal(
@@ -434,14 +481,16 @@ class TestGPUPredict:
Xy = xgb.DMatrix(X, y)
if n_classes == 2:
params = {
"tree_method": "gpu_hist",
"tree_method": "hist",
"device": "cuda:0",
"booster": "dart",
"rate_drop": 0.5,
"objective": "binary:logistic",
}
else:
params = {
"tree_method": "gpu_hist",
"tree_method": "hist",
"device": "cuda:0",
"booster": "dart",
"rate_drop": 0.5,
"objective": "multi:softprob",
@@ -455,7 +504,7 @@ class TestGPUPredict:
copied = booster.predict(Xy)
# CPU
booster = tm.set_ordinal(-1, booster)
booster.set_param({"device": "cpu"})
cpu_inplace = booster.inplace_predict(X_)
cpu_copied = booster.predict(Xy)
@@ -465,7 +514,7 @@ class TestGPUPredict:
cp.testing.assert_allclose(inplace, copied, atol=1e-6)
# GPU
booster = tm.set_ordinal(0, booster)
booster.set_param({"device": "cuda:0"})
inplace = booster.inplace_predict(X)
copied = booster.predict(Xy)
@@ -482,7 +531,7 @@ class TestGPUPredict:
orig = rng.randint(low=0, high=127, size=rows * cols).reshape(rows, cols)
y = rng.randint(low=0, high=127, size=rows)
dtrain = xgb.DMatrix(orig, label=y)
booster = xgb.train({"tree_method": "gpu_hist"}, dtrain)
booster = xgb.train({"tree_method": "hist", "device": "cuda:0"}, dtrain)
predt_orig = booster.inplace_predict(orig)
# all primitive types in numpy