Support categorical data with pandas Dataframe in inplace prediction (#7322)

2021-10-17 14:32:06 +08:00
parent 8e619010d0
commit f56e2e9a66
4 changed files with 40 additions and 21 deletions
--- a/tests/python-gpu/test_gpu_with_sklearn.py
+++ b/tests/python-gpu/test_gpu_with_sklearn.py
@@ -44,9 +44,12 @@ def test_num_parallel_tree():


@pytest.mark.skipif(**tm.no_pandas())
+@pytest.mark.skipif(**tm.no_cudf())
@pytest.mark.skipif(**tm.no_sklearn())
 def test_categorical():
    import pandas as pd
+    import cudf
+    import cupy as cp
    from sklearn.datasets import load_svmlight_file

    data_dir = os.path.join(tm.PROJECT_ROOT, "demo", "data")
@@ -59,7 +62,6 @@ def test_categorical():
    )
    X = pd.DataFrame(X.todense()).astype("category")
    clf.fit(X, y)
-    assert not clf._can_use_inplace_predict()

    with tempfile.TemporaryDirectory() as tempdir:
        model = os.path.join(tempdir, "categorial.json")
@@ -74,3 +76,25 @@ def test_categorical():
            )
            assert categories_sizes.shape[0] != 0
            np.testing.assert_allclose(categories_sizes, 1)
+
+    def check_predt(X, y):  # fit on (X, y), then check predict() against inplace_predict()
+        reg = xgb.XGBRegressor(
+            tree_method="gpu_hist", enable_categorical=True, n_estimators=64
+        )
+        reg.fit(X, y)
+        predts = reg.predict(X)  # reference predictions from the sklearn wrapper
+        booster = reg.get_booster()
+        assert "c" in booster.feature_types  # "c" presumably marks a categorical feature -- confirm
+        assert len(booster.feature_types) == 1  # callers pass a single-column X
+        inp_predts = booster.inplace_predict(X)  # same input, straight through the Booster
+        if isinstance(inp_predts, cp.ndarray):  # result may be a cupy array; normalise before comparing
+            inp_predts = cp.asnumpy(inp_predts)
+        np.testing.assert_allclose(predts, inp_predts)  # both prediction paths must agree
+
+    y = [1, 2, 3]
+    X = pd.DataFrame({"f0": ["a", "b", "c"]})
+    X["f0"] = X["f0"].astype("category")
+    check_predt(X, y)
+
+    X = cudf.DataFrame(X)
+    check_predt(X, y)