Support categorical data with pandas Dataframe in inplace prediction (#7322)

This commit is contained in:
Jiaming Yuan
2021-10-17 14:32:06 +08:00
committed by GitHub
parent 8e619010d0
commit f56e2e9a66
4 changed files with 40 additions and 21 deletions

View File

@@ -44,9 +44,12 @@ def test_num_parallel_tree():
@pytest.mark.skipif(**tm.no_pandas())
@pytest.mark.skipif(**tm.no_cudf())
@pytest.mark.skipif(**tm.no_sklearn())
def test_categorical():
import pandas as pd
import cudf
import cupy as cp
from sklearn.datasets import load_svmlight_file
data_dir = os.path.join(tm.PROJECT_ROOT, "demo", "data")
@@ -59,7 +62,6 @@ def test_categorical():
)
X = pd.DataFrame(X.todense()).astype("category")
clf.fit(X, y)
assert not clf._can_use_inplace_predict()
with tempfile.TemporaryDirectory() as tempdir:
model = os.path.join(tempdir, "categorial.json")
@@ -74,3 +76,25 @@ def test_categorical():
)
assert categories_sizes.shape[0] != 0
np.testing.assert_allclose(categories_sizes, 1)
def check_predt(X, y):
reg = xgb.XGBRegressor(
tree_method="gpu_hist", enable_categorical=True, n_estimators=64
)
reg.fit(X, y)
predts = reg.predict(X)
booster = reg.get_booster()
assert "c" in booster.feature_types
assert len(booster.feature_types) == 1
inp_predts = booster.inplace_predict(X)
if isinstance(inp_predts, cp.ndarray):
inp_predts = cp.asnumpy(inp_predts)
np.testing.assert_allclose(predts, inp_predts)
y = [1, 2, 3]
X = pd.DataFrame({"f0": ["a", "b", "c"]})
X["f0"] = X["f0"].astype("category")
check_predt(X, y)
X = cudf.DataFrame(X)
check_predt(X, y)