Support categorical data with pandas Dataframe in inplace prediction (#7322)
This commit is contained in:
@@ -44,9 +44,12 @@ def test_num_parallel_tree():
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_pandas())
|
||||
@pytest.mark.skipif(**tm.no_cudf())
|
||||
@pytest.mark.skipif(**tm.no_sklearn())
|
||||
def test_categorical():
|
||||
import pandas as pd
|
||||
import cudf
|
||||
import cupy as cp
|
||||
from sklearn.datasets import load_svmlight_file
|
||||
|
||||
data_dir = os.path.join(tm.PROJECT_ROOT, "demo", "data")
|
||||
@@ -59,7 +62,6 @@ def test_categorical():
|
||||
)
|
||||
X = pd.DataFrame(X.todense()).astype("category")
|
||||
clf.fit(X, y)
|
||||
assert not clf._can_use_inplace_predict()
|
||||
|
||||
with tempfile.TemporaryDirectory() as tempdir:
|
||||
model = os.path.join(tempdir, "categorial.json")
|
||||
@@ -74,3 +76,25 @@ def test_categorical():
|
||||
)
|
||||
assert categories_sizes.shape[0] != 0
|
||||
np.testing.assert_allclose(categories_sizes, 1)
|
||||
|
||||
def check_predt(X, y):
|
||||
reg = xgb.XGBRegressor(
|
||||
tree_method="gpu_hist", enable_categorical=True, n_estimators=64
|
||||
)
|
||||
reg.fit(X, y)
|
||||
predts = reg.predict(X)
|
||||
booster = reg.get_booster()
|
||||
assert "c" in booster.feature_types
|
||||
assert len(booster.feature_types) == 1
|
||||
inp_predts = booster.inplace_predict(X)
|
||||
if isinstance(inp_predts, cp.ndarray):
|
||||
inp_predts = cp.asnumpy(inp_predts)
|
||||
np.testing.assert_allclose(predts, inp_predts)
|
||||
|
||||
y = [1, 2, 3]
|
||||
X = pd.DataFrame({"f0": ["a", "b", "c"]})
|
||||
X["f0"] = X["f0"].astype("category")
|
||||
check_predt(X, y)
|
||||
|
||||
X = cudf.DataFrame(X)
|
||||
check_predt(X, y)
|
||||
|
||||
Reference in New Issue
Block a user