Encode pandas categorical data automatically. (#7231)

This commit is contained in:
Jiaming Yuan
2021-09-17 11:09:55 +08:00
committed by GitHub
parent 32e0858501
commit 22d56cebf1
3 changed files with 40 additions and 16 deletions

View File

@@ -130,6 +130,17 @@ class TestPandas:
m = xgb.DMatrix(X, y, enable_categorical=True)
assert m.feature_types[0] == 'c'
X_0 = ["f", "o", "o"]
X_1 = [4, 3, 2]
X = pd.DataFrame({"feat_0": X_0, "feat_1": X_1})
X["feat_0"] = X["feat_0"].astype("category")
transformed, _, feature_types = xgb.data._transform_pandas_df(
X, enable_categorical=True
)
assert np.issubdtype(transformed[:, 0].dtype, np.integer)
assert transformed[:, 0].min() == 0
def test_pandas_sparse(self):
import pandas as pd
rows = 100