* Handle missing values in dataframe with category dtype. (#7331)
* Replace -1 in pandas initializer.
* Unify `IsValid` functor.
* Mimic pandas data handling in cuDF glue code.
* Check invalid categories.
* Fix DDM sketching.
* Fix pick error.
This commit is contained in:
@@ -138,9 +138,22 @@ class TestPandas:
|
||||
X, enable_categorical=True
|
||||
)
|
||||
|
||||
assert np.issubdtype(transformed[:, 0].dtype, np.integer)
|
||||
assert transformed[:, 0].min() == 0
|
||||
|
||||
# test missing value
|
||||
X = pd.DataFrame({"f0": ["a", "b", np.NaN]})
|
||||
X["f0"] = X["f0"].astype("category")
|
||||
arr, _, _ = xgb.data._transform_pandas_df(X, enable_categorical=True)
|
||||
assert not np.any(arr == -1.0)
|
||||
|
||||
X = X["f0"]
|
||||
with pytest.raises(ValueError):
|
||||
xgb.DMatrix(X, y)
|
||||
|
||||
Xy = xgb.DMatrix(X, y, enable_categorical=True)
|
||||
assert Xy.num_row() == 3
|
||||
assert Xy.num_col() == 1
|
||||
|
||||
def test_pandas_sparse(self):
|
||||
import pandas as pd
|
||||
rows = 100
|
||||
|
||||
Reference in New Issue
Block a user