Fix tests with pandas 2.0. (#9014)

* Fix tests with pandas 2.0.

- `is_categorical` is replaced by `is_categorical_dtype`.
- One-hot encoding now returns boolean columns instead of integer columns.
This commit is contained in:
Jiaming Yuan 2023-04-11 00:17:34 +08:00 committed by GitHub
parent ebd64f6e22
commit 2c8d735cb3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 8 additions and 5 deletions

View File

@ -664,7 +664,7 @@ class TestModels:
y = rng.randn(rows)
feature_names = ["test_feature_" + str(i) for i in range(cols)]
X_pd = pd.DataFrame(X, columns=feature_names)
X_pd.iloc[:, 3] = X_pd.iloc[:, 3].astype(np.int32)
X_pd[f"test_feature_{3}"] = X_pd.iloc[:, 3].astype(np.int32)
Xy = xgb.DMatrix(X_pd, y)
assert Xy.feature_types[3] == "int"

View File

@ -77,7 +77,10 @@ class TestPandas:
np.testing.assert_array_equal(result, exp)
dm = xgb.DMatrix(dummies)
assert dm.feature_names == ['B', 'A_X', 'A_Y', 'A_Z']
assert dm.feature_types == ['int', 'int', 'int', 'int']
if int(pd.__version__[0]) >= 2:
assert dm.feature_types == ['int', 'i', 'i', 'i']
else:
assert dm.feature_types == ['int', 'int', 'int', 'int']
assert dm.num_row() == 3
assert dm.num_col() == 4
@ -298,14 +301,14 @@ class TestPandas:
@pytest.mark.parametrize("DMatrixT", [xgb.DMatrix, xgb.QuantileDMatrix])
def test_nullable_type(self, DMatrixT) -> None:
from pandas.api.types import is_categorical
from pandas.api.types import is_categorical_dtype
for orig, df in pd_dtypes():
if hasattr(df.dtypes, "__iter__"):
enable_categorical = any(is_categorical for dtype in df.dtypes)
enable_categorical = any(is_categorical_dtype for dtype in df.dtypes)
else:
# series
enable_categorical = is_categorical(df.dtype)
enable_categorical = is_categorical_dtype(df.dtype)
f0_orig = orig[orig.columns[0]] if isinstance(orig, pd.DataFrame) else orig
f0 = df[df.columns[0]] if isinstance(df, pd.DataFrame) else df