From 2c8d735cb31cdbc237128fee322b6699171268ec Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 11 Apr 2023 00:17:34 +0800 Subject: [PATCH] Fix tests with pandas 2.0. (#9014) * Fix tests with pandas 2.0. - `is_categorical` is replaced by `is_categorical_dtype`. - one hot encoding returns boolean type instead of integer type. --- tests/python/test_basic_models.py | 2 +- tests/python/test_with_pandas.py | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/python/test_basic_models.py b/tests/python/test_basic_models.py index f9d6f37e1..d76205593 100644 --- a/tests/python/test_basic_models.py +++ b/tests/python/test_basic_models.py @@ -664,7 +664,7 @@ class TestModels: y = rng.randn(rows) feature_names = ["test_feature_" + str(i) for i in range(cols)] X_pd = pd.DataFrame(X, columns=feature_names) - X_pd.iloc[:, 3] = X_pd.iloc[:, 3].astype(np.int32) + X_pd[f"test_feature_{3}"] = X_pd.iloc[:, 3].astype(np.int32) Xy = xgb.DMatrix(X_pd, y) assert Xy.feature_types[3] == "int" diff --git a/tests/python/test_with_pandas.py b/tests/python/test_with_pandas.py index e5783b24d..07295eb6c 100644 --- a/tests/python/test_with_pandas.py +++ b/tests/python/test_with_pandas.py @@ -77,7 +77,10 @@ class TestPandas: np.testing.assert_array_equal(result, exp) dm = xgb.DMatrix(dummies) assert dm.feature_names == ['B', 'A_X', 'A_Y', 'A_Z'] - assert dm.feature_types == ['int', 'int', 'int', 'int'] + if int(pd.__version__[0]) >= 2: + assert dm.feature_types == ['int', 'i', 'i', 'i'] + else: + assert dm.feature_types == ['int', 'int', 'int', 'int'] assert dm.num_row() == 3 assert dm.num_col() == 4 @@ -298,14 +301,14 @@ class TestPandas: @pytest.mark.parametrize("DMatrixT", [xgb.DMatrix, xgb.QuantileDMatrix]) def test_nullable_type(self, DMatrixT) -> None: - from pandas.api.types import is_categorical + from pandas.api.types import is_categorical_dtype for orig, df in pd_dtypes(): if hasattr(df.dtypes, "__iter__"): - enable_categorical = any(is_categorical 
for dtype in df.dtypes) + enable_categorical = any(is_categorical_dtype(dtype) for dtype in df.dtypes) else: # series - enable_categorical = is_categorical(df.dtype) + enable_categorical = is_categorical_dtype(df.dtype) f0_orig = orig[orig.columns[0]] if isinstance(orig, pd.DataFrame) else orig f0 = df[df.columns[0]] if isinstance(df, pd.DataFrame) else df