Don't shuffle columns in categorical tests. (#8446)

This commit is contained in:
Jiaming Yuan
2022-11-28 20:28:06 +08:00
committed by GitHub
parent 67ea1c3435
commit f2209c1fe4
3 changed files with 30 additions and 15 deletions

View File

@@ -486,6 +486,7 @@ def make_categorical(
onehot: bool,
sparsity: float = 0.0,
cat_ratio: float = 1.0,
shuffle: bool = False,
) -> Tuple[ArrayLike, np.ndarray]:
"""Generate categorical features for test.
@@ -499,6 +500,8 @@ def make_categorical(
The ratio of the amount of missing values over the number of all entries.
cat_ratio:
The ratio of features that are categorical.
shuffle:
Whether we should shuffle the columns.
Returns
-------
@@ -538,10 +541,12 @@ def make_categorical(
if onehot:
df = pd.get_dummies(df)
if shuffle:
columns = list(df.columns)
rng.shuffle(columns)
df = df[columns]
return pd.get_dummies(df), label
return df, label