Reduce warnings and flakiness in tests. (#10659)

- Fix warnings in tests.
- Try to reduce the flakiness of the dask test.
This commit is contained in:
Jiaming Yuan
2024-08-03 07:32:47 +08:00
committed by GitHub
parent 2e7ba900ef
commit a185b693dc
4 changed files with 32 additions and 26 deletions

View File

@@ -37,6 +37,7 @@ from scipy import sparse
import xgboost as xgb
from xgboost import RabitTracker
from xgboost.core import ArrayLike
from xgboost.data import is_pd_cat_dtype
from xgboost.sklearn import SklObjective
from xgboost.testing.data import (
get_california_housing,
@@ -403,7 +404,6 @@ def make_categorical(
X, y
"""
import pandas as pd
from pandas.api.types import is_categorical_dtype
rng = np.random.RandomState(1994)
@@ -431,8 +431,8 @@ def make_categorical(
low=0, high=n_samples - 1, size=int(n_samples * sparsity)
)
df.iloc[index, i] = np.nan
if is_categorical_dtype(df.dtypes[i]):
assert n_categories == np.unique(df.dtypes[i].categories).size
if is_pd_cat_dtype(df.dtypes.iloc[i]):
assert n_categories == np.unique(df.dtypes.iloc[i].categories).size
if onehot:
df = pd.get_dummies(df)

View File

@@ -8,6 +8,7 @@ import numpy as np
import xgboost as xgb
import xgboost.testing as tm
from xgboost.data import is_pd_cat_dtype
def get_basescore(model: xgb.XGBModel) -> float:
@@ -166,8 +167,6 @@ def check_cut(
n_entries: int, indptr: np.ndarray, data: np.ndarray, dtypes: Any
) -> None:
"""Check the cut values."""
from pandas.api.types import is_categorical_dtype
assert data.shape[0] == indptr[-1]
assert data.shape[0] == n_entries
@@ -177,18 +176,18 @@ def check_cut(
end = int(indptr[i])
for j in range(beg + 1, end):
assert data[j] > data[j - 1]
if is_categorical_dtype(dtypes[i - 1]):
if is_pd_cat_dtype(dtypes.iloc[i - 1]):
assert data[j] == data[j - 1] + 1
def check_get_quantile_cut_device(tree_method: str, use_cupy: bool) -> None:
"""Check with optional cupy."""
from pandas.api.types import is_categorical_dtype
import pandas as pd
n_samples = 1024
n_features = 14
max_bin = 16
dtypes = [np.float32] * n_features
dtypes = pd.Series([np.float32] * n_features)
# numerical
X, y, w = tm.make_regression(n_samples, n_features, use_cupy=use_cupy)
@@ -237,7 +236,7 @@ def check_get_quantile_cut_device(tree_method: str, use_cupy: bool) -> None:
X, y = tm.make_categorical(
n_samples, n_features, n_categories, False, sparsity=0.8, cat_ratio=0.5
)
n_cat_features = len([0 for dtype in X.dtypes if is_categorical_dtype(dtype)])
n_cat_features = len([0 for dtype in X.dtypes if is_pd_cat_dtype(dtype)])
n_num_features = n_features - n_cat_features
n_entries = n_categories * n_cat_features + (max_bin + 1) * n_num_features
# - qdm