Mitigate flaky data iter test. (#8244)

- Reduce the number of batches.
- Verify labels.
Jiaming Yuan 2022-09-14 17:54:14 +08:00 committed by GitHub
parent bdf265076d
commit 2e63af6117
3 changed files with 13 additions and 3 deletions


@@ -19,7 +19,7 @@ def test_gpu_single_batch() -> None:
 @given(
     strategies.integers(0, 1024),
     strategies.integers(1, 7),
-    strategies.integers(0, 13),
+    strategies.integers(0, 8),
     strategies.booleans(),
 )
 @settings(deadline=None, print_blob=True)
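
The hunk above narrows the Hypothesis strategy that draws the number of batches from 0..13 down to 0..8, so the slowest generated examples build fewer batches and the property-based test is less likely to exceed the CI time budget. The test body is not part of this hunk, so the function below is only an illustrative sketch of how the drawn arguments are bounded; the real test presumably forwards them to run_data_iterator.

# Illustrative sketch only -- not the test from this diff.
from hypothesis import given, settings, strategies

@given(
    strategies.integers(0, 1024),  # samples per batch
    strategies.integers(1, 7),     # number of features
    strategies.integers(0, 8),     # number of batches, upper bound reduced from 13
    strategies.booleans(),
)
@settings(deadline=None, print_blob=True)
def test_batch_bounds(n_samples: int, n_features: int, n_batches: int, flag: bool) -> None:
    # With the tighter strategy, Hypothesis never draws more than 8 batches.
    assert 0 <= n_batches <= 8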


@@ -92,6 +92,12 @@ def run_data_iterator(
     assert non_increasing(results_from_it["Train"]["rmse"])
 
     X, y, w = it.as_arrays()
+    if use_cupy:
+        _y = y.get()
+    else:
+        _y = y
+    np.testing.assert_allclose(Xy.get_label(), _y)
+
     Xy = xgb.DMatrix(X, y, weight=w)
     assert Xy.num_row() == n_samples_per_batch * n_batches
     assert Xy.num_col() == n_features
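
The added block checks that the labels stored in the DMatrix built from the iterator match the concatenated per-batch labels. When use_cupy is set the batches are cupy arrays, and cupy's .get() copies the device array back to the host so it can be compared against the numpy array returned by Xy.get_label(). Below is a self-contained sketch of that round-trip check on plain numpy data (the values are made up for illustration):

# Sketch of the label round-trip asserted above, using numpy only.
import numpy as np
import xgboost as xgb

X = np.random.default_rng(0).normal(size=(8, 3))
y = np.arange(8, dtype=np.float32)
Xy = xgb.DMatrix(X, label=y)
# Labels fed into a DMatrix should come back unchanged from get_label().
np.testing.assert_allclose(Xy.get_label(), y)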


@@ -12,6 +12,7 @@ from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED
 import pytest
 import gc
 import xgboost as xgb
+from xgboost.core import ArrayLike
 import numpy as np
 from scipy import sparse
 import platform
@@ -212,13 +213,16 @@ class IteratorForTest(xgb.core.DataIter):
     def as_arrays(
         self,
-    ) -> Tuple[Union[np.ndarray, sparse.csr_matrix], np.ndarray, np.ndarray]:
+    ) -> Tuple[Union[np.ndarray, sparse.csr_matrix], ArrayLike, ArrayLike]:
         if isinstance(self.X[0], sparse.csr_matrix):
             X = sparse.vstack(self.X, format="csr")
         else:
             X = np.concatenate(self.X, axis=0)
         y = np.concatenate(self.y, axis=0)
-        w = np.concatenate(self.w, axis=0)
+        if self.w:
+            w = np.concatenate(self.w, axis=0)
+        else:
+            w = None
         return X, y, w
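
With the added fallback, as_arrays() returns None for the weights when no per-batch weights were collected, and the return annotation switches to ArrayLike, presumably because the concatenated label and weight arrays may be cupy rather than numpy arrays in the GPU tests. Returning None is convenient for the caller since xgb.DMatrix accepts weight=None directly; a minimal sketch of that call pattern (the data below is made up):

# Sketch: DMatrix accepts weight=None, so the tuple from as_arrays() can be
# passed through unconditionally.
import numpy as np
import xgboost as xgb

X = np.random.default_rng(1).normal(size=(6, 2))
y = np.ones(6, dtype=np.float32)
w = None  # what as_arrays() now yields when no weights were supplied
Xy = xgb.DMatrix(X, y, weight=w)
assert Xy.num_row() == 6 and Xy.num_col() == 2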