Mitigate flaky data iter test. (#8244)

- Reduce the number of batches.
- Verify labels.
This commit is contained in:
Jiaming Yuan 2022-09-14 17:54:14 +08:00 committed by GitHub
parent bdf265076d
commit 2e63af6117
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 13 additions and 3 deletions

View File

@@ -19,7 +19,7 @@ def test_gpu_single_batch() -> None:
@given( @given(
strategies.integers(0, 1024), strategies.integers(0, 1024),
strategies.integers(1, 7), strategies.integers(1, 7),
strategies.integers(0, 13), strategies.integers(0, 8),
strategies.booleans(), strategies.booleans(),
) )
@settings(deadline=None, print_blob=True) @settings(deadline=None, print_blob=True)

View File

@@ -92,6 +92,12 @@ def run_data_iterator(
assert non_increasing(results_from_it["Train"]["rmse"]) assert non_increasing(results_from_it["Train"]["rmse"])
X, y, w = it.as_arrays() X, y, w = it.as_arrays()
if use_cupy:
_y = y.get()
else:
_y = y
np.testing.assert_allclose(Xy.get_label(), _y)
Xy = xgb.DMatrix(X, y, weight=w) Xy = xgb.DMatrix(X, y, weight=w)
assert Xy.num_row() == n_samples_per_batch * n_batches assert Xy.num_row() == n_samples_per_batch * n_batches
assert Xy.num_col() == n_features assert Xy.num_col() == n_features

View File

@@ -12,6 +12,7 @@ from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED
import pytest import pytest
import gc import gc
import xgboost as xgb import xgboost as xgb
from xgboost.core import ArrayLike
import numpy as np import numpy as np
from scipy import sparse from scipy import sparse
import platform import platform
@@ -212,13 +213,16 @@ class IteratorForTest(xgb.core.DataIter):
def as_arrays( def as_arrays(
self, self,
) -> Tuple[Union[np.ndarray, sparse.csr_matrix], np.ndarray, np.ndarray]: ) -> Tuple[Union[np.ndarray, sparse.csr_matrix], ArrayLike, ArrayLike]:
if isinstance(self.X[0], sparse.csr_matrix): if isinstance(self.X[0], sparse.csr_matrix):
X = sparse.vstack(self.X, format="csr") X = sparse.vstack(self.X, format="csr")
else: else:
X = np.concatenate(self.X, axis=0) X = np.concatenate(self.X, axis=0)
y = np.concatenate(self.y, axis=0) y = np.concatenate(self.y, axis=0)
w = np.concatenate(self.w, axis=0) if self.w:
w = np.concatenate(self.w, axis=0)
else:
w = None
return X, y, w return X, y, w