[pyspark] fix empty data issue when constructing DMatrix (#8245)

Co-authored-by: Hyunsu Philip Cho <chohyu01@cs.washington.edu>
This commit is contained in:
Bobby Wang
2022-09-20 16:43:20 +08:00
committed by GitHub
parent 70df36c99c
commit 520586ffa7
5 changed files with 86 additions and 7 deletions

View File

@@ -68,11 +68,11 @@ def run_dmatrix_ctor(is_dqm: bool) -> None:
if is_dqm:
cols = [f"feat-{i}" for i in range(n_features)]
train_Xy, valid_Xy = create_dmatrix_from_partitions(
-iter(dfs), cols, 0, kwargs, False
+iter(dfs), cols, 0, kwargs, False, True
)
else:
train_Xy, valid_Xy = create_dmatrix_from_partitions(
-iter(dfs), None, None, kwargs, False
+iter(dfs), None, None, kwargs, False, True
)
assert valid_Xy is not None