Jiaming Yuan 546de5efd2
[pyspark] Cleanup data processing. (#8088)
- Use numpy stack for handling list of arrays.
- Reuse concat function from dask.
- Prepare for `QuantileDMatrix`.
- Remove unused code.
- Use iterator for prediction to avoid initializing xgboost model
2022-07-26 15:00:52 +08:00

24 lines
542 B
Python

import sys
from typing import List
import numpy as np
import pandas as pd
import pytest
sys.path.append("tests/python")
import testing as tm
# Skip the whole module when `tm.no_spark()` reports its skip condition.
# The reason is passed positionally (as in the platform guard below) because
# the `msg` keyword of `pytest.skip` was deprecated in favour of `reason`;
# the positional form is accepted by both older and newer pytest releases.
if tm.no_spark()["condition"]:
    pytest.skip(tm.no_spark()["reason"], allow_module_level=True)
# The guard matches both Windows ("win*") and macOS ("darwin"), so the
# message names both platforms.
if sys.platform.startswith(("win", "darwin")):
    pytest.skip(
        "Skipping PySpark tests on Windows and macOS", allow_module_level=True
    )
from test_spark.test_data import run_dmatrix_ctor
@pytest.mark.skipif(**tm.no_cudf())
def test_qdm_ctor() -> None:
    """Run the shared ``run_dmatrix_ctor`` check with its flag set to ``True``.

    The check itself is imported from ``test_spark.test_data``; this wrapper
    only supplies the argument.  The test is skipped according to the
    keyword arguments returned by ``tm.no_cudf()``.

    NOTE(review): judging by the test name, ``True`` presumably selects the
    QuantileDMatrix code path -- confirm against ``run_dmatrix_ctor``.
    """
    run_dmatrix_ctor(True)