merge latest, Jan 12 2024

This commit is contained in:
Hui Liu
2024-01-12 09:57:11 -08:00
251 changed files with 9023 additions and 5012 deletions

View File

@@ -5,9 +5,13 @@ import pytest
from xgboost import testing as tm
pytestmark = [
pytest.mark.skipif(**tm.no_dask()),
pytest.mark.skipif(**tm.no_dask_cuda()),
tm.timeout(60),
]
@pytest.mark.skipif(**tm.no_dask())
@pytest.mark.skipif(**tm.no_dask_cuda())
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.mgpu
def test_dask_training():
@@ -16,8 +20,6 @@ def test_dask_training():
subprocess.check_call(cmd)
@pytest.mark.skipif(**tm.no_dask_cuda())
@pytest.mark.skipif(**tm.no_dask())
@pytest.mark.mgpu
def test_dask_sklearn_demo():
script = os.path.join(tm.demo_dir(__file__), "dask", "sklearn_gpu_training.py")

View File

@@ -1,4 +1,4 @@
"""Copyright 2019-2022 XGBoost contributors"""
"""Copyright 2019-2023, XGBoost contributors"""
import asyncio
import json
from collections import OrderedDict
@@ -18,6 +18,7 @@ from xgboost.testing.params import hist_parameter_strategy
pytestmark = [
pytest.mark.skipif(**tm.no_dask()),
pytest.mark.skipif(**tm.no_dask_cuda()),
tm.timeout(60),
]
from ..test_with_dask.test_with_dask import generate_array
@@ -629,6 +630,7 @@ def test_nccl_load(local_cuda_client: Client, tree_method: str) -> None:
def run(wid: int) -> None:
# FIXME(jiamingy): https://github.com/dmlc/xgboost/issues/9147
from xgboost.core import _LIB, _register_log_callback
_register_log_callback(_LIB)
with CommunicatorContext(**args):

View File

@@ -2,7 +2,10 @@ import pytest
from xgboost import testing as tm
pytestmark = pytest.mark.skipif(**tm.no_spark())
pytestmark = [
pytest.mark.skipif(**tm.no_spark()),
tm.timeout(120),
]
from ..test_with_spark.test_data import run_dmatrix_ctor

View File

@@ -8,7 +8,10 @@ import sklearn
from xgboost import testing as tm
pytestmark = pytest.mark.skipif(**tm.no_spark())
pytestmark = [
pytest.mark.skipif(**tm.no_spark()),
tm.timeout(240),
]
from pyspark.ml.linalg import Vectors
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder

View File

@@ -1590,7 +1590,7 @@ class TestWithDask:
@given(
params=hist_parameter_strategy,
cache_param=hist_cache_strategy,
dataset=tm.make_dataset_strategy()
dataset=tm.make_dataset_strategy(),
)
@settings(
deadline=None, max_examples=10, suppress_health_check=suppress, print_blob=True
@@ -2250,16 +2250,27 @@ class TestDaskCallbacks:
],
)
for i in range(1, 10):
assert os.path.exists(os.path.join(tmpdir, "model_" + str(i) + ".json"))
assert os.path.exists(
os.path.join(
tmpdir,
f"model_{i}.{xgb.callback.TrainingCheckPoint.default_format}",
)
)
@gen_cluster(client=True, clean_kwargs={"processes": False, "threads": False}, allow_unclosed=True)
@gen_cluster(
client=True,
clean_kwargs={"processes": False, "threads": False},
allow_unclosed=True,
)
async def test_worker_left(c, s, a, b):
async with Worker(s.address):
dx = da.random.random((1000, 10)).rechunk(chunks=(10, None))
dy = da.random.random((1000,)).rechunk(chunks=(10,))
d_train = await xgb.dask.DaskDMatrix(
c, dx, dy,
c,
dx,
dy,
)
await async_poll_for(lambda: len(s.workers) == 2, timeout=5)
with pytest.raises(RuntimeError, match="Missing"):
@@ -2271,12 +2282,19 @@ async def test_worker_left(c, s, a, b):
)
@gen_cluster(client=True, Worker=Nanny, clean_kwargs={"processes": False, "threads": False}, allow_unclosed=True)
@gen_cluster(
client=True,
Worker=Nanny,
clean_kwargs={"processes": False, "threads": False},
allow_unclosed=True,
)
async def test_worker_restarted(c, s, a, b):
dx = da.random.random((1000, 10)).rechunk(chunks=(10, None))
dy = da.random.random((1000,)).rechunk(chunks=(10,))
d_train = await xgb.dask.DaskDMatrix(
c, dx, dy,
c,
dx,
dy,
)
await c.restart_workers([a.worker_address])
with pytest.raises(RuntimeError, match="Missing"):