[EM] Add basic distributed GPU tests. (#10861)

- Split Hist and Approx tests in unittests.
- Basic GPU tests for distributed.
2024-10-01 01:28:43 +08:00
parent 92f1c48a22
commit 9ecb7583e9
4 changed files with 90 additions and 136 deletions
--- a/tests/test_distributed/test_with_dask/test_external_memory.py
+++ b/tests/test_distributed/test_with_dask/test_external_memory.py
@@ -1,77 +1,18 @@
-from typing import List, cast
+"""Copyright 2024, XGBoost contributors"""

-import numpy as np
-from distributed import Client, Scheduler, Worker, get_worker
+import pytest
+from distributed import Client, Scheduler, Worker
 from distributed.utils_test import gen_cluster

 import xgboost as xgb
 from xgboost import testing as tm
-from xgboost.compat import concat
-
-
-def run_external_memory(worker_id: int, n_workers: int, comm_args: dict) -> None:
-    n_samples_per_batch = 32
-    n_features = 4
-    n_batches = 16
-    use_cupy = False
-
-    n_threads = get_worker().state.nthreads
-    with xgb.collective.CommunicatorContext(dmlc_communicator="rabit", **comm_args):
-        it = tm.IteratorForTest(
-            *tm.make_batches(
-                n_samples_per_batch,
-                n_features,
-                n_batches,
-                use_cupy,
-                random_state=worker_id,
-            ),
-            cache="cache",
-        )
-        Xy = xgb.DMatrix(it, nthread=n_threads)
-        results: xgb.callback.TrainingCallback.EvalsLog = {}
-        booster = xgb.train(
-            {"tree_method": "hist", "nthread": n_threads},
-            Xy,
-            evals=[(Xy, "Train")],
-            num_boost_round=32,
-            evals_result=results,
-        )
-        assert tm.non_increasing(cast(List[float], results["Train"]["rmse"]))
-
-    lx, ly, lw = [], [], []
-    for i in range(n_workers):
-        x, y, w = tm.make_batches(
-            n_samples_per_batch,
-            n_features,
-            n_batches,
-            use_cupy,
-            random_state=i,
-        )
-        lx.extend(x)
-        ly.extend(y)
-        lw.extend(w)
-
-    X = concat(lx)
-    yconcat = concat(ly)
-    wconcat = concat(lw)
-    Xy = xgb.DMatrix(X, yconcat, weight=wconcat, nthread=n_threads)
-
-    results_local: xgb.callback.TrainingCallback.EvalsLog = {}
-    booster = xgb.train(
-        {"tree_method": "hist", "nthread": n_threads},
-        Xy,
-        evals=[(Xy, "Train")],
-        num_boost_round=32,
-        evals_result=results_local,
-    )
-    np.testing.assert_allclose(
-        results["Train"]["rmse"], results_local["Train"]["rmse"], rtol=1e-4
-    )
+from xgboost.testing.dask import check_external_memory


+@pytest.mark.parametrize("is_qdm", [True, False])
@gen_cluster(client=True)
 async def test_external_memory(
-    client: Client, s: Scheduler, a: Worker, b: Worker
+    client: Client, s: Scheduler, a: Worker, b: Worker, is_qdm: bool
 ) -> None:
    workers = tm.get_client_workers(client)
    args = await client.sync(
@@ -83,6 +24,11 @@ async def test_external_memory(
    n_workers = len(workers)

    futs = client.map(
-        run_external_memory, range(n_workers), n_workers=n_workers, comm_args=args
+        check_external_memory,
+        range(n_workers),
+        n_workers=n_workers,
+        device="cpu",
+        comm_args=args,
+        is_qdm=is_qdm,
    )
    await client.gather(futs)
--- a/tests/test_distributed/test_with_dask/test_with_dask.py
+++ b/tests/test_distributed/test_with_dask/test_with_dask.py
@@ -7,24 +7,9 @@ import pickle
 import socket
 import tempfile
 from concurrent.futures import ThreadPoolExecutor
-from copy import copy
 from functools import partial
-from itertools import starmap
-from math import ceil
-from operator import attrgetter, getitem
 from pathlib import Path
-from typing import (
-    Any,
-    Dict,
-    Generator,
-    List,
-    Literal,
-    Optional,
-    Tuple,
-    Type,
-    TypeVar,
-    Union,
-)
+from typing import Any, Dict, Generator, Literal, Optional, Tuple, Type, Union

 import hypothesis
 import numpy as np
@@ -37,7 +22,6 @@ from sklearn.datasets import make_classification, make_regression
 import xgboost as xgb
 from xgboost import dask as dxgb
 from xgboost import testing as tm
-from xgboost.data import _is_cudf_df
 from xgboost.testing.params import hist_cache_strategy, hist_parameter_strategy
 from xgboost.testing.shared import (
    get_feature_weights,