[EM] Add basic distributed GPU tests. (#10861)
- Split Hist and Approx tests in unittests. - Basic GPU tests for distributed.
This commit is contained in:
@@ -1,77 +1,18 @@
|
||||
from typing import List, cast
|
||||
"""Copyright 2024, XGBoost contributors"""
|
||||
|
||||
import numpy as np
|
||||
from distributed import Client, Scheduler, Worker, get_worker
|
||||
import pytest
|
||||
from distributed import Client, Scheduler, Worker
|
||||
from distributed.utils_test import gen_cluster
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.compat import concat
|
||||
|
||||
|
||||
def run_external_memory(worker_id: int, n_workers: int, comm_args: dict) -> None:
    """Check distributed external-memory training against a local baseline.

    The worker first trains a ``hist`` model for 32 rounds on its own batches
    (generated with ``random_state=worker_id``) inside a rabit communicator
    context and asserts that the training RMSE never increases.  It then
    regenerates the batches of every worker id in ``range(n_workers)``,
    concatenates them into one in-memory ``DMatrix``, trains again with the
    same parameters and asserts that both RMSE histories agree to
    ``rtol=1e-4``.

    Parameters
    ----------
    worker_id :
        Index of this worker; used as the seed for its batches.
    n_workers :
        Total number of workers whose batches make up the baseline data.
    comm_args :
        Keyword arguments forwarded to ``xgb.collective.CommunicatorContext``.
    """
    rows_per_batch = 32
    n_columns = 4
    batches_per_worker = 16
    on_gpu = False

    worker_threads = get_worker().state.nthreads

    # Distributed run: each worker streams only its own batches.
    with xgb.collective.CommunicatorContext(dmlc_communicator="rabit", **comm_args):
        own_batches = tm.make_batches(
            rows_per_batch,
            n_columns,
            batches_per_worker,
            on_gpu,
            random_state=worker_id,
        )
        batch_iter = tm.IteratorForTest(*own_batches, cache="cache")
        dtrain = xgb.DMatrix(batch_iter, nthread=worker_threads)
        results: xgb.callback.TrainingCallback.EvalsLog = {}
        model = xgb.train(
            {"tree_method": "hist", "nthread": worker_threads},
            dtrain,
            evals=[(dtrain, "Train")],
            num_boost_round=32,
            evals_result=results,
        )
        dist_rmse = cast(List[float], results["Train"]["rmse"])
        assert tm.non_increasing(dist_rmse)

    # NOTE(review): the baseline below is assumed to sit outside the
    # communicator context, since it is compared against the distributed run.
    all_x, all_y, all_w = [], [], []
    for seed in range(n_workers):
        part_x, part_y, part_w = tm.make_batches(
            rows_per_batch,
            n_columns,
            batches_per_worker,
            on_gpu,
            random_state=seed,
        )
        all_x += part_x
        all_y += part_y
        all_w += part_w

    dtrain = xgb.DMatrix(
        concat(all_x),
        concat(all_y),
        weight=concat(all_w),
        nthread=worker_threads,
    )

    results_local: xgb.callback.TrainingCallback.EvalsLog = {}
    model = xgb.train(
        {"tree_method": "hist", "nthread": worker_threads},
        dtrain,
        evals=[(dtrain, "Train")],
        num_boost_round=32,
        evals_result=results_local,
    )

    # Distributed and single-process training must report matching RMSE.
    np.testing.assert_allclose(
        results["Train"]["rmse"], results_local["Train"]["rmse"], rtol=1e-4
    )
|
||||
from xgboost.testing.dask import check_external_memory
|
||||
|
||||
|
||||
@pytest.mark.parametrize("is_qdm", [True, False])
|
||||
@gen_cluster(client=True)
|
||||
async def test_external_memory(
|
||||
client: Client, s: Scheduler, a: Worker, b: Worker
|
||||
client: Client, s: Scheduler, a: Worker, b: Worker, is_qdm: bool
|
||||
) -> None:
|
||||
workers = tm.get_client_workers(client)
|
||||
args = await client.sync(
|
||||
@@ -83,6 +24,11 @@ async def test_external_memory(
|
||||
n_workers = len(workers)
|
||||
|
||||
futs = client.map(
|
||||
run_external_memory, range(n_workers), n_workers=n_workers, comm_args=args
|
||||
check_external_memory,
|
||||
range(n_workers),
|
||||
n_workers=n_workers,
|
||||
device="cpu",
|
||||
comm_args=args,
|
||||
is_qdm=is_qdm,
|
||||
)
|
||||
await client.gather(futs)
|
||||
|
||||
@@ -7,24 +7,9 @@ import pickle
|
||||
import socket
|
||||
import tempfile
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from copy import copy
|
||||
from functools import partial
|
||||
from itertools import starmap
|
||||
from math import ceil
|
||||
from operator import attrgetter, getitem
|
||||
from pathlib import Path
|
||||
from typing import (
|
||||
Any,
|
||||
Dict,
|
||||
Generator,
|
||||
List,
|
||||
Literal,
|
||||
Optional,
|
||||
Tuple,
|
||||
Type,
|
||||
TypeVar,
|
||||
Union,
|
||||
)
|
||||
from typing import Any, Dict, Generator, Literal, Optional, Tuple, Type, Union
|
||||
|
||||
import hypothesis
|
||||
import numpy as np
|
||||
@@ -37,7 +22,6 @@ from sklearn.datasets import make_classification, make_regression
|
||||
import xgboost as xgb
|
||||
from xgboost import dask as dxgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.data import _is_cudf_df
|
||||
from xgboost.testing.params import hist_cache_strategy, hist_parameter_strategy
|
||||
from xgboost.testing.shared import (
|
||||
get_feature_weights,
|
||||
|
||||
Reference in New Issue
Block a user