[CI] Refactor tests to reduce CI time. (#8312)

This commit is contained in:
Rory Mitchell
2022-10-12 11:32:06 +02:00
committed by GitHub
parent 39afdac3be
commit ce0382dcb0
19 changed files with 95 additions and 122 deletions

View File

@@ -44,6 +44,8 @@ from xgboost.dask import DaskDMatrix
dask.config.set({"distributed.scheduler.allowed-failures": False})
pytestmark = pytest.mark.timeout(30)
if hasattr(HealthCheck, 'function_scoped_fixture'):
suppress = [HealthCheck.function_scoped_fixture]
else:
@@ -381,7 +383,7 @@ def test_categorical(client: "Client") -> None:
def test_dask_predict_shape_infer(client: "Client") -> None:
X, y = make_classification(n_samples=1000, n_informative=5, n_classes=3)
X, y = make_classification(n_samples=kRows, n_informative=5, n_classes=3)
X_ = dd.from_array(X, chunksize=100)
y_ = dd.from_array(y, chunksize=100)
dtrain = xgb.dask.DaskDMatrix(client, data=X_, label=y_)
@@ -522,8 +524,8 @@ def test_boost_from_prediction(tree_method: str, client: "Client") -> None:
def test_inplace_predict(client: "Client") -> None:
from sklearn.datasets import fetch_california_housing
X_, y_ = fetch_california_housing(return_X_y=True)
from sklearn.datasets import load_diabetes
X_, y_ = load_diabetes(return_X_y=True)
X, y = dd.from_array(X_, chunksize=32), dd.from_array(y_, chunksize=32)
reg = xgb.dask.DaskXGBRegressor(n_estimators=4).fit(X, y)
booster = reg.get_booster()
@@ -841,7 +843,7 @@ def run_empty_dmatrix_cls(client: "Client", parameters: dict) -> None:
def run_empty_dmatrix_auc(client: "Client", tree_method: str, n_workers: int) -> None:
from sklearn import datasets
n_samples = 100
n_features = 97
n_features = 7
rng = np.random.RandomState(1994)
make_classification = partial(
@@ -894,9 +896,9 @@ def run_empty_dmatrix_auc(client: "Client", tree_method: str, n_workers: int) ->
def test_empty_dmatrix_auc() -> None:
with LocalCluster(n_workers=8, dashboard_address=":0") as cluster:
with LocalCluster(n_workers=4, dashboard_address=":0") as cluster:
with Client(cluster) as client:
run_empty_dmatrix_auc(client, "hist", 8)
run_empty_dmatrix_auc(client, "hist", 4)
def run_auc(client: "Client", tree_method: str) -> None:
@@ -1033,7 +1035,7 @@ async def run_dask_classifier_asyncio(scheduler_address: str) -> None:
def test_with_asyncio() -> None:
with LocalCluster(dashboard_address=":0") as cluster:
with LocalCluster(n_workers=2, dashboard_address=":0") as cluster:
with Client(cluster) as client:
address = client.scheduler.address
output = asyncio.run(run_from_dask_array_asyncio(address))
@@ -1420,11 +1422,11 @@ class TestWithDask:
@given(params=hist_parameter_strategy,
dataset=tm.dataset_strategy)
@settings(deadline=None, suppress_health_check=suppress, print_blob=True)
@settings(deadline=None, max_examples=10, suppress_health_check=suppress, print_blob=True)
def test_hist(
self, params: Dict, dataset: tm.TestDataset, client: "Client"
) -> None:
num_rounds = 30
num_rounds = 10
self.run_updater_test(client, params, num_rounds, dataset, 'hist')
def test_quantile_dmatrix(self, client: Client) -> None:
@@ -1465,11 +1467,11 @@ class TestWithDask:
@given(params=exact_parameter_strategy,
dataset=tm.dataset_strategy)
@settings(deadline=None, suppress_health_check=suppress, print_blob=True)
@settings(deadline=None, max_examples=10, suppress_health_check=suppress, print_blob=True)
def test_approx(
self, client: "Client", params: Dict, dataset: tm.TestDataset
) -> None:
num_rounds = 30
num_rounds = 10
self.run_updater_test(client, params, num_rounds, dataset, 'approx')
def run_quantile(self, name: str) -> None:
@@ -1773,16 +1775,16 @@ class TestWithDask:
assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-5, 1e-5)
def test_shap(self, client: "Client") -> None:
from sklearn.datasets import fetch_california_housing, load_digits
X, y = fetch_california_housing(return_X_y=True)
from sklearn.datasets import load_diabetes, load_iris
X, y = load_diabetes(return_X_y=True)
params: Dict[str, Any] = {'objective': 'reg:squarederror'}
self.run_shap(X, y, params, client)
X, y = load_digits(return_X_y=True)
params = {'objective': 'multi:softmax', 'num_class': 10}
X, y = load_iris(return_X_y=True)
params = {'objective': 'multi:softmax', 'num_class': 3}
self.run_shap(X, y, params, client)
params = {'objective': 'multi:softprob', 'num_class': 10}
params = {'objective': 'multi:softprob', 'num_class': 3}
self.run_shap(X, y, params, client)
self.run_shap_cls_sklearn(X, y, client)
@@ -1818,8 +1820,8 @@ class TestWithDask:
1e-5, 1e-5)
def test_shap_interactions(self, client: "Client") -> None:
from sklearn.datasets import fetch_california_housing
X, y = fetch_california_housing(return_X_y=True)
from sklearn.datasets import load_diabetes
X, y = load_diabetes(return_X_y=True)
params = {'objective': 'reg:squarederror'}
self.run_shap_interactions(X, y, params, client)