Merge branch 'master' into sync-condition-2023May15

This commit is contained in:
amdsc21 2023-05-23 01:07:50 +02:00
commit b994a38b28
8 changed files with 66 additions and 51 deletions

View File

@ -603,26 +603,6 @@ sparse_datasets_strategy = strategies.sampled_from(
] ]
) )
_unweighted_datasets_strategy = strategies.sampled_from(
[
TestDataset(
"calif_housing", get_california_housing, "reg:squarederror", "rmse"
),
TestDataset(
"calif_housing-l1", get_california_housing, "reg:absoluteerror", "mae"
),
TestDataset("cancer", get_cancer, "binary:logistic", "logloss"),
TestDataset("sparse", get_sparse, "reg:squarederror", "rmse"),
TestDataset("sparse-l1", get_sparse, "reg:absoluteerror", "mae"),
TestDataset(
"empty",
lambda: (np.empty((0, 100)), np.empty(0)),
"reg:squarederror",
"rmse",
),
]
)
def make_datasets_with_margin( def make_datasets_with_margin(
unweighted_strategy: strategies.SearchStrategy, unweighted_strategy: strategies.SearchStrategy,
@ -664,7 +644,28 @@ def make_datasets_with_margin(
# A strategy for drawing from a set of example datasets. May add random weights to the # A strategy for drawing from a set of example datasets. May add random weights to the
# dataset # dataset
dataset_strategy = make_datasets_with_margin(_unweighted_datasets_strategy)() @memory.cache
def make_dataset_strategy() -> Callable:
_unweighted_datasets_strategy = strategies.sampled_from(
[
TestDataset(
"calif_housing", get_california_housing, "reg:squarederror", "rmse"
),
TestDataset(
"calif_housing-l1", get_california_housing, "reg:absoluteerror", "mae"
),
TestDataset("cancer", get_cancer, "binary:logistic", "logloss"),
TestDataset("sparse", get_sparse, "reg:squarederror", "rmse"),
TestDataset("sparse-l1", get_sparse, "reg:absoluteerror", "mae"),
TestDataset(
"empty",
lambda: (np.empty((0, 100)), np.empty(0)),
"reg:squarederror",
"rmse",
),
]
)
return make_datasets_with_margin(_unweighted_datasets_strategy)()
_unweighted_multi_datasets_strategy = strategies.sampled_from( _unweighted_multi_datasets_strategy = strategies.sampled_from(

View File

@ -28,8 +28,7 @@ def train_result(param, dmat, num_rounds):
class TestGPULinear: class TestGPULinear:
@given(parameter_strategy, strategies.integers(10, 50), @given(parameter_strategy, strategies.integers(10, 50), tm.make_dataset_strategy())
tm.dataset_strategy)
@settings(deadline=None, max_examples=20, print_blob=True) @settings(deadline=None, max_examples=20, print_blob=True)
def test_gpu_coordinate(self, param, num_rounds, dataset): def test_gpu_coordinate(self, param, num_rounds, dataset):
assume(len(dataset.y) > 0) assume(len(dataset.y) > 0)
@ -45,7 +44,7 @@ class TestGPULinear:
@given( @given(
parameter_strategy, parameter_strategy,
strategies.integers(10, 50), strategies.integers(10, 50),
tm.dataset_strategy, tm.make_dataset_strategy(),
strategies.floats(1e-5, 0.8), strategies.floats(1e-5, 0.8),
strategies.floats(1e-5, 0.8) strategies.floats(1e-5, 0.8)
) )

View File

@ -265,8 +265,9 @@ class TestGPUPredict:
base_margin = cudf.Series(rng.randn(rows)) base_margin = cudf.Series(rng.randn(rows))
self.run_inplace_base_margin(booster, dtrain, X, base_margin) self.run_inplace_base_margin(booster, dtrain, X, base_margin)
@given(strategies.integers(1, 10), @given(
tm.dataset_strategy, shap_parameter_strategy) strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy
)
@settings(deadline=None, max_examples=20, print_blob=True) @settings(deadline=None, max_examples=20, print_blob=True)
def test_shap(self, num_rounds, dataset, param): def test_shap(self, num_rounds, dataset, param):
if dataset.name.endswith("-l1"): # not supported by the exact tree method if dataset.name.endswith("-l1"): # not supported by the exact tree method
@ -281,8 +282,9 @@ class TestGPUPredict:
assume(len(dataset.y) > 0) assume(len(dataset.y) > 0)
assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-3, 1e-3) assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-3, 1e-3)
@given(strategies.integers(1, 10), @given(
tm.dataset_strategy, shap_parameter_strategy) strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy
)
@settings(deadline=None, max_examples=10, print_blob=True) @settings(deadline=None, max_examples=10, print_blob=True)
def test_shap_interactions(self, num_rounds, dataset, param): def test_shap_interactions(self, num_rounds, dataset, param):
if dataset.name.endswith("-l1"): # not supported by the exact tree method if dataset.name.endswith("-l1"): # not supported by the exact tree method
@ -335,7 +337,7 @@ class TestGPUPredict:
np.testing.assert_equal(cpu_leaf, gpu_leaf) np.testing.assert_equal(cpu_leaf, gpu_leaf)
@given(predict_parameter_strategy, tm.dataset_strategy) @given(predict_parameter_strategy, tm.make_dataset_strategy())
@settings(deadline=None, max_examples=20, print_blob=True) @settings(deadline=None, max_examples=20, print_blob=True)
def test_predict_leaf_gbtree(self, param, dataset): def test_predict_leaf_gbtree(self, param, dataset):
# Unsupported for random forest # Unsupported for random forest
@ -346,7 +348,7 @@ class TestGPUPredict:
param['tree_method'] = 'gpu_hist' param['tree_method'] = 'gpu_hist'
self.run_predict_leaf_booster(param, 10, dataset) self.run_predict_leaf_booster(param, 10, dataset)
@given(predict_parameter_strategy, tm.dataset_strategy) @given(predict_parameter_strategy, tm.make_dataset_strategy())
@settings(deadline=None, max_examples=20, print_blob=True) @settings(deadline=None, max_examples=20, print_blob=True)
def test_predict_leaf_dart(self, param: dict, dataset: tm.TestDataset) -> None: def test_predict_leaf_dart(self, param: dict, dataset: tm.TestDataset) -> None:
# Unsupported for random forest # Unsupported for random forest

View File

@ -48,7 +48,9 @@ class TestGPUUpdatersMulti:
class TestGPUUpdaters: class TestGPUUpdaters:
cputest = test_up.TestTreeMethod() cputest = test_up.TestTreeMethod()
@given(hist_parameter_strategy, strategies.integers(1, 20), tm.dataset_strategy) @given(
hist_parameter_strategy, strategies.integers(1, 20), tm.make_dataset_strategy()
)
@settings(deadline=None, max_examples=50, print_blob=True) @settings(deadline=None, max_examples=50, print_blob=True)
def test_gpu_hist(self, param, num_rounds, dataset): def test_gpu_hist(self, param, num_rounds, dataset):
param["tree_method"] = "gpu_hist" param["tree_method"] = "gpu_hist"
@ -150,7 +152,7 @@ class TestGPUUpdaters:
@given( @given(
hist_parameter_strategy, hist_parameter_strategy,
strategies.integers(1, 20), strategies.integers(1, 20),
tm.dataset_strategy tm.make_dataset_strategy(),
) )
@settings(deadline=None, max_examples=20, print_blob=True) @settings(deadline=None, max_examples=20, print_blob=True)
def test_gpu_hist_device_dmatrix( def test_gpu_hist_device_dmatrix(
@ -171,7 +173,7 @@ class TestGPUUpdaters:
@given( @given(
hist_parameter_strategy, hist_parameter_strategy,
strategies.integers(1, 3), strategies.integers(1, 3),
tm.dataset_strategy tm.make_dataset_strategy(),
) )
@settings(deadline=None, max_examples=10, print_blob=True) @settings(deadline=None, max_examples=10, print_blob=True)
def test_external_memory(self, param, num_rounds, dataset): def test_external_memory(self, param, num_rounds, dataset):
@ -213,7 +215,7 @@ class TestGPUUpdaters:
np.testing.assert_allclose(predictions, 0.0, 1e-6) np.testing.assert_allclose(predictions, 0.0, 1e-6)
@pytest.mark.mgpu @pytest.mark.mgpu
@given(tm.dataset_strategy, strategies.integers(0, 10)) @given(tm.make_dataset_strategy(), strategies.integers(0, 10))
@settings(deadline=None, max_examples=10, print_blob=True) @settings(deadline=None, max_examples=10, print_blob=True)
def test_specified_gpu_id_gpu_update(self, dataset, gpu_id): def test_specified_gpu_id_gpu_update(self, dataset, gpu_id):
param = {'tree_method': 'gpu_hist', 'gpu_id': gpu_id} param = {'tree_method': 'gpu_hist', 'gpu_id': gpu_id}

View File

@ -28,8 +28,12 @@ def train_result(param, dmat, num_rounds):
class TestLinear: class TestLinear:
@given(parameter_strategy, strategies.integers(10, 50), @given(
tm.dataset_strategy, coord_strategy) parameter_strategy,
strategies.integers(10, 50),
tm.make_dataset_strategy(),
coord_strategy
)
@settings(deadline=None, max_examples=20, print_blob=True) @settings(deadline=None, max_examples=20, print_blob=True)
def test_coordinate(self, param, num_rounds, dataset, coord_param): def test_coordinate(self, param, num_rounds, dataset, coord_param):
param['updater'] = 'coord_descent' param['updater'] = 'coord_descent'
@ -45,7 +49,7 @@ class TestLinear:
@given( @given(
parameter_strategy, parameter_strategy,
strategies.integers(10, 50), strategies.integers(10, 50),
tm.dataset_strategy, tm.make_dataset_strategy(),
coord_strategy, coord_strategy,
strategies.floats(1e-5, 0.8), strategies.floats(1e-5, 0.8),
strategies.floats(1e-5, 0.8) strategies.floats(1e-5, 0.8)
@ -61,8 +65,9 @@ class TestLinear:
note(result) note(result)
assert tm.non_increasing([result[0], result[-1]]) assert tm.non_increasing([result[0], result[-1]])
@given(parameter_strategy, strategies.integers(10, 50), @given(
tm.dataset_strategy) parameter_strategy, strategies.integers(10, 50), tm.make_dataset_strategy()
)
@settings(deadline=None, max_examples=20, print_blob=True) @settings(deadline=None, max_examples=20, print_blob=True)
def test_shotgun(self, param, num_rounds, dataset): def test_shotgun(self, param, num_rounds, dataset):
param['updater'] = 'shotgun' param['updater'] = 'shotgun'
@ -77,9 +82,13 @@ class TestLinear:
sampled_result = result sampled_result = result
assert tm.non_increasing(sampled_result) assert tm.non_increasing(sampled_result)
@given(parameter_strategy, strategies.integers(10, 50), @given(
tm.dataset_strategy, strategies.floats(1e-5, 1.0), parameter_strategy,
strategies.floats(1e-5, 1.0)) strategies.integers(10, 50),
tm.make_dataset_strategy(),
strategies.floats(1e-5, 1.0),
strategies.floats(1e-5, 1.0)
)
@settings(deadline=None, max_examples=20, print_blob=True) @settings(deadline=None, max_examples=20, print_blob=True)
def test_shotgun_regularised(self, param, num_rounds, dataset, alpha, lambd): def test_shotgun_regularised(self, param, num_rounds, dataset, alpha, lambd):
param['updater'] = 'shotgun' param['updater'] = 'shotgun'

View File

@ -87,8 +87,9 @@ class TestTreeMethod:
USE_ONEHOT = np.iinfo(np.int32).max USE_ONEHOT = np.iinfo(np.int32).max
USE_PART = 1 USE_PART = 1
@given(exact_parameter_strategy, strategies.integers(1, 20), @given(
tm.dataset_strategy) exact_parameter_strategy, strategies.integers(1, 20), tm.make_dataset_strategy()
)
@settings(deadline=None, print_blob=True) @settings(deadline=None, print_blob=True)
def test_exact(self, param, num_rounds, dataset): def test_exact(self, param, num_rounds, dataset):
if dataset.name.endswith("-l1"): if dataset.name.endswith("-l1"):
@ -102,7 +103,7 @@ class TestTreeMethod:
exact_parameter_strategy, exact_parameter_strategy,
hist_parameter_strategy, hist_parameter_strategy,
strategies.integers(1, 20), strategies.integers(1, 20),
tm.dataset_strategy, tm.make_dataset_strategy(),
) )
@settings(deadline=None, print_blob=True) @settings(deadline=None, print_blob=True)
def test_approx(self, param, hist_param, num_rounds, dataset): def test_approx(self, param, hist_param, num_rounds, dataset):
@ -141,7 +142,7 @@ class TestTreeMethod:
exact_parameter_strategy, exact_parameter_strategy,
hist_parameter_strategy, hist_parameter_strategy,
strategies.integers(1, 20), strategies.integers(1, 20),
tm.dataset_strategy tm.make_dataset_strategy()
) )
@settings(deadline=None, print_blob=True) @settings(deadline=None, print_blob=True)
def test_hist(self, param: dict, hist_param: dict, num_rounds: int, dataset: tm.TestDataset) -> None: def test_hist(self, param: dict, hist_param: dict, num_rounds: int, dataset: tm.TestDataset) -> None:

View File

@ -243,7 +243,7 @@ class TestDistributedGPU:
@given( @given(
params=hist_parameter_strategy, params=hist_parameter_strategy,
num_rounds=strategies.integers(1, 20), num_rounds=strategies.integers(1, 20),
dataset=tm.dataset_strategy, dataset=tm.make_dataset_strategy(),
dmatrix_type=strategies.sampled_from( dmatrix_type=strategies.sampled_from(
[dxgb.DaskDMatrix, dxgb.DaskQuantileDMatrix] [dxgb.DaskDMatrix, dxgb.DaskQuantileDMatrix]
), ),

View File

@ -1458,9 +1458,10 @@ class TestWithDask:
else: else:
assert history[-1] < history[0] assert history[-1] < history[0]
@given(params=hist_parameter_strategy, @given(params=hist_parameter_strategy, dataset=tm.make_dataset_strategy())
dataset=tm.dataset_strategy) @settings(
@settings(deadline=None, max_examples=10, suppress_health_check=suppress, print_blob=True) deadline=None, max_examples=10, suppress_health_check=suppress, print_blob=True
)
def test_hist( def test_hist(
self, params: Dict, dataset: tm.TestDataset, client: "Client" self, params: Dict, dataset: tm.TestDataset, client: "Client"
) -> None: ) -> None:
@ -1524,7 +1525,7 @@ class TestWithDask:
rmse = result["history"]["Valid"]["rmse"][-1] rmse = result["history"]["Valid"]["rmse"][-1]
assert rmse < 32.0 assert rmse < 32.0
@given(params=hist_parameter_strategy, dataset=tm.dataset_strategy) @given(params=hist_parameter_strategy, dataset=tm.make_dataset_strategy())
@settings( @settings(
deadline=None, max_examples=10, suppress_health_check=suppress, print_blob=True deadline=None, max_examples=10, suppress_health_check=suppress, print_blob=True
) )