From 8b3ecfca255ed99525ced4e183656561ce3764c3 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 28 Mar 2022 21:20:50 +0800 Subject: [PATCH] Mitigate flaky tests. (#7749) * Skip non-increasing test with external memory when subsample is used. * Increase bin numbers for boost from prediction test. This mitigates the effect of non-deterministic partitioning. --- tests/python-gpu/test_gpu_data_iterator.py | 2 +- tests/python-gpu/test_gpu_linear.py | 4 +-- tests/python-gpu/test_gpu_prediction.py | 10 +++--- tests/python-gpu/test_gpu_updaters.py | 10 +++--- tests/python-gpu/test_gpu_with_dask.py | 6 ++-- tests/python/test_data_iterator.py | 8 +++-- tests/python/test_linear.py | 8 ++--- tests/python/test_updaters.py | 8 ++--- tests/python/test_with_dask.py | 40 ++++++++++++---------- 9 files changed, 50 insertions(+), 46 deletions(-) diff --git a/tests/python-gpu/test_gpu_data_iterator.py b/tests/python-gpu/test_gpu_data_iterator.py index 2975834f9..f4eaab15e 100644 --- a/tests/python-gpu/test_gpu_data_iterator.py +++ b/tests/python-gpu/test_gpu_data_iterator.py @@ -22,7 +22,7 @@ def test_gpu_single_batch() -> None: strategies.integers(0, 13), strategies.booleans(), ) -@settings(deadline=None) +@settings(deadline=None, print_blob=True) def test_gpu_data_iterator( n_samples_per_batch: int, n_features: int, n_batches: int, subsample: bool ) -> None: diff --git a/tests/python-gpu/test_gpu_linear.py b/tests/python-gpu/test_gpu_linear.py index 9791169f8..af8fe1bbe 100644 --- a/tests/python-gpu/test_gpu_linear.py +++ b/tests/python-gpu/test_gpu_linear.py @@ -30,7 +30,7 @@ def train_result(param, dmat, num_rounds): class TestGPULinear: @given(parameter_strategy, strategies.integers(10, 50), tm.dataset_strategy) - @settings(deadline=None) + @settings(deadline=None, print_blob=True) def test_gpu_coordinate(self, param, num_rounds, dataset): assume(len(dataset.y) > 0) param['updater'] = 'gpu_coord_descent' @@ -45,7 +45,7 @@ class TestGPULinear: @given(parameter_strategy, strategies.integers(10, 50), tm.dataset_strategy, strategies.floats(1e-5, 1.0), strategies.floats(1e-5, 1.0)) - @settings(deadline=None) + @settings(deadline=None, print_blob=True) def test_gpu_coordinate_regularised(self, param, num_rounds, dataset, alpha, lambd): assume(len(dataset.y) > 0) param['updater'] = 'gpu_coord_descent' diff --git a/tests/python-gpu/test_gpu_prediction.py b/tests/python-gpu/test_gpu_prediction.py index d098b6b4a..38f4db07d 100644 --- a/tests/python-gpu/test_gpu_prediction.py +++ b/tests/python-gpu/test_gpu_prediction.py @@ -247,7 +247,7 @@ class TestGPUPredict: @given(strategies.integers(1, 10), tm.dataset_strategy, shap_parameter_strategy) - @settings(deadline=None) + @settings(deadline=None, print_blob=True) def test_shap(self, num_rounds, dataset, param): param.update({"predictor": "gpu_predictor", "gpu_id": 0}) param = dataset.set_params(param) @@ -261,7 +261,7 @@ class TestGPUPredict: @given(strategies.integers(1, 10), tm.dataset_strategy, shap_parameter_strategy) - @settings(deadline=None, max_examples=20) + @settings(deadline=None, max_examples=20, print_blob=True) def test_shap_interactions(self, num_rounds, dataset, param): param.update({"predictor": "gpu_predictor", "gpu_id": 0}) param = dataset.set_params(param) @@ -312,14 +312,14 @@ class TestGPUPredict: np.testing.assert_equal(cpu_leaf, gpu_leaf) @given(predict_parameter_strategy, tm.dataset_strategy) - @settings(deadline=None) + @settings(deadline=None, print_blob=True) def test_predict_leaf_gbtree(self, param, dataset): param['booster'] = 'gbtree' param['tree_method'] = 'gpu_hist' self.run_predict_leaf_booster(param, 10, dataset) @given(predict_parameter_strategy, tm.dataset_strategy) - @settings(deadline=None) + @settings(deadline=None, print_blob=True) def test_predict_leaf_dart(self, param, dataset): param['booster'] = 'dart' param['tree_method'] = 'gpu_hist' @@ -330,7 +330,7 @@ class TestGPUPredict: @given(df=data_frames([column('x0', elements=strategies.integers(min_value=0, max_value=3)), column('x1', elements=strategies.integers(min_value=0, max_value=5))], index=range_indexes(min_size=20, max_size=50))) - @settings(deadline=None) + @settings(deadline=None, print_blob=True) def test_predict_categorical_split(self, df): from sklearn.metrics import mean_squared_error diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py index 5c5d19644..a3427b566 100644 --- a/tests/python-gpu/test_gpu_updaters.py +++ b/tests/python-gpu/test_gpu_updaters.py @@ -46,7 +46,7 @@ class TestGPUUpdaters: cputest = test_up.TestTreeMethod() @given(parameter_strategy, strategies.integers(1, 20), tm.dataset_strategy) - @settings(deadline=None) + @settings(deadline=None, print_blob=True) def test_gpu_hist(self, param, num_rounds, dataset): param["tree_method"] = "gpu_hist" param = dataset.set_params(param) @@ -56,7 +56,7 @@ class TestGPUUpdaters: @given(strategies.integers(10, 400), strategies.integers(3, 8), strategies.integers(1, 2), strategies.integers(4, 7)) - @settings(deadline=None) + @settings(deadline=None, print_blob=True) @pytest.mark.skipif(**tm.no_pandas()) def test_categorical(self, rows, cols, rounds, cats): self.cputest.run_categorical_basic(rows, cols, rounds, cats, "gpu_hist") @@ -76,7 +76,7 @@ class TestGPUUpdaters: @pytest.mark.skipif(**tm.no_cupy()) @given(parameter_strategy, strategies.integers(1, 20), tm.dataset_strategy) - @settings(deadline=None) + @settings(deadline=None, print_blob=True) def test_gpu_hist_device_dmatrix(self, param, num_rounds, dataset): # We cannot handle empty dataset yet assume(len(dataset.y) > 0) @@ -88,7 +88,7 @@ class TestGPUUpdaters: @given(parameter_strategy, strategies.integers(1, 20), tm.dataset_strategy) - @settings(deadline=None) + @settings(deadline=None, print_blob=True) def test_external_memory(self, param, num_rounds, dataset): # We cannot handle empty dataset yet assume(len(dataset.y) > 0) @@ -127,7 +127,7 @@ class TestGPUUpdaters: @pytest.mark.mgpu @given(tm.dataset_strategy, strategies.integers(0, 10)) - @settings(deadline=None, max_examples=10) + @settings(deadline=None, max_examples=10, print_blob=True) def test_specified_gpu_id_gpu_update(self, dataset, gpu_id): param = {'tree_method': 'gpu_hist', 'gpu_id': gpu_id} param = dataset.set_params(param) diff --git a/tests/python-gpu/test_gpu_with_dask.py b/tests/python-gpu/test_gpu_with_dask.py index cb8b6a8e7..1f0339e91 100644 --- a/tests/python-gpu/test_gpu_with_dask.py +++ b/tests/python-gpu/test_gpu_with_dask.py @@ -27,7 +27,7 @@ from test_with_dask import run_empty_dmatrix_reg # noqa from test_with_dask import run_empty_dmatrix_auc # noqa from test_with_dask import run_auc # noqa from test_with_dask import run_boost_from_prediction # noqa -from test_with_dask import run_boost_from_prediction_multi_clasas # noqa +from test_with_dask import run_boost_from_prediction_multi_class # noqa from test_with_dask import run_dask_classifier # noqa from test_with_dask import run_empty_dmatrix_cls # noqa from test_with_dask import _get_client_workers # noqa @@ -216,7 +216,7 @@ def test_boost_from_prediction(local_cuda_cluster: LocalCUDACluster) -> None: X_, y_ = load_digits(return_X_y=True) X = dd.from_array(X_, chunksize=100).map_partitions(cudf.from_pandas) y = dd.from_array(y_, chunksize=100).map_partitions(cudf.from_pandas) - run_boost_from_prediction_multi_clasas(X, y, "gpu_hist", client) + run_boost_from_prediction_multi_class(X, y, "gpu_hist", client) class TestDistributedGPU: @@ -231,7 +231,7 @@ class TestDistributedGPU: num_rounds=strategies.integers(1, 20), dataset=tm.dataset_strategy, ) - @settings(deadline=duration(seconds=120), suppress_health_check=suppress) + @settings(deadline=duration(seconds=120), suppress_health_check=suppress, print_blob=True) @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.parametrize( "local_cuda_cluster", [{"n_workers": 2}], indirect=["local_cuda_cluster"] diff --git a/tests/python/test_data_iterator.py b/tests/python/test_data_iterator.py index e4254bb9e..233a3a4d0 100644 --- a/tests/python/test_data_iterator.py +++ b/tests/python/test_data_iterator.py @@ -108,7 +108,8 @@ def run_data_iterator( evals_result=results_from_it, verbose_eval=False, ) - assert non_increasing(results_from_it["Train"]["rmse"]) + if not subsample: + assert non_increasing(results_from_it["Train"]["rmse"]) X, y = it.as_arrays() Xy = xgb.DMatrix(X, y) @@ -125,7 +126,8 @@ def run_data_iterator( verbose_eval=False, ) arr_predt = from_arrays.predict(Xy) - assert non_increasing(results_from_arrays["Train"]["rmse"]) + if not subsample: + assert non_increasing(results_from_arrays["Train"]["rmse"]) rtol = 1e-2 # CPU sketching is more memory efficient but less consistent due to small chunks @@ -146,7 +148,7 @@ def run_data_iterator( strategies.integers(0, 13), strategies.booleans(), ) -@settings(deadline=None) +@settings(deadline=None, print_blob=True) def test_data_iterator( n_samples_per_batch: int, n_features: int, diff --git a/tests/python/test_linear.py b/tests/python/test_linear.py index 635048ddb..2ea3e44dd 100644 --- a/tests/python/test_linear.py +++ b/tests/python/test_linear.py @@ -26,7 +26,7 @@ def train_result(param, dmat, num_rounds): class TestLinear: @given(parameter_strategy, strategies.integers(10, 50), tm.dataset_strategy, coord_strategy) - @settings(deadline=None) + @settings(deadline=None, print_blob=True) def test_coordinate(self, param, num_rounds, dataset, coord_param): param['updater'] = 'coord_descent' param.update(coord_param) @@ -41,7 +41,7 @@ class TestLinear: @given(parameter_strategy, strategies.integers(10, 50), tm.dataset_strategy, coord_strategy, strategies.floats(1e-5, 1.0), strategies.floats(1e-5, 1.0)) - @settings(deadline=None) + @settings(deadline=None, print_blob=True) def test_coordinate_regularised(self, param, num_rounds, dataset, coord_param, alpha, lambd): param['updater'] = 'coord_descent' param['alpha'] = alpha @@ -54,7 +54,7 @@ class TestLinear: @given(parameter_strategy, strategies.integers(10, 50), tm.dataset_strategy) - @settings(deadline=None) + @settings(deadline=None, print_blob=True) def test_shotgun(self, param, num_rounds, dataset): param['updater'] = 'shotgun' param = dataset.set_params(param) @@ -71,7 +71,7 @@ class TestLinear: @given(parameter_strategy, strategies.integers(10, 50), tm.dataset_strategy, strategies.floats(1e-5, 1.0), strategies.floats(1e-5, 1.0)) - @settings(deadline=None) + @settings(deadline=None, print_blob=True) def test_shotgun_regularised(self, param, num_rounds, dataset, alpha, lambd): param['updater'] = 'shotgun' param['alpha'] = alpha diff --git a/tests/python/test_updaters.py b/tests/python/test_updaters.py index b73736c69..cdf40d843 100644 --- a/tests/python/test_updaters.py +++ b/tests/python/test_updaters.py @@ -38,7 +38,7 @@ def train_result(param, dmat, num_rounds): class TestTreeMethod: @given(exact_parameter_strategy, strategies.integers(1, 20), tm.dataset_strategy) - @settings(deadline=None) + @settings(deadline=None, print_blob=True) def test_exact(self, param, num_rounds, dataset): param['tree_method'] = 'exact' param = dataset.set_params(param) @@ -51,7 +51,7 @@ class TestTreeMethod: strategies.integers(1, 20), tm.dataset_strategy, ) - @settings(deadline=None) + @settings(deadline=None, print_blob=True) def test_approx(self, param, hist_param, num_rounds, dataset): param["tree_method"] = "approx" param = dataset.set_params(param) @@ -86,7 +86,7 @@ class TestTreeMethod: @given(exact_parameter_strategy, hist_parameter_strategy, strategies.integers(1, 20), tm.dataset_strategy) - @settings(deadline=None) + @settings(deadline=None, print_blob=True) def test_hist(self, param, hist_param, num_rounds, dataset): param['tree_method'] = 'hist' param = dataset.set_params(param) @@ -241,7 +241,7 @@ class TestTreeMethod: @given(strategies.integers(10, 400), strategies.integers(3, 8), strategies.integers(1, 2), strategies.integers(4, 7)) - @settings(deadline=None) + @settings(deadline=None, print_blob=True) @pytest.mark.skipif(**tm.no_pandas()) def test_categorical(self, rows, cols, rounds, cats): self.run_categorical_basic(rows, cols, rounds, cats, "approx") diff --git a/tests/python/test_with_dask.py b/tests/python/test_with_dask.py index 9a68f4453..484c92e3d 100644 --- a/tests/python/test_with_dask.py +++ b/tests/python/test_with_dask.py @@ -337,14 +337,14 @@ def test_dask_predict_shape_infer(client: "Client") -> None: assert prediction.shape[1] == 3 -def run_boost_from_prediction_multi_clasas( +def run_boost_from_prediction_multi_class( X: xgb.dask._DaskCollection, y: xgb.dask._DaskCollection, tree_method: str, - client: "Client" + client: "Client", ) -> None: model_0 = xgb.dask.DaskXGBClassifier( - learning_rate=0.3, random_state=0, n_estimators=4, tree_method=tree_method + learning_rate=0.3, n_estimators=4, tree_method=tree_method, max_bin=768 ) model_0.fit(X=X, y=y) margin = xgb.dask.inplace_predict( @@ -352,18 +352,18 @@ def run_boost_from_prediction_multi_clasas( ) model_1 = xgb.dask.DaskXGBClassifier( - learning_rate=0.3, random_state=0, n_estimators=4, tree_method=tree_method + learning_rate=0.3, n_estimators=4, tree_method=tree_method, max_bin=768 ) model_1.fit(X=X, y=y, base_margin=margin) predictions_1 = xgb.dask.predict( client, model_1.get_booster(), xgb.dask.DaskDMatrix(client, X, base_margin=margin), - output_margin=True + output_margin=True, ) model_2 = xgb.dask.DaskXGBClassifier( - learning_rate=0.3, random_state=0, n_estimators=8, tree_method=tree_method + learning_rate=0.3, n_estimators=8, tree_method=tree_method, max_bin=768 ) model_2.fit(X=X, y=y) predictions_2 = xgb.dask.inplace_predict( @@ -382,26 +382,29 @@ def run_boost_from_prediction_multi_clasas( def run_boost_from_prediction( - X: xgb.dask._DaskCollection, y: xgb.dask._DaskCollection, tree_method: str, client: "Client" + X: xgb.dask._DaskCollection, + y: xgb.dask._DaskCollection, + tree_method: str, + client: "Client", ) -> None: X = client.persist(X) y = client.persist(y) model_0 = xgb.dask.DaskXGBClassifier( - learning_rate=0.3, random_state=0, n_estimators=4, - tree_method=tree_method) + learning_rate=0.3, n_estimators=4, tree_method=tree_method, max_bin=512 + ) model_0.fit(X=X, y=y) margin = model_0.predict(X, output_margin=True) model_1 = xgb.dask.DaskXGBClassifier( - learning_rate=0.3, random_state=0, n_estimators=4, - tree_method=tree_method) + learning_rate=0.3, n_estimators=4, tree_method=tree_method, max_bin=512 + ) model_1.fit(X=X, y=y, base_margin=margin) predictions_1 = model_1.predict(X, base_margin=margin) cls_2 = xgb.dask.DaskXGBClassifier( - learning_rate=0.3, random_state=0, n_estimators=8, - tree_method=tree_method) + learning_rate=0.3, n_estimators=8, tree_method=tree_method, max_bin=512 + ) cls_2.fit(X=X, y=y) predictions_2 = cls_2.predict(X) @@ -415,8 +418,8 @@ def run_boost_from_prediction( unmargined = xgb.dask.DaskXGBClassifier(n_estimators=4) unmargined.fit(X=X, y=y, eval_set=[(X, y)], base_margin=margin) - margined_res = margined.evals_result()['validation_0']['logloss'] - unmargined_res = unmargined.evals_result()['validation_0']['logloss'] + margined_res = margined.evals_result()["validation_0"]["logloss"] + unmargined_res = unmargined.evals_result()["validation_0"]["logloss"] assert len(margined_res) == len(unmargined_res) for i in range(len(margined_res)): @@ -429,12 +432,11 @@ def test_boost_from_prediction(tree_method: str, client: "Client") -> None: from sklearn.datasets import load_breast_cancer, load_digits X_, y_ = load_breast_cancer(return_X_y=True) X, y = dd.from_array(X_, chunksize=200), dd.from_array(y_, chunksize=200) - run_boost_from_prediction(X, y, tree_method, client) X_, y_ = load_digits(return_X_y=True) X, y = dd.from_array(X_, chunksize=100), dd.from_array(y_, chunksize=100) - run_boost_from_prediction_multi_clasas(X, y, tree_method, client) + run_boost_from_prediction_multi_class(X, y, tree_method, client) def test_inplace_predict(client: "Client") -> None: @@ -1292,7 +1294,7 @@ class TestWithDask: @given(params=hist_parameter_strategy, dataset=tm.dataset_strategy) - @settings(deadline=None, suppress_health_check=suppress) + @settings(deadline=None, suppress_health_check=suppress, print_blob=True) def test_hist( self, params: Dict, dataset: tm.TestDataset, client: "Client" ) -> None: @@ -1301,7 +1303,7 @@ class TestWithDask: @given(params=exact_parameter_strategy, dataset=tm.dataset_strategy) - @settings(deadline=None, suppress_health_check=suppress) + @settings(deadline=None, suppress_health_check=suppress, print_blob=True) def test_approx( self, client: "Client", params: Dict, dataset: tm.TestDataset ) -> None: