[dask] Rework base margin test. (#6627)

Jiaming Yuan, 2021-01-22 17:49:13 +08:00 (committed by GitHub)
parent 7bc56fa0ed
commit a275f40267
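The rework replaces the hand-rolled LocalCluster / Client setup with the shared client fixture, drops the approx xfail and the probability-variance tolerance logic, and instead asserts directly that boosting from a predicted margin matches a single model trained for the combined number of rounds.

What the test exercises can be sketched outside the test suite roughly as follows. This is a minimal illustration under assumptions of its own (a two-worker LocalCluster, the hist tree method fixed, and an agreement printout at the end), not the test code itself:

# Minimal sketch of "boosting from prediction" with xgboost's dask interface.
# Mirrors the calls made by the test; n_workers=2 and the printout are illustrative.
import dask.dataframe as dd
import numpy as np
import xgboost as xgb
from dask.distributed import Client, LocalCluster
from sklearn.datasets import load_breast_cancer

if __name__ == "__main__":
    with LocalCluster(n_workers=2) as cluster, Client(cluster) as client:
        X_, y_ = load_breast_cancer(return_X_y=True)
        X = dd.from_array(X_, chunksize=100)
        y = dd.from_array(y_, chunksize=100)

        # 4 rounds, then export the raw (untransformed) margin scores.
        base = xgb.dask.DaskXGBClassifier(
            learning_rate=0.3, random_state=0, n_estimators=4, tree_method="hist")
        base.fit(X, y)
        margin = base.predict(X, output_margin=True)

        # 4 more rounds, continuing from that margin.
        cont = xgb.dask.DaskXGBClassifier(
            learning_rate=0.3, random_state=0, n_estimators=4, tree_method="hist")
        cont.fit(X, y, base_margin=margin)

        # A single 8-round model trained from scratch should predict the same labels.
        full = xgb.dask.DaskXGBClassifier(
            learning_rate=0.3, random_state=0, n_estimators=8, tree_method="hist")
        full.fit(X, y)

        continued = cont.predict(X, base_margin=margin).compute()
        reference = full.predict(X).compute()
        print("agreement:", np.mean(continued == reference))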

@@ -149,68 +149,30 @@ def test_dask_predict_shape_infer() -> None:
 @pytest.mark.parametrize("tree_method", ["hist", "approx"])
-def test_boost_from_prediction(tree_method: str) -> None:
-    if tree_method == 'approx':
-        pytest.xfail(reason='test_boost_from_prediction[approx] is flaky')
+def test_boost_from_prediction(tree_method: str, client: "Client") -> None:
     from sklearn.datasets import load_breast_cancer
-    X, y = load_breast_cancer(return_X_y=True)
-    X_ = dd.from_array(X, chunksize=100)
-    y_ = dd.from_array(y, chunksize=100)
-
-    with LocalCluster(n_workers=4) as cluster:
-        with Client(cluster) as _:
-            model_0 = xgb.dask.DaskXGBClassifier(
-                learning_rate=0.3,
-                random_state=123,
-                n_estimators=4,
-                tree_method=tree_method,
-            )
-            model_0.fit(X=X_, y=y_)
-            margin = model_0.predict(X_, output_margin=True)
-
-            model_1 = xgb.dask.DaskXGBClassifier(
-                learning_rate=0.3,
-                random_state=123,
-                n_estimators=4,
-                tree_method=tree_method,
-            )
-            model_1.fit(X=X_, y=y_, base_margin=margin)
-            predictions_1 = model_1.predict(X_, base_margin=margin)
-            proba_1 = model_1.predict_proba(X_, base_margin=margin)
-
-            cls_2 = xgb.dask.DaskXGBClassifier(
-                learning_rate=0.3,
-                random_state=123,
-                n_estimators=8,
-                tree_method=tree_method,
-            )
-            cls_2.fit(X=X_, y=y_)
-            predictions_2 = cls_2.predict(X_)
-            proba_2 = cls_2.predict_proba(X_)
-
-            cls_3 = xgb.dask.DaskXGBClassifier(
-                learning_rate=0.3,
-                random_state=123,
-                n_estimators=8,
-                tree_method=tree_method,
-            )
-            cls_3.fit(X=X_, y=y_)
-            proba_3 = cls_3.predict_proba(X_)
-
-            # compute variance of probability percentages between two of the
-            # same model, use this to check to make sure approx is functioning
-            # within normal parameters
-            expected_variance = np.max(np.abs(proba_3 - proba_2)).compute()
-
-            if expected_variance > 0:
-                margin_variance = np.max(np.abs(proba_1 - proba_2)).compute()
-                # Ensure the margin variance is less than the expected variance + 10%
-                assert np.all(margin_variance <= expected_variance + .1)
-            else:
-                np.testing.assert_equal(predictions_1.compute(), predictions_2.compute())
-                np.testing.assert_almost_equal(proba_1.compute(), proba_2.compute())
+    X_, y_ = load_breast_cancer(return_X_y=True)
+    X, y = dd.from_array(X_, chunksize=100), dd.from_array(y_, chunksize=100)
+
+    model_0 = xgb.dask.DaskXGBClassifier(
+        learning_rate=0.3, random_state=0, n_estimators=4,
+        tree_method=tree_method)
+    model_0.fit(X=X, y=y)
+    margin = model_0.predict(X, output_margin=True)
+
+    model_1 = xgb.dask.DaskXGBClassifier(
+        learning_rate=0.3, random_state=0, n_estimators=4,
+        tree_method=tree_method)
+    model_1.fit(X=X, y=y, base_margin=margin)
+    predictions_1 = model_1.predict(X, base_margin=margin)
+
+    cls_2 = xgb.dask.DaskXGBClassifier(
+        learning_rate=0.3, random_state=0, n_estimators=8,
+        tree_method=tree_method)
+    cls_2.fit(X=X, y=y)
+    predictions_2 = cls_2.predict(X)
+
+    assert np.all(predictions_1.compute() == predictions_2.compute())
 
 
 def test_dask_missing_value_reg() -> None:
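Because both runs use the same learning rate and random_state, fitting four more rounds on top of model_0's margin should reproduce rounds 5 to 8 of the eight-round cls_2, which is why the reworked test can assert exact equality of the predicted labels rather than the earlier variance-based tolerance.

The client argument in the new signature is a pytest fixture that hands the test a connected dask client backed by a temporary local cluster. A common way to obtain such a fixture is distributed's own test utilities; the sketch below shows that pattern, and both the import line and the toy test are assumptions for illustration, not part of this diff:

# Hedged sketch: pulling in distributed's pytest fixtures so a test can accept a
# ready-made `client` argument. The exact imports in the xgboost test module are
# not shown in this diff and may differ.
from distributed.utils_test import client, loop, cluster_fixture  # noqa: F401


def test_uses_shared_client(client) -> None:
    # `client` is a connected dask.distributed.Client; the backing local
    # cluster is created before the test and torn down afterwards.
    assert client.scheduler_info()["workers"]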