From 7aec915dcdd0189b0049d3866d0f469c1f8497d9 Mon Sep 17 00:00:00 2001
From: Jiaming Yuan
Date: Sun, 10 Jan 2021 16:05:17 +0800
Subject: [PATCH] [Backport] Rename `data` to `X` in `predict_proba`. (#6555)
 (#6586)

* [Breaking] Rename `data` to `X` in `predict_proba`. (#6555)

New Scikit-Learn versions pass arguments by keyword, and `X` is the
predefined keyword for the feature matrix.

* Use pip to install the latest Python graphviz on Windows CI.

* Suppress health check.
---
 python-package/xgboost/dask.py          |  8 +++----
 python-package/xgboost/sklearn.py       | 18 ++++++++--------
 tests/ci_build/conda_env/win64_test.yml |  2 +-
 tests/python-gpu/test_gpu_with_dask.py  | 28 ++++++++++++++++++-------
 tests/python/test_with_dask.py          | 14 +++++++++----
 5 files changed, 44 insertions(+), 26 deletions(-)

diff --git a/python-package/xgboost/dask.py b/python-package/xgboost/dask.py
index 4000c280a..d9830f924 100644
--- a/python-package/xgboost/dask.py
+++ b/python-package/xgboost/dask.py
@@ -1210,10 +1210,10 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
                                 early_stopping_rounds=early_stopping_rounds,
                                 verbose=verbose)
 
-    async def _predict_proba_async(self, data, output_margin=False,
+    async def _predict_proba_async(self, X, output_margin=False,
                                    base_margin=None):
         test_dmatrix = await DaskDMatrix(
-            client=self.client, data=data, base_margin=base_margin,
+            client=self.client, data=X, base_margin=base_margin,
             missing=self.missing
         )
         pred_probs = await predict(client=self.client,
@@ -1223,11 +1223,11 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
         return pred_probs
 
     # pylint: disable=arguments-differ,missing-docstring
-    def predict_proba(self, data, output_margin=False, base_margin=None):
+    def predict_proba(self, X, output_margin=False, base_margin=None):
         _assert_dask_support()
         return self.client.sync(
             self._predict_proba_async,
-            data,
+            X=X,
             output_margin=output_margin,
             base_margin=base_margin
         )
diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py
index 717ab1d3f..0572c77d4 100644
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -995,10 +995,9 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
             return self._le.inverse_transform(column_indexes)
         return column_indexes
 
-    def predict_proba(self, data, ntree_limit=None, validate_features=False,
+    def predict_proba(self, X, ntree_limit=None, validate_features=False,
                       base_margin=None):
-        """
-        Predict the probability of each `data` example being of a given class.
+        """ Predict the probability of each `X` example being of a given class.
 
         .. note:: This function is not thread safe
 
@@ -1008,21 +1007,22 @@
 
         Parameters
         ----------
-        data : array_like
+        X : array_like
             Feature matrix.
         ntree_limit : int
-            Limit number of trees in the prediction; defaults to best_ntree_limit if defined
-            (i.e. it has been trained with early stopping), otherwise 0 (use all trees).
+            Limit number of trees in the prediction; defaults to best_ntree_limit if
+            defined (i.e. it has been trained with early stopping), otherwise 0 (use all
+            trees).
         validate_features : bool
-            When this is True, validate that the Booster's and data's feature_names are identical.
-            Otherwise, it is assumed that the feature_names are the same.
+            When this is True, validate that the Booster's and data's feature_names are
+            identical. Otherwise, it is assumed that the feature_names are the same.
 
         Returns
         -------
         prediction : numpy array
             a numpy array with the probability of each data example being of a given class.
         """
-        test_dmatrix = DMatrix(data, base_margin=base_margin,
+        test_dmatrix = DMatrix(X, base_margin=base_margin,
                                missing=self.missing, nthread=self.n_jobs)
         if ntree_limit is None:
             ntree_limit = getattr(self, "best_ntree_limit", 0)
diff --git a/tests/ci_build/conda_env/win64_test.yml b/tests/ci_build/conda_env/win64_test.yml
index df06ebff2..f353c8af7 100644
--- a/tests/ci_build/conda_env/win64_test.yml
+++ b/tests/ci_build/conda_env/win64_test.yml
@@ -9,7 +9,6 @@ dependencies:
 - scikit-learn
 - pandas
 - pytest
-- python-graphviz
 - boto3
 - hypothesis
 - jsonschema
@@ -17,3 +16,4 @@ dependencies:
 - pip:
   - cupy-cuda101
   - modin[all]
+  - graphviz
diff --git a/tests/python-gpu/test_gpu_with_dask.py b/tests/python-gpu/test_gpu_with_dask.py
index a0bafd2ef..e2070af88 100644
--- a/tests/python-gpu/test_gpu_with_dask.py
+++ b/tests/python-gpu/test_gpu_with_dask.py
@@ -5,8 +5,10 @@ import numpy as np
 import asyncio
 import xgboost
 import subprocess
+import hypothesis
 from hypothesis import given, strategies, settings, note
 from hypothesis._settings import duration
+from hypothesis import HealthCheck
 from test_gpu_updaters import parameter_strategy
 
 if sys.platform.startswith("win"):
@@ -19,6 +21,11 @@ from test_with_dask import _get_client_workers  # noqa
 from test_with_dask import generate_array  # noqa
 import testing as tm  # noqa
 
+if hasattr(HealthCheck, 'function_scoped_fixture'):
+    suppress = [HealthCheck.function_scoped_fixture]
+else:
+    suppress = hypothesis.utils.conventions.not_set
+
 try:
     import dask.dataframe as dd
 
@@ -161,19 +168,24 @@ class TestDistributedGPU:
             run_with_dask_dataframe(dxgb.DaskDMatrix, client)
             run_with_dask_dataframe(dxgb.DaskDeviceQuantileDMatrix, client)
 
-    @given(params=parameter_strategy, num_rounds=strategies.integers(1, 20),
-           dataset=tm.dataset_strategy)
-    @settings(deadline=duration(seconds=120))
+    @given(
+        params=parameter_strategy,
+        num_rounds=strategies.integers(1, 20),
+        dataset=tm.dataset_strategy,
+    )
+    @settings(deadline=duration(seconds=120), suppress_health_check=suppress)
     @pytest.mark.skipif(**tm.no_dask())
     @pytest.mark.skipif(**tm.no_dask_cuda())
-    @pytest.mark.parametrize('local_cuda_cluster', [{'n_workers': 2}], indirect=['local_cuda_cluster'])
+    @pytest.mark.parametrize(
+        "local_cuda_cluster", [{"n_workers": 2}], indirect=["local_cuda_cluster"]
+    )
     @pytest.mark.mgpu
     def test_gpu_hist(self, params, num_rounds, dataset, local_cuda_cluster):
         with Client(local_cuda_cluster) as client:
-            run_gpu_hist(params, num_rounds, dataset, dxgb.DaskDMatrix,
-                         client)
-            run_gpu_hist(params, num_rounds, dataset,
-                         dxgb.DaskDeviceQuantileDMatrix, client)
+            run_gpu_hist(params, num_rounds, dataset, dxgb.DaskDMatrix, client)
+            run_gpu_hist(
+                params, num_rounds, dataset, dxgb.DaskDeviceQuantileDMatrix, client
+            )
 
     @pytest.mark.skipif(**tm.no_cupy())
     @pytest.mark.skipif(**tm.no_dask())
diff --git a/tests/python/test_with_dask.py b/tests/python/test_with_dask.py
index ba697ab4d..0e876fcad 100644
--- a/tests/python/test_with_dask.py
+++ b/tests/python/test_with_dask.py
@@ -8,7 +8,8 @@ import asyncio
 from sklearn.datasets import make_classification
 import os
 import subprocess
-from hypothesis import given, settings, note
+import hypothesis
+from hypothesis import given, settings, note, HealthCheck
 from test_updaters import hist_parameter_strategy, exact_parameter_strategy
 
 if sys.platform.startswith("win"):
@@ -17,6 +18,12 @@
 if tm.no_dask()['condition']:
     pytest.skip(msg=tm.no_dask()['reason'], allow_module_level=True)
 
+if hasattr(HealthCheck, 'function_scoped_fixture'):
+    suppress = [HealthCheck.function_scoped_fixture]
+else:
+    suppress = hypothesis.utils.conventions.not_set
+
+
 try:
     from distributed import LocalCluster, Client, get_client
     from distributed.utils_test import client, loop, cluster_fixture
@@ -668,14 +675,14 @@ class TestWithDask:
 
     @given(params=hist_parameter_strategy,
           dataset=tm.dataset_strategy)
-    @settings(deadline=None)
+    @settings(deadline=None, suppress_health_check=suppress)
     def test_hist(self, params, dataset, client):
         num_rounds = 30
         self.run_updater_test(client, params, num_rounds, dataset, 'hist')
 
     @given(params=exact_parameter_strategy,
           dataset=tm.dataset_strategy)
-    @settings(deadline=None)
+    @settings(deadline=None, suppress_health_check=suppress)
     def test_approx(self, client, params, dataset):
         num_rounds = 30
         self.run_updater_test(client, params, num_rounds, dataset, 'approx')
@@ -795,7 +802,6 @@ class TestDaskCallbacks:
         merged = xgb.dask._get_workers_from_data(train, evals=[(valid, 'Valid')])
         assert len(merged) == 2
 
-
     def test_data_initialization(self):
         '''Assert each worker has the correct amount of data,
         and DMatrix initialization doesn't generate unnecessary copies of data.
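
Editor's note (not part of the patch): the rename matters because
scikit-learn tooling addresses the feature matrix by the keyword `X`, so a
`predict_proba` whose first parameter is named `data` fails under keyword
dispatch. A minimal sketch against the patched wrapper; the toy dataset and
hyperparameters are illustrative:

    import numpy as np
    from sklearn.datasets import make_classification
    from xgboost import XGBClassifier

    X_train, y_train = make_classification(n_samples=100, n_features=10,
                                           random_state=0)
    clf = XGBClassifier(n_estimators=10).fit(X_train, y_train)

    # Before this patch, the keyword form raised:
    #   TypeError: predict_proba() got an unexpected keyword argument 'X'
    assert np.allclose(clf.predict_proba(X_train),
                       clf.predict_proba(X=X_train))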
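Editor's note (not part of the patch; the test below is hypothetical): the
`suppress` shim guards against older hypothesis releases, where
`HealthCheck.function_scoped_fixture` does not exist. On newer releases,
@given warns when the wrapped test also takes a function-scoped pytest
fixture (such as the `client` fixture used by these tests); the shim
suppresses exactly that check when it is available and otherwise falls back
to hypothesis's internal "not set" sentinel, which leaves the settings
default untouched:

    import hypothesis
    from hypothesis import HealthCheck, given, settings, strategies

    if hasattr(HealthCheck, 'function_scoped_fixture'):
        suppress = [HealthCheck.function_scoped_fixture]
    else:
        suppress = hypothesis.utils.conventions.not_set

    @given(x=strategies.integers())
    @settings(deadline=None, suppress_health_check=suppress)
    def test_with_function_scoped_fixture(x, tmp_path):
        # tmp_path is a function-scoped pytest fixture; without the
        # suppression, newer hypothesis versions flag this pattern.
        assert isinstance(x, int)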
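Editor's note (not part of the patch; cluster size and data shapes are
illustrative): the Dask wrapper is renamed the same way, so
`DaskXGBClassifier.predict_proba` now also accepts `X` by keyword and
forwards it to the async prediction path:

    import dask.array as da
    from distributed import Client, LocalCluster
    from xgboost.dask import DaskXGBClassifier

    if __name__ == "__main__":
        with LocalCluster(n_workers=2) as cluster, Client(cluster) as client:
            X = da.random.random((1000, 10), chunks=(100, 10))
            y = (da.random.random(1000, chunks=100) > 0.5).astype(int)
            clf = DaskXGBClassifier(n_estimators=10)
            clf.fit(X, y)
            proba = clf.predict_proba(X=X)  # keyword form, matching sklearn
            print(proba.compute().shape)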