diff --git a/python-package/xgboost/dask.py b/python-package/xgboost/dask.py index f21f70665..585d7fea3 100644 --- a/python-package/xgboost/dask.py +++ b/python-package/xgboost/dask.py @@ -1321,10 +1321,10 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase): feature_weights=feature_weights, callbacks=callbacks) - async def _predict_proba_async(self, data, output_margin=False, + async def _predict_proba_async(self, X, output_margin=False, base_margin=None): test_dmatrix = await DaskDMatrix( - client=self.client, data=data, base_margin=base_margin, + client=self.client, data=X, base_margin=base_margin, missing=self.missing ) pred_probs = await predict(client=self.client, @@ -1334,11 +1334,11 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase): return pred_probs # pylint: disable=arguments-differ,missing-docstring - def predict_proba(self, data, output_margin=False, base_margin=None): + def predict_proba(self, X, output_margin=False, base_margin=None): _assert_dask_support() return self.client.sync( self._predict_proba_async, - data, + X=X, output_margin=output_margin, base_margin=base_margin ) diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 6f84f7a4a..bf9da0a13 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -1000,10 +1000,9 @@ class XGBClassifier(XGBModel, XGBClassifierBase): return self._le.inverse_transform(column_indexes) return column_indexes - def predict_proba(self, data, ntree_limit=None, validate_features=False, + def predict_proba(self, X, ntree_limit=None, validate_features=False, base_margin=None): - """ - Predict the probability of each `data` example being of a given class. + """ Predict the probability of each `X` example being of a given class. .. note:: This function is not thread safe @@ -1013,21 +1012,22 @@ class XGBClassifier(XGBModel, XGBClassifierBase): Parameters ---------- - data : array_like + X : array_like Feature matrix. ntree_limit : int - Limit number of trees in the prediction; defaults to best_ntree_limit if defined - (i.e. it has been trained with early stopping), otherwise 0 (use all trees). + Limit number of trees in the prediction; defaults to best_ntree_limit if + defined (i.e. it has been trained with early stopping), otherwise 0 (use all + trees). validate_features : bool - When this is True, validate that the Booster's and data's feature_names are identical. - Otherwise, it is assumed that the feature_names are the same. + When this is True, validate that the Booster's and data's feature_names are + identical. Otherwise, it is assumed that the feature_names are the same. Returns ------- prediction : numpy array a numpy array with the probability of each data example being of a given class. """ - test_dmatrix = DMatrix(data, base_margin=base_margin, + test_dmatrix = DMatrix(X, base_margin=base_margin, missing=self.missing, nthread=self.n_jobs) if ntree_limit is None: ntree_limit = getattr(self, "best_ntree_limit", 0) diff --git a/tests/ci_build/conda_env/win64_test.yml b/tests/ci_build/conda_env/win64_test.yml index df06ebff2..f353c8af7 100644 --- a/tests/ci_build/conda_env/win64_test.yml +++ b/tests/ci_build/conda_env/win64_test.yml @@ -9,7 +9,6 @@ dependencies: - scikit-learn - pandas - pytest -- python-graphviz - boto3 - hypothesis - jsonschema @@ -17,3 +16,4 @@ dependencies: - pip: - cupy-cuda101 - modin[all] + - graphviz