[Backport] Rename data to X in predict_proba. (#6555) (#6586)

* [Breaking] Rename `data` to `X` in `predict_proba`. (#6555)

Newer versions of Scikit-Learn pass the input as a keyword argument, and `X`
is the predefined keyword name.

* Use pip to install latest Python graphviz on Windows CI.

* Suppress health check.
This commit is contained in:
Jiaming Yuan
2021-01-10 16:05:17 +08:00
committed by GitHub
parent a78d0d4110
commit 7aec915dcd
5 changed files with 44 additions and 26 deletions

View File

@@ -1210,10 +1210,10 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
early_stopping_rounds=early_stopping_rounds,
verbose=verbose)
async def _predict_proba_async(self, data, output_margin=False,
async def _predict_proba_async(self, X, output_margin=False,
base_margin=None):
test_dmatrix = await DaskDMatrix(
client=self.client, data=data, base_margin=base_margin,
client=self.client, data=X, base_margin=base_margin,
missing=self.missing
)
pred_probs = await predict(client=self.client,
@@ -1223,11 +1223,11 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
return pred_probs
# pylint: disable=arguments-differ,missing-docstring
def predict_proba(self, data, output_margin=False, base_margin=None):
def predict_proba(self, X, output_margin=False, base_margin=None):
_assert_dask_support()
return self.client.sync(
self._predict_proba_async,
data,
X=X,
output_margin=output_margin,
base_margin=base_margin
)

View File

@@ -995,10 +995,9 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
return self._le.inverse_transform(column_indexes)
return column_indexes
def predict_proba(self, data, ntree_limit=None, validate_features=False,
def predict_proba(self, X, ntree_limit=None, validate_features=False,
base_margin=None):
"""
Predict the probability of each `data` example being of a given class.
""" Predict the probability of each `X` example being of a given class.
.. note:: This function is not thread safe
@@ -1008,21 +1007,22 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
Parameters
----------
data : array_like
X : array_like
Feature matrix.
ntree_limit : int
Limit number of trees in the prediction; defaults to best_ntree_limit if defined
(i.e. it has been trained with early stopping), otherwise 0 (use all trees).
Limit number of trees in the prediction; defaults to best_ntree_limit if
defined (i.e. it has been trained with early stopping), otherwise 0 (use all
trees).
validate_features : bool
When this is True, validate that the Booster's and data's feature_names are identical.
Otherwise, it is assumed that the feature_names are the same.
When this is True, validate that the Booster's and data's feature_names are
identical. Otherwise, it is assumed that the feature_names are the same.
Returns
-------
prediction : numpy array
a numpy array with the probability of each data example being of a given class.
"""
test_dmatrix = DMatrix(data, base_margin=base_margin,
test_dmatrix = DMatrix(X, base_margin=base_margin,
missing=self.missing, nthread=self.n_jobs)
if ntree_limit is None:
ntree_limit = getattr(self, "best_ntree_limit", 0)