[Breaking] Rename `data` to `X` in `predict_proba`. (#6555)

* The new Scikit-Learn version passes the argument by keyword, and `X` is the predefined keyword name.
* Use pip to install the latest Python graphviz package on Windows CI.
* Suppress the hypothesis `function_scoped_fixture` health check.
commit 7aec915dcd
parent a78d0d4110
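For context, a minimal sketch of what the rename means for callers (the data below is synthetic and purely illustrative):

```python
# Minimal sketch of the breaking change: predict_proba now takes `X`
# rather than `data`, matching scikit-learn's keyword convention.
import numpy as np
from xgboost import XGBClassifier

rng = np.random.RandomState(0)
X, y = rng.randn(100, 10), rng.randint(0, 2, size=100)

clf = XGBClassifier(n_estimators=10).fit(X, y)

# clf.predict_proba(data=X)   # old keyword; breaks after this commit
proba = clf.predict_proba(X=X)  # new keyword
```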
In the Dask interface (`DaskXGBClassifier`):

```diff
@@ -1210,10 +1210,10 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
             early_stopping_rounds=early_stopping_rounds,
             verbose=verbose)
 
-    async def _predict_proba_async(self, data, output_margin=False,
+    async def _predict_proba_async(self, X, output_margin=False,
                                    base_margin=None):
         test_dmatrix = await DaskDMatrix(
-            client=self.client, data=data, base_margin=base_margin,
+            client=self.client, data=X, base_margin=base_margin,
             missing=self.missing
         )
         pred_probs = await predict(client=self.client,
@@ -1223,11 +1223,11 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
         return pred_probs
 
     # pylint: disable=arguments-differ,missing-docstring
-    def predict_proba(self, data, output_margin=False, base_margin=None):
+    def predict_proba(self, X, output_margin=False, base_margin=None):
         _assert_dask_support()
         return self.client.sync(
             self._predict_proba_async,
-            data,
+            X=X,
             output_margin=output_margin,
             base_margin=base_margin
         )
```
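The pattern here is a synchronous facade over an async implementation: `predict_proba` forwards its arguments to `_predict_proba_async` through `client.sync`, which is why the rename also changes the forwarded argument to `X=X`. A toy sketch of the same pattern, using plain `asyncio` in place of a Dask client:

```python
# Toy sketch (not the XGBoost implementation) of a sync facade over an
# async worker; distributed's Client.sync plays the role asyncio.run
# plays here in the real code.
import asyncio


class Estimator:
    async def _predict_proba_async(self, X, output_margin=False):
        await asyncio.sleep(0)          # stand-in for real async work
        return [[0.5, 0.5] for _ in X]  # dummy class probabilities

    def predict_proba(self, X, output_margin=False):
        # Forward by keyword so the public and private signatures stay in sync.
        return asyncio.run(
            self._predict_proba_async(X=X, output_margin=output_margin)
        )


print(Estimator().predict_proba([[1.0], [2.0]]))
```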
In the single-node scikit-learn wrapper (`XGBClassifier`):

```diff
@@ -995,10 +995,9 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
             return self._le.inverse_transform(column_indexes)
         return column_indexes
 
-    def predict_proba(self, data, ntree_limit=None, validate_features=False,
+    def predict_proba(self, X, ntree_limit=None, validate_features=False,
                       base_margin=None):
-        """
-        Predict the probability of each `data` example being of a given class.
+        """ Predict the probability of each `X` example being of a given class.
 
         .. note:: This function is not thread safe
 
@@ -1008,21 +1007,22 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
 
         Parameters
         ----------
-        data : array_like
+        X : array_like
             Feature matrix.
         ntree_limit : int
-            Limit number of trees in the prediction; defaults to best_ntree_limit if defined
-            (i.e. it has been trained with early stopping), otherwise 0 (use all trees).
+            Limit number of trees in the prediction; defaults to best_ntree_limit if
+            defined (i.e. it has been trained with early stopping), otherwise 0 (use all
+            trees).
         validate_features : bool
-            When this is True, validate that the Booster's and data's feature_names are identical.
-            Otherwise, it is assumed that the feature_names are the same.
+            When this is True, validate that the Booster's and data's feature_names are
+            identical. Otherwise, it is assumed that the feature_names are the same.
 
         Returns
         -------
         prediction : numpy array
             a numpy array with the probability of each data example being of a given class.
         """
-        test_dmatrix = DMatrix(data, base_margin=base_margin,
+        test_dmatrix = DMatrix(X, base_margin=base_margin,
                                missing=self.missing, nthread=self.n_jobs)
         if ntree_limit is None:
             ntree_limit = getattr(self, "best_ntree_limit", 0)
```
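The reflowed docstring describes the `ntree_limit` default. A hedged sketch of that behaviour as documented (dataset and shapes are illustrative):

```python
# Sketch of the documented ntree_limit default: after early stopping,
# predict_proba uses best_ntree_limit unless told otherwise; passing 0
# forces all trees.
import numpy as np
from xgboost import XGBClassifier

rng = np.random.RandomState(0)
X_train, y_train = rng.randn(200, 5), rng.randint(0, 2, size=200)
X_valid, y_valid = rng.randn(50, 5), rng.randint(0, 2, size=50)

clf = XGBClassifier(n_estimators=100)
clf.fit(X_train, y_train, eval_set=[(X_valid, y_valid)],
        early_stopping_rounds=5)

proba_best = clf.predict_proba(X_valid)                # uses best_ntree_limit
proba_all = clf.predict_proba(X_valid, ntree_limit=0)  # uses all trees
```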
In a conda environment file used by CI, the conda `python-graphviz` package is dropped in favour of the pip `graphviz` package:

```diff
@@ -9,7 +9,6 @@ dependencies:
 - scikit-learn
 - pandas
 - pytest
-- python-graphviz
 - boto3
 - hypothesis
 - jsonschema
@@ -17,3 +16,4 @@ dependencies:
 - pip:
   - cupy-cuda101
   - modin[all]
+  - graphviz
```
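Either package provides the same `graphviz` import, which is what xgboost's tree visualisation pulls in. A quick sanity check one could run against the environment (a minimal sketch; the training data is synthetic):

```python
# Sketch of what the graphviz Python package is needed for: xgboost's
# tree rendering imports it at call time.
import numpy as np
import xgboost

rng = np.random.RandomState(0)
dtrain = xgboost.DMatrix(rng.randn(50, 4), label=rng.randint(0, 2, size=50))
booster = xgboost.train({"objective": "binary:logistic"}, dtrain,
                        num_boost_round=3)

graph = xgboost.to_graphviz(booster, num_trees=0)  # fails if graphviz is absent
graph.render("tree0")  # rendering also needs the Graphviz binaries on PATH
```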
In the distributed GPU tests:

```diff
@@ -5,8 +5,10 @@ import numpy as np
 import asyncio
 import xgboost
 import subprocess
+import hypothesis
 from hypothesis import given, strategies, settings, note
 from hypothesis._settings import duration
+from hypothesis import HealthCheck
 from test_gpu_updaters import parameter_strategy
 
 if sys.platform.startswith("win"):
@@ -19,6 +21,11 @@ from test_with_dask import _get_client_workers  # noqa
 from test_with_dask import generate_array  # noqa
 import testing as tm  # noqa
 
+if hasattr(HealthCheck, 'function_scoped_fixture'):
+    suppress = [HealthCheck.function_scoped_fixture]
+else:
+    suppress = hypothesis.utils.conventions.not_set
+
 
 try:
     import dask.dataframe as dd
@@ -161,19 +168,24 @@ class TestDistributedGPU:
         run_with_dask_dataframe(dxgb.DaskDMatrix, client)
         run_with_dask_dataframe(dxgb.DaskDeviceQuantileDMatrix, client)
 
-    @given(params=parameter_strategy, num_rounds=strategies.integers(1, 20),
-           dataset=tm.dataset_strategy)
-    @settings(deadline=duration(seconds=120))
+    @given(
+        params=parameter_strategy,
+        num_rounds=strategies.integers(1, 20),
+        dataset=tm.dataset_strategy,
+    )
+    @settings(deadline=duration(seconds=120), suppress_health_check=suppress)
     @pytest.mark.skipif(**tm.no_dask())
     @pytest.mark.skipif(**tm.no_dask_cuda())
-    @pytest.mark.parametrize('local_cuda_cluster', [{'n_workers': 2}], indirect=['local_cuda_cluster'])
+    @pytest.mark.parametrize(
+        "local_cuda_cluster", [{"n_workers": 2}], indirect=["local_cuda_cluster"]
+    )
     @pytest.mark.mgpu
     def test_gpu_hist(self, params, num_rounds, dataset, local_cuda_cluster):
         with Client(local_cuda_cluster) as client:
-            run_gpu_hist(params, num_rounds, dataset, dxgb.DaskDMatrix,
-                         client)
-            run_gpu_hist(params, num_rounds, dataset,
-                         dxgb.DaskDeviceQuantileDMatrix, client)
+            run_gpu_hist(params, num_rounds, dataset, dxgb.DaskDMatrix, client)
+            run_gpu_hist(
+                params, num_rounds, dataset, dxgb.DaskDeviceQuantileDMatrix, client
+            )
 
     @pytest.mark.skipif(**tm.no_cupy())
     @pytest.mark.skipif(**tm.no_dask())
```
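The `suppress` shim and the `suppress_health_check=suppress` setting address a hypothesis health check: newer hypothesis releases flag function-scoped pytest fixtures used inside `@given` tests, because the fixture is set up once per test function rather than once per generated example. A self-contained sketch of the pattern (fixture and test names are made up):

```python
import pytest
import hypothesis
from hypothesis import HealthCheck, given, settings, strategies

if hasattr(HealthCheck, "function_scoped_fixture"):
    suppress = [HealthCheck.function_scoped_fixture]
else:
    # Older hypothesis has no such check; leave the setting unset.
    suppress = hypothesis.utils.conventions.not_set


@pytest.fixture
def scratch():
    # Function-scoped: created once per test function, NOT once per
    # hypothesis example -- which is what the health check warns about.
    return []


@given(x=strategies.integers())
@settings(suppress_health_check=suppress)
def test_append(scratch, x):
    scratch.append(x)
    assert scratch[-1] == x
```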
In the Dask tests:

```diff
@@ -8,7 +8,8 @@ import asyncio
 from sklearn.datasets import make_classification
 import os
 import subprocess
-from hypothesis import given, settings, note
+import hypothesis
+from hypothesis import given, settings, note, HealthCheck
 from test_updaters import hist_parameter_strategy, exact_parameter_strategy
 
 if sys.platform.startswith("win"):
@@ -17,6 +18,12 @@ if tm.no_dask()['condition']:
     pytest.skip(msg=tm.no_dask()['reason'], allow_module_level=True)
 
+
+if hasattr(HealthCheck, 'function_scoped_fixture'):
+    suppress = [HealthCheck.function_scoped_fixture]
+else:
+    suppress = hypothesis.utils.conventions.not_set
+
 
 try:
     from distributed import LocalCluster, Client, get_client
     from distributed.utils_test import client, loop, cluster_fixture
@@ -668,14 +675,14 @@ class TestWithDask:
 
     @given(params=hist_parameter_strategy,
            dataset=tm.dataset_strategy)
-    @settings(deadline=None)
+    @settings(deadline=None, suppress_health_check=suppress)
     def test_hist(self, params, dataset, client):
         num_rounds = 30
         self.run_updater_test(client, params, num_rounds, dataset, 'hist')
 
     @given(params=exact_parameter_strategy,
            dataset=tm.dataset_strategy)
-    @settings(deadline=None)
+    @settings(deadline=None, suppress_health_check=suppress)
     def test_approx(self, client, params, dataset):
         num_rounds = 30
         self.run_updater_test(client, params, num_rounds, dataset, 'approx')
@@ -795,7 +802,6 @@ class TestDaskCallbacks:
         merged = xgb.dask._get_workers_from_data(train, evals=[(valid, 'Valid')])
         assert len(merged) == 2
 
-
     def test_data_initialization(self):
         '''Assert each worker has the correct amount of data, and DMatrix initialization doesn't
         generate unnecessary copies of data.
```
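These tests trip the health check in the first place because the `client` fixture imported from `distributed.utils_test` is function-scoped (and only works when `loop` and `cluster_fixture` are imported alongside it). A self-contained sketch combining the pieces changed in this file; the test name and body are illustrative:

```python
import hypothesis
from distributed.utils_test import client, loop, cluster_fixture  # noqa: F401
from hypothesis import HealthCheck, given, settings, strategies

if hasattr(HealthCheck, "function_scoped_fixture"):
    suppress = [HealthCheck.function_scoped_fixture]
else:
    suppress = hypothesis.utils.conventions.not_set


@given(n=strategies.integers(0, 100))
@settings(deadline=None, suppress_health_check=suppress)
def test_submit_roundtrip(client, n):
    # `client` is a live distributed.Client backed by a local test cluster.
    assert client.submit(lambda v: v + 1, n).result() == n + 1
```

`deadline=None` disables hypothesis's per-example time limit, which cluster round-trips would otherwise exceed nondeterministically.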