[Backport] Rename data to X in predict_proba. (#6555) (#6586)

* [Breaking] Rename `data` to `X` in `predict_proba`. (#6555)

Newer versions of scikit-learn pass the input to `predict_proba` as a keyword
argument, and `X` is the keyword name they use.

* Use pip to install latest Python graphviz on Windows CI.

* Suppress the Hypothesis `function_scoped_fixture` health check in the dask tests.
This commit is contained in:
Jiaming Yuan
2021-01-10 16:05:17 +08:00
committed by GitHub
parent a78d0d4110
commit 7aec915dcd
5 changed files with 44 additions and 26 deletions

View File

@@ -9,7 +9,6 @@ dependencies:
- scikit-learn
- pandas
- pytest
- python-graphviz
- boto3
- hypothesis
- jsonschema
@@ -17,3 +16,4 @@ dependencies:
- pip:
- cupy-cuda101
- modin[all]
- graphviz

View File

@@ -5,8 +5,10 @@ import numpy as np
import asyncio
import xgboost
import subprocess
import hypothesis
from hypothesis import given, strategies, settings, note
from hypothesis._settings import duration
from hypothesis import HealthCheck
from test_gpu_updaters import parameter_strategy
if sys.platform.startswith("win"):
@@ -19,6 +21,11 @@ from test_with_dask import _get_client_workers # noqa
from test_with_dask import generate_array # noqa
import testing as tm # noqa
if hasattr(HealthCheck, 'function_scoped_fixture'):
suppress = [HealthCheck.function_scoped_fixture]
else:
suppress = hypothesis.utils.conventions.not_set
try:
import dask.dataframe as dd
@@ -161,19 +168,24 @@ class TestDistributedGPU:
run_with_dask_dataframe(dxgb.DaskDMatrix, client)
run_with_dask_dataframe(dxgb.DaskDeviceQuantileDMatrix, client)
@given(params=parameter_strategy, num_rounds=strategies.integers(1, 20),
dataset=tm.dataset_strategy)
@settings(deadline=duration(seconds=120))
@given(
params=parameter_strategy,
num_rounds=strategies.integers(1, 20),
dataset=tm.dataset_strategy,
)
@settings(deadline=duration(seconds=120), suppress_health_check=suppress)
@pytest.mark.skipif(**tm.no_dask())
@pytest.mark.skipif(**tm.no_dask_cuda())
@pytest.mark.parametrize('local_cuda_cluster', [{'n_workers': 2}], indirect=['local_cuda_cluster'])
@pytest.mark.parametrize(
"local_cuda_cluster", [{"n_workers": 2}], indirect=["local_cuda_cluster"]
)
@pytest.mark.mgpu
def test_gpu_hist(self, params, num_rounds, dataset, local_cuda_cluster):
with Client(local_cuda_cluster) as client:
run_gpu_hist(params, num_rounds, dataset, dxgb.DaskDMatrix,
client)
run_gpu_hist(params, num_rounds, dataset,
dxgb.DaskDeviceQuantileDMatrix, client)
run_gpu_hist(params, num_rounds, dataset, dxgb.DaskDMatrix, client)
run_gpu_hist(
params, num_rounds, dataset, dxgb.DaskDeviceQuantileDMatrix, client
)
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.skipif(**tm.no_dask())

View File

@@ -8,7 +8,8 @@ import asyncio
from sklearn.datasets import make_classification
import os
import subprocess
from hypothesis import given, settings, note
import hypothesis
from hypothesis import given, settings, note, HealthCheck
from test_updaters import hist_parameter_strategy, exact_parameter_strategy
if sys.platform.startswith("win"):
@@ -17,6 +18,12 @@ if tm.no_dask()['condition']:
pytest.skip(msg=tm.no_dask()['reason'], allow_module_level=True)
if hasattr(HealthCheck, 'function_scoped_fixture'):
suppress = [HealthCheck.function_scoped_fixture]
else:
suppress = hypothesis.utils.conventions.not_set
try:
from distributed import LocalCluster, Client, get_client
from distributed.utils_test import client, loop, cluster_fixture
@@ -668,14 +675,14 @@ class TestWithDask:
@given(params=hist_parameter_strategy,
dataset=tm.dataset_strategy)
@settings(deadline=None)
@settings(deadline=None, suppress_health_check=suppress)
def test_hist(self, params, dataset, client):
num_rounds = 30
self.run_updater_test(client, params, num_rounds, dataset, 'hist')
@given(params=exact_parameter_strategy,
dataset=tm.dataset_strategy)
@settings(deadline=None)
@settings(deadline=None, suppress_health_check=suppress)
def test_approx(self, client, params, dataset):
num_rounds = 30
self.run_updater_test(client, params, num_rounds, dataset, 'approx')
@@ -795,7 +802,6 @@ class TestDaskCallbacks:
merged = xgb.dask._get_workers_from_data(train, evals=[(valid, 'Valid')])
assert len(merged) == 2
def test_data_initialization(self):
'''Assert each worker has the correct amount of data, and DMatrix initialization doesn't
generate unnecessary copies of data.