Typehint for Sklearn. (#6799)

parent 3d919db0c0
commit dee5ef2dfd
.github/workflows/main.yml (2 changes, vendored)
@@ -243,7 +243,7 @@ jobs:
           architecture: 'x64'
       - name: Install Python packages
         run: |
-          python -m pip install wheel setuptools mypy dask[complete] distributed
+          python -m pip install wheel setuptools mypy pandas dask[complete] distributed
       - name: Run mypy
         run: |
           make mypy
Makefile (5 changes)
@@ -91,8 +91,9 @@ endif
 # If any of the dask tests failed, contributor won't see the other error.
 mypy:
 	cd python-package; \
-	mypy ./xgboost/dask.py ../tests/python/test_with_dask.py --follow-imports=silent; \
-	mypy ../tests/python-gpu/test_gpu_with_dask.py --follow-imports=silent; \
+	mypy ./xgboost/dask.py && \
+	mypy ../tests/python-gpu/test_gpu_with_dask.py && \
+	mypy ./xgboost/sklearn.py || exit 1; \
 	mypy . || true ;

 clean:

python-package/setup.cfg
@@ -4,3 +4,4 @@ description-file = README.rst
 [mypy]
 ignore_missing_imports = True
 disallow_untyped_defs = True
+follow_imports = silent
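
Note: disallow_untyped_defs = True makes mypy reject any function that lacks a
full signature annotation, while follow_imports = silent replaces the
--follow-imports=silent flags dropped from the Makefile above: imported modules
are still analyzed, but errors inside them are suppressed. A minimal
illustration (not from this commit) of what the strictness flag accepts and
rejects:

    def predict(data):  # error: function is missing type annotations
        return data

    def predict_typed(data: list) -> list:  # accepted: fully annotated
        return data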
python-package/xgboost/callback.py
@@ -276,6 +276,9 @@ class TrainingCallback(ABC):
     .. versionadded:: 1.3.0

     '''

+    EvalsLog = Dict[str, Dict[str, Union[List[float], List[Tuple[float, float]]]]]
+
     def __init__(self):
         pass

@@ -287,13 +290,11 @@ class TrainingCallback(ABC):
         '''Run after training is finished.'''
         return model

-    def before_iteration(self, model, epoch: int,
-                         evals_log: 'CallbackContainer.EvalsLog') -> bool:
+    def before_iteration(self, model, epoch: int, evals_log: EvalsLog) -> bool:
         '''Run before each iteration. Return True when training should stop.'''
         return False

-    def after_iteration(self, model, epoch: int,
-                        evals_log: 'CallbackContainer.EvalsLog') -> bool:
+    def after_iteration(self, model, epoch: int, evals_log: EvalsLog) -> bool:
         '''Run after each iteration. Return True when training should stop.'''
         return False

@@ -351,7 +352,7 @@ class CallbackContainer:

     '''

-    EvalsLog = Dict[str, Dict[str, Union[List[float], List[Tuple[float, float]]]]]
+    EvalsLog = TrainingCallback.EvalsLog

     def __init__(self,
                  callbacks: List[TrainingCallback],
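
With EvalsLog now defined on TrainingCallback itself, user callbacks can
annotate against the base class directly. A hypothetical callback written
against this interface (the class name and printed format are illustrative):

    from xgboost.callback import TrainingCallback

    class PrintLastMetric(TrainingCallback):
        """Print the newest recorded value of every metric."""

        def after_iteration(
            self, model, epoch: int, evals_log: TrainingCallback.EvalsLog
        ) -> bool:
            # evals_log maps data name -> metric name -> history of values.
            for data, metrics in evals_log.items():
                for metric, history in metrics.items():
                    print(f"[{epoch}] {data}-{metric}: {history[-1]}")
            return False  # True would request early termination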

python-package/xgboost/compat.py
@@ -1,6 +1,7 @@
 # coding: utf-8
 # pylint: disable= invalid-name, unused-import
 """For compatibility and optional dependencies."""
+from typing import Any
 import sys
 import types
 import importlib.util
@@ -36,7 +37,7 @@ except ImportError:

     MultiIndex = object
     Int64Index = object
-    DataFrame = object
+    DataFrame: Any = object
     Series = object
     pandas_concat = None
     PANDAS_INSTALLED = False
@@ -109,10 +110,12 @@ except pkg_resources.DistributionNotFound:
 try:
     import sparse
     import scipy.sparse as scipy_sparse
+    from scipy.sparse import csr_matrix as scipy_csr
     SCIPY_INSTALLED = True
 except ImportError:
     sparse = False
     scipy_sparse = False
+    scipy_csr: Any = object
     SCIPY_INSTALLED = False

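
The new Any annotations exist so mypy accepts re-binding an imported name to a
fallback object when the optional dependency is missing. A minimal sketch of
the pattern (simplified from compat.py, which imports several names at once):

    from typing import Any

    try:
        from pandas import DataFrame
        PANDAS_INSTALLED = True
    except ImportError:
        DataFrame: Any = object  # placeholder keeps isinstance checks working
        PANDAS_INSTALLED = False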

python-package/xgboost/core.py
@@ -96,7 +96,11 @@ def from_cstr_to_pystr(data, length) -> List[str]:
     return res


-def _convert_ntree_limit(booster, ntree_limit, iteration_range):
+def _convert_ntree_limit(
+    booster: "Booster",
+    ntree_limit: Optional[int],
+    iteration_range: Optional[Tuple[int, int]]
+) -> Optional[Tuple[int, int]]:
     if ntree_limit is not None and ntree_limit != 0:
         warnings.warn(
             "ntree_limit is deprecated, use `iteration_range` or model "
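
The new signature documents that a non-zero ntree_limit is translated into an
iteration_range pair. A usage sketch (assumes an XGBoost build of this era
where Booster.predict already accepts iteration_range):

    import numpy as np
    import xgboost as xgb

    X, y = np.random.rand(100, 4), np.random.rand(100)
    dtrain = xgb.DMatrix(X, label=y)
    booster = xgb.train({"tree_method": "hist"}, dtrain, num_boost_round=20)

    # Preferred spelling: predict with the first 10 boosting rounds.
    preds = booster.predict(dtrain, iteration_range=(0, 10))

    # Deprecated spelling: emits the warning shown above, then is converted
    # internally by _convert_ntree_limit to an equivalent iteration_range.
    preds_old = booster.predict(dtrain, ntree_limit=10)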
@@ -1234,7 +1238,7 @@ class Booster(object):
             params += [('eval_metric', eval_metric)]
         return params

-    def _transform_monotone_constrains(self, value: Union[dict, str]) -> str:
+    def _transform_monotone_constrains(self, value: Union[Dict[str, int], str]) -> str:
         if isinstance(value, str):
             return value

@@ -1246,7 +1250,9 @@ class Booster(object):
         return '(' + ','.join([str(value.get(feature_name, 0))
                                for feature_name in self.feature_names]) + ')'

-    def _transform_interaction_constraints(self, value: Union[list, str]) -> str:
+    def _transform_interaction_constraints(
+        self, value: Union[List[Tuple[str]], str]
+    ) -> str:
         if isinstance(value, str):
             return value

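
The join shown above renders a constraint dict positionally over
feature_names, defaulting missing features to 0. A worked example with
illustrative names:

    feature_names = ['f0', 'f1', 'f2']   # hypothetical feature names
    value = {'f0': 1, 'f2': -1}          # monotone constraints as a dict
    rendered = '(' + ','.join(
        str(value.get(name, 0)) for name in feature_names
    ) + ')'
    assert rendered == '(1,0,-1)'        # absent 'f1' falls back to 0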
@@ -1447,7 +1453,7 @@ class Booster(object):
         attr_names = from_cstr_to_pystr(sarr, length)
         return {n: self.attr(n) for n in attr_names}

-    def set_attr(self, **kwargs):
+    def set_attr(self, **kwargs: Optional[str]) -> None:
         """Set the attribute of the Booster.

         Parameters
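
The **kwargs: Optional[str] annotation encodes that Booster attributes are
stored as strings and that passing None removes an attribute. A usage sketch
(assumes a trained booster object):

    booster.set_attr(best_score="0.123", note="tuned on fold 0")
    assert booster.attr("note") == "tuned on fold 0"

    booster.set_attr(note=None)  # None deletes the attribute, hence Optional[str]
    assert booster.attr("note") is None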
@@ -1971,7 +1977,7 @@ class Booster(object):
"Data type:" + str(type(data)) + " not supported by inplace prediction."
|
"Data type:" + str(type(data)) + " not supported by inplace prediction."
|
||||||
)
|
)
|
||||||
|
|
||||||
def save_model(self, fname):
|
def save_model(self, fname: Union[str, os.PathLike]):
|
||||||
"""Save the model to a file.
|
"""Save the model to a file.
|
||||||
|
|
||||||
The model is saved in an XGBoost internal format which is universal among the
|
The model is saved in an XGBoost internal format which is universal among the
|
||||||

python-package/xgboost/dask.py
@@ -1028,7 +1028,8 @@ async def _direct_predict_impl(  # pylint: disable=too-many-branches
         # Somehow dask fail to infer output shape change for 2-dim prediction, and
         # `chunks = (None, output_shape[1])` doesn't work due to None is not
         # supported in map_blocks.
-        chunks = list(data.chunks)
+        chunks: Optional[List[Tuple]] = list(data.chunks)
+        assert isinstance(chunks, list)
         chunks[1] = (output_shape[1], )
     else:
         chunks = None
@@ -1633,7 +1634,7 @@ class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
         )

         if callable(self.objective):
-            obj = _objective_decorator(self.objective)
+            obj: Optional[Callable] = _objective_decorator(self.objective)
         else:
             obj = None
         model, metric, params = self._configure_fit(
@@ -1734,7 +1735,7 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
params["objective"] = "binary:logistic"
|
params["objective"] = "binary:logistic"
|
||||||
|
|
||||||
if callable(self.objective):
|
if callable(self.objective):
|
||||||
obj = _objective_decorator(self.objective)
|
obj: Optional[Callable] = _objective_decorator(self.objective)
|
||||||
else:
|
else:
|
||||||
obj = None
|
obj = None
|
||||||
model, metric, params = self._configure_fit(
|
model, metric, params = self._configure_fit(
|
||||||
|
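
All three dask.py changes follow one convention: annotate the first binding of
a variable whose branches disagree (list vs None, decorated objective vs
None), then narrow with an assert before use. A minimal sketch of the pattern
(not from this commit):

    from typing import List, Optional, Tuple

    def resize_chunks(
        two_dim: bool, data_chunks: Tuple[Tuple, ...]
    ) -> Optional[List[Tuple]]:
        if two_dim:
            # One annotation covers both branches of the if/else.
            chunks: Optional[List[Tuple]] = list(data_chunks)
            # assert narrows Optional[List[Tuple]] to List[Tuple] for indexing.
            assert isinstance(chunks, list)
            chunks[1] = (3,)
        else:
            chunks = None
        return chunks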

python-package/xgboost/sklearn.py
(File diff suppressed because it is too large.)
tests/python-gpu/test_gpu_with_dask.py
@@ -269,7 +269,9 @@ class TestDistributedGPU:
     @pytest.mark.skipif(**tm.no_dask())
     @pytest.mark.skipif(**tm.no_dask_cuda())
     @pytest.mark.parametrize("model", ["boosting"])
-    def test_dask_classifier(self, model, local_cuda_cluster: LocalCUDACluster) -> None:
+    def test_dask_classifier(
+        self, model: str, local_cuda_cluster: LocalCUDACluster
+    ) -> None:
         import dask_cudf
         with Client(local_cuda_cluster) as client:
             X_, y_, w_ = generate_array(with_weights=True)

tests/python/test_interaction_constraints.py
@@ -70,7 +70,6 @@ class TestInteractionConstraints:
             feature_names=feature_names,
             interaction_constraints=constraints)

-
         constraints = [('feature_0', 'feature_1')]
         feature_names = ['feature_0', 'feature_1', 'feature_2']
         self.run_interaction_constraints(tree_method='exact',
@@ -79,6 +78,7 @@ class TestInteractionConstraints:

     @pytest.mark.skipif(**tm.no_sklearn())
     def training_accuracy(self, tree_method):
+        """Test accuracy, reused by GPU tests."""
         from sklearn.metrics import accuracy_score
         dtrain = xgboost.DMatrix(dpath + 'agaricus.txt.train?indexing_mode=1')
         dtest = xgboost.DMatrix(dpath + 'agaricus.txt.test?indexing_mode=1')
@@ -101,11 +101,6 @@ class TestInteractionConstraints:
             pred_dtest = (bst.predict(dtest) < 0.5)
             assert accuracy_score(dtest.get_label(), pred_dtest) < 0.1

-    def test_hist_training_accuracy(self):
-        self.training_accuracy(tree_method='hist')
-
-    def test_exact_training_accuracy(self):
-        self.training_accuracy(tree_method='exact')
-
-    def test_approx_training_accuracy(self):
-        self.training_accuracy(tree_method='approx')
+    @pytest.mark.parametrize("tree_method", ["hist", "approx", "exact"])
+    def test_hist_training_accuracy(self, tree_method):
+        self.training_accuracy(tree_method=tree_method)
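
pytest.mark.parametrize generates one test case per listed value, which is why
the three hand-written copies above could be collapsed into a single test. A
generic illustration (not from this commit):

    import pytest

    @pytest.mark.parametrize("tree_method", ["hist", "approx", "exact"])
    def test_training_accuracy(tree_method: str) -> None:
        # Collected as three tests: ...[hist], ...[approx], ...[exact]
        assert tree_method in {"hist", "approx", "exact"}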

tests/python/test_monotone_constraints.py
@@ -119,13 +119,13 @@ class TestMonotoneConstraints:
         with pytest.raises(ValueError):
             xgb.train(params, training_dset)

-        feature_names =[ 'feature_0', 'feature_2']
+        feature_names = ['feature_0', 'feature_2']
         training_dset_w_feature_names = xgb.DMatrix(x, label=y, feature_names=feature_names)

         with pytest.raises(ValueError):
             xgb.train(params, training_dset_w_feature_names)

-        feature_names =[ 'feature_0', 'feature_1']
+        feature_names = ['feature_0', 'feature_1']
         training_dset_w_feature_names = xgb.DMatrix(x, label=y, feature_names=feature_names)

         constrained_learner = xgb.train(