Add Type Hints for Python Package (#7742)
Co-authored-by: Chengyang Gu <bridgream@gmail.com>
Co-authored-by: Jiamingy <jm.yuan@outlook.com>
This commit is contained in:
parent 71d3b2e036
commit 806c92c80b
@@ -1,21 +1,32 @@
 """Shared typing definition."""
 import ctypes
 import os
-from typing import Optional, Any, TypeVar, Union, Sequence
+from typing import Any, TypeVar, Union, Type, Sequence, Callable, List, Dict

 # os.PathLike/string/numpy.array/scipy.sparse/pd.DataFrame/dt.Frame/
 # cudf.DataFrame/cupy.array/dlpack
+import numpy as np

 DataType = Any

 # xgboost accepts some other possible types in practice due to historical reason, which is
 # lesser tested.  For now we encourage users to pass a simple list of string.
-FeatureNames = Optional[Sequence[str]]
-FeatureTypes = Optional[Sequence[str]]
+FeatureInfo = Sequence[str]
+FeatureNames = FeatureInfo
+FeatureTypes = FeatureInfo
+BoosterParam = Union[List, Dict]  # better be sequence

 ArrayLike = Any
 PathLike = Union[str, os.PathLike]
 CupyT = ArrayLike  # maybe need a stub for cupy arrays
 NumpyOrCupy = Any
+NumpyDType = Union[str, Type[np.number]]
+PandasDType = Any  # real type is pandas.core.dtypes.base.ExtensionDtype
+
+FloatCompatible = Union[float, np.float32, np.float64]
+
+# callables
+FPreProcCallable = Callable

 # ctypes
 # c_bst_ulong corresponds to bst_ulong defined in xgboost/c_api.h
@@ -59,3 +70,4 @@ CNumericPtr = ctypes.pointer

 # template parameter
 _T = TypeVar("_T")
+_F = TypeVar("_F", bound=Callable[..., Any])
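These aliases are the backbone of the rest of the patch: `FeatureNames` and `FeatureTypes` both collapse to `Sequence[str]`, and optionality is now expressed at each use site as `Optional[FeatureNames]` instead of being baked into the alias itself. A minimal sketch of how downstream code consumes them (the function name here is hypothetical, for illustration only):

    from typing import Optional, Sequence

    FeatureInfo = Sequence[str]
    FeatureNames = FeatureInfo
    FeatureTypes = FeatureInfo


    def describe_features(
        names: Optional[FeatureNames], types: Optional[FeatureTypes]
    ) -> str:
        # Optionality lives at the call site, so mypy forces an explicit
        # None check before the sequences are used.
        if names is None or types is None:
            return "unnamed features"
        return ", ".join(f"{n}: {t}" for n, t in zip(names, types))


    print(describe_features(["age", "income"], ["int", "float"]))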
@@ -10,8 +10,7 @@ from abc import ABC
 import collections
 import os
 import pickle
-from typing import Callable, List, Optional, Union, Dict, Tuple, TypeVar, cast
-from typing import Sequence
+from typing import Callable, List, Optional, Union, Dict, Tuple, TypeVar, cast, Sequence, Any
 import numpy

 from . import rabit
@@ -24,11 +23,14 @@ __all__ = [
     "EarlyStopping",
     "EvaluationMonitor",
     "TrainingCheckPoint",
     "CallbackContainer"
 ]

+_Score = Union[float, Tuple[float, float]]
+_ScoreList = Union[List[float], List[Tuple[float, float]]]
+
+_Model = Any  # real type is Union[Booster, CVPack]; need more work


 # pylint: disable=unused-argument
 class TrainingCallback(ABC):
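`_Score` captures the two shapes a metric entry can take: a plain float for ordinary training, or a 2-tuple as produced by cross validation (value plus spread). The `get_s` helper that appears later in `EarlyStopping._update_rounds` is exactly this normalization; a self-contained sketch of the same idea:

    from typing import List, Tuple, Union

    _Score = Union[float, Tuple[float, float]]
    _ScoreList = Union[List[float], List[Tuple[float, float]]]


    def get_s(x: _Score) -> float:
        """Return the plain score, dropping the second element of a CV entry."""
        return x[0] if isinstance(x, tuple) else x


    assert get_s(0.31) == 0.31
    assert get_s((0.31, 0.02)) == 0.31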
@@ -43,19 +45,19 @@ class TrainingCallback(ABC):
     def __init__(self) -> None:
         pass

-    def before_training(self, model):
+    def before_training(self, model: _Model) -> _Model:
         '''Run before training starts.'''
         return model

-    def after_training(self, model):
+    def after_training(self, model: _Model) -> _Model:
         '''Run after training is finished.'''
         return model

-    def before_iteration(self, model, epoch: int, evals_log: EvalsLog) -> bool:
+    def before_iteration(self, model: _Model, epoch: int, evals_log: EvalsLog) -> bool:
         '''Run before each iteration.  Return True when training should stop.'''
         return False

-    def after_iteration(self, model, epoch: int, evals_log: EvalsLog) -> bool:
+    def after_iteration(self, model: _Model, epoch: int, evals_log: EvalsLog) -> bool:
         '''Run after each iteration.  Return True when training should stop.'''
         return False

@@ -140,7 +142,7 @@ class CallbackContainer:
         if self.is_cv:
             self.aggregated_cv = None

-    def before_training(self, model):
+    def before_training(self, model: _Model) -> _Model:
         '''Function called before training.'''
         for c in self.callbacks:
             model = c.before_training(model=model)
@@ -151,7 +153,7 @@ class CallbackContainer:
         assert isinstance(model, Booster), msg
         return model

-    def after_training(self, model):
+    def after_training(self, model: _Model) -> _Model:
         '''Function called after training.'''
         for c in self.callbacks:
             model = c.after_training(model=model)
@@ -182,7 +184,7 @@ class CallbackContainer:
         return model

     def before_iteration(
-        self, model, epoch: int, dtrain: DMatrix, evals: List[Tuple[DMatrix, str]]
+        self, model: _Model, epoch: int, dtrain: DMatrix, evals: Optional[List[Tuple[DMatrix, str]]]
     ) -> bool:
         '''Function called before training iteration.'''
         return any(c.before_iteration(model, epoch, self.history)
@@ -220,7 +222,7 @@ class CallbackContainer:

     def after_iteration(
         self,
-        model,
+        model: _Model,
         epoch: int,
         dtrain: DMatrix,
         evals: Optional[List[Tuple[DMatrix, str]]],
@@ -276,7 +278,7 @@ class LearningRateScheduler(TrainingCallback):
         super().__init__()

     def after_iteration(
-        self, model, epoch: int, evals_log: TrainingCallback.EvalsLog
+        self, model: _Model, epoch: int, evals_log: TrainingCallback.EvalsLog
     ) -> bool:
         model.set_param("learning_rate", self.learning_rates(epoch))
         return False
@@ -344,12 +346,12 @@ class EarlyStopping(TrainingCallback):
         self.starting_round: int = 0
         super().__init__()

-    def before_training(self, model):
+    def before_training(self, model: _Model) -> _Model:
         self.starting_round = model.num_boosted_rounds()
         return model

     def _update_rounds(
-        self, score: _Score, name: str, metric: str, model, epoch: int
+        self, score: _Score, name: str, metric: str, model: _Model, epoch: int
     ) -> bool:
         def get_s(x: _Score) -> float:
             """get score if it's cross validation history."""
@@ -403,7 +405,7 @@ class EarlyStopping(TrainingCallback):
                 return True
             return False

-    def after_iteration(self, model, epoch: int,
+    def after_iteration(self, model: _Model, epoch: int,
                         evals_log: TrainingCallback.EvalsLog) -> bool:
         epoch += self.starting_round  # training continuation
         msg = 'Must have at least 1 validation dataset for early stopping.'
@@ -431,7 +433,7 @@ class EarlyStopping(TrainingCallback):
         score = data_log[metric_name][-1]
         return self._update_rounds(score, data_name, metric_name, model, epoch)

-    def after_training(self, model):
+    def after_training(self, model: _Model) -> _Model:
         try:
             if self.save_best:
                 model = model[: int(model.attr("best_iteration")) + 1]
@@ -477,7 +479,7 @@ class EvaluationMonitor(TrainingCallback):
             msg = f"\t{data + '-' + metric}:{score:.5f}"
         return msg

-    def after_iteration(self, model, epoch: int,
+    def after_iteration(self, model: _Model, epoch: int,
                         evals_log: TrainingCallback.EvalsLog) -> bool:
         if not evals_log:
             return False
@@ -503,7 +505,7 @@ class EvaluationMonitor(TrainingCallback):
             self._latest = msg
         return False

-    def after_training(self, model):
+    def after_training(self, model: _Model) -> _Model:
         if rabit.get_rank() == self.printer_rank and self._latest is not None:
             rabit.tracker_print(self._latest)
         return model
@@ -544,7 +546,7 @@ class TrainingCheckPoint(TrainingCallback):
         self._epoch = 0
         super().__init__()

-    def after_iteration(self, model, epoch: int,
+    def after_iteration(self, model: _Model, epoch: int,
                         evals_log: TrainingCallback.EvalsLog) -> bool:
         if self._epoch == self._iterations:
             path = os.path.join(self._path, self._name + '_' + str(epoch) +
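With `TrainingCallback` fully annotated, user-defined callbacks can be type-checked as well. A minimal sketch of a typed custom callback (the class name and print format are illustrative, not part of the commit):

    from xgboost.callback import TrainingCallback


    class IterationLogger(TrainingCallback):
        """Log the latest value of every tracked metric every few rounds."""

        def __init__(self, period: int = 10) -> None:
            self.period = period
            super().__init__()

        def after_iteration(
            self, model, epoch: int, evals_log: TrainingCallback.EvalsLog
        ) -> bool:
            if epoch % self.period == 0:
                for data, metrics in evals_log.items():
                    for name, history in metrics.items():
                        print(f"[{epoch}] {data}-{name}: {history[-1]}")
            return False  # returning True would stop training

Such a callback is passed as usual, e.g. `xgboost.train(params, dtrain, evals=[(dvalid, "valid")], callbacks=[IterationLogger(5)])`.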
@@ -1,30 +1,32 @@
 # coding: utf-8
 # pylint: disable= invalid-name,  unused-import
 """For compatibility and optional dependencies."""
-from typing import Any
+from typing import Any, Type, Dict, Optional, List
 import sys
 import types
 import importlib.util
 import logging
 import numpy as np

+from xgboost._typing import CStrPtr
+
 assert (sys.version_info[0] == 3), 'Python 2 is no longer supported.'


-def py_str(x):
+def py_str(x: CStrPtr) -> str:
     """convert c string back to python string"""
-    return x.decode('utf-8')
+    return x.decode('utf-8')  # type: ignore


-def lazy_isinstance(instance, module, name):
+def lazy_isinstance(instance: Type[object], module: str, name: str) -> bool:
     """Use string representation to identify a type."""

     # Notice, we use .__class__ as opposed to type() in order
     # to support object proxies such as weakref.proxy
     cls = instance.__class__
-    module = cls.__module__ == module
-    name = cls.__name__ == name
-    return module and name
+    is_same_module = cls.__module__ == module
+    has_same_name = cls.__name__ == name
+    return is_same_module and has_same_name


 # pandas
@@ -37,53 +39,33 @@ try:
 except ImportError:
     MultiIndex = object
-    DataFrame: Any = object
+    DataFrame = object
     Series = object
     pandas_concat = None
     PANDAS_INSTALLED = False

 # sklearn
 try:
-    from sklearn.base import BaseEstimator
-    from sklearn.base import RegressorMixin, ClassifierMixin
+    from sklearn.base import (
+        BaseEstimator as XGBModelBase,
+        RegressorMixin as XGBRegressorBase,
+        ClassifierMixin as XGBClassifierBase
+    )
     from sklearn.preprocessing import LabelEncoder

     try:
-        from sklearn.model_selection import KFold, StratifiedKFold
+        from sklearn.model_selection import (
+            KFold as XGBKFold,
+            StratifiedKFold as XGBStratifiedKFold
+        )
     except ImportError:
-        from sklearn.cross_validation import KFold, StratifiedKFold
+        from sklearn.cross_validation import (
+            KFold as XGBKFold,
+            StratifiedKFold as XGBStratifiedKFold
+        )

     SKLEARN_INSTALLED = True

-    XGBModelBase = BaseEstimator
-    XGBRegressorBase = RegressorMixin
-    XGBClassifierBase = ClassifierMixin
-
-    XGBKFold = KFold
-    XGBStratifiedKFold = StratifiedKFold
-
-    class XGBoostLabelEncoder(LabelEncoder):
-        '''Label encoder with JSON serialization methods.'''
-        def to_json(self):
-            '''Returns a JSON compatible dictionary'''
-            meta = {}
-            for k, v in self.__dict__.items():
-                if isinstance(v, np.ndarray):
-                    meta[k] = v.tolist()
-                else:
-                    meta[k] = v
-            return meta
-
-        def from_json(self, doc):
-            # pylint: disable=attribute-defined-outside-init
-            '''Load the encoder back from a JSON compatible dict.'''
-            meta = {}
-            for k, v in doc.items():
-                if k == 'classes_':
-                    self.classes_ = np.array(v)
-                    continue
-                meta[k] = v
-            self.__dict__.update(meta)
 except ImportError:
     SKLEARN_INSTALLED = False

@@ -91,10 +73,34 @@ except ImportError:
     XGBModelBase = object
     XGBClassifierBase = object
     XGBRegressorBase = object
     LabelEncoder = object

     XGBKFold = None
     XGBStratifiedKFold = None
-    XGBoostLabelEncoder = None
+
+
+class XGBoostLabelEncoder(LabelEncoder):
+    '''Label encoder with JSON serialization methods.'''
+    def to_json(self) -> Dict:
+        '''Returns a JSON compatible dictionary'''
+        meta = {}
+        for k, v in self.__dict__.items():
+            if isinstance(v, np.ndarray):
+                meta[k] = v.tolist()
+            else:
+                meta[k] = v
+        return meta
+
+    def from_json(self, doc: Dict) -> None:
+        # pylint: disable=attribute-defined-outside-init
+        '''Load the encoder back from a JSON compatible dict.'''
+        meta = {}
+        for k, v in doc.items():
+            if k == 'classes_':
+                self.classes_ = np.array(v)
+                continue
+            meta[k] = v
+        self.__dict__.update(meta)


 # dask
@@ -113,7 +119,7 @@ try:
     SCIPY_INSTALLED = True
 except ImportError:
     scipy_sparse = False
-    scipy_csr: Any = object
+    scipy_csr = object
     SCIPY_INSTALLED = False


@@ -136,15 +142,21 @@ class LazyLoader(types.ModuleType):
     """Lazily import a module, mainly to avoid pulling in large dependencies.
     """

-    def __init__(self, local_name, parent_module_globals, name, warning=None):
+    def __init__(
+        self,
+        local_name: str,
+        parent_module_globals: Dict,
+        name: str,
+        warning: Optional[str] = None
+    ) -> None:
         self._local_name = local_name
         self._parent_module_globals = parent_module_globals
         self._warning = warning
-        self.module = None
+        self.module: Optional[types.ModuleType] = None

         super().__init__(name)

-    def _load(self):
+    def _load(self) -> types.ModuleType:
         """Load the module and insert it into the parent's globals."""
         # Import the target module and insert it into the parent's namespace
         module = importlib.import_module(self.__name__)
@@ -163,12 +175,12 @@ class LazyLoader(types.ModuleType):

         return module

-    def __getattr__(self, item):
+    def __getattr__(self, item: str) -> Any:
         if not self.module:
             self.module = self._load()
         return getattr(self.module, item)

-    def __dir__(self):
+    def __dir__(self) -> List[str]:
         if not self.module:
             self.module = self._load()
         return dir(self.module)
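`LazyLoader` defers the real import until the first attribute access, which is exactly why `self.module` starts out as `None` and is now annotated `Optional[types.ModuleType]`. A small usage sketch (standalone; `dask.dataframe` is just one example of a heavy optional dependency, and this mirrors how xgboost's dask module uses the class):

    from xgboost.compat import LazyLoader

    # Nothing is imported yet; the loader only records the names.
    dd = LazyLoader("dd", globals(), "dask.dataframe")


    def is_dask_frame(data: object) -> bool:
        # First attribute access triggers the actual import of dask.dataframe.
        return isinstance(data, dd.DataFrame)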
@@ -4,12 +4,20 @@ import ctypes
 import json
 from contextlib import contextmanager
 from functools import wraps
+from typing import Optional, Callable, Any, Dict, cast, Iterator

 from .core import _LIB, _check_call, c_str, py_str
+from ._typing import _F


-def config_doc(*, header=None, extra_note=None, parameters=None, returns=None,
-               see_also=None):
+def config_doc(
+    *,
+    header: Optional[str] = None,
+    extra_note: Optional[str] = None,
+    parameters: Optional[str] = None,
+    returns: Optional[str] = None,
+    see_also: Optional[str] = None
+) -> Callable[[_F], _F]:
     """Decorator to format docstring for config functions.

     Parameters
@@ -64,19 +72,19 @@ def config_doc(*, header=None, extra_note=None, parameters=None, returns=None,
     assert xgb.get_config()['verbosity'] == 2  # old value restored
     """

-    def none_to_str(value):
+    def none_to_str(value: Optional[str]) -> str:
         return '' if value is None else value

-    def config_doc_decorator(func):
+    def config_doc_decorator(func: _F) -> _F:
         func.__doc__ = (doc_template.format(header=none_to_str(header),
                                             extra_note=none_to_str(extra_note))
                         + none_to_str(parameters) + none_to_str(returns)
                         + none_to_str(common_example) + none_to_str(see_also))

         @wraps(func)
-        def wrap(*args, **kwargs):
+        def wrap(*args: Any, **kwargs: Any) -> Any:
             return func(*args, **kwargs)
-        return wrap
+        return cast(_F, wrap)
     return config_doc_decorator


@@ -89,7 +97,7 @@ def config_doc(*, header=None, extra_note=None, parameters=None, returns=None,
     new_config: Dict[str, Any]
         Keyword arguments representing the parameters and their values
     """)
-def set_config(**new_config):
+def set_config(**new_config: Any) -> None:
     config = json.dumps(new_config)
     _check_call(_LIB.XGBSetGlobalConfig(c_str(config)))

@@ -103,7 +111,7 @@ def set_config(**new_config):
     args: Dict[str, Any]
         The list of global parameters and their values
     """)
-def get_config():
+def get_config() -> Dict[str, Any]:
     config_str = ctypes.c_char_p()
     _check_call(_LIB.XGBGetGlobalConfig(ctypes.byref(config_str)))
     config = json.loads(py_str(config_str.value))
@@ -132,7 +140,7 @@ def get_config():
     set_config: Set global XGBoost configuration
     get_config: Get current values of the global configuration
     """)
-def config_context(**new_config):
+def config_context(**new_config: Any) -> Iterator[None]:
     old_config = get_config().copy()
     set_config(**new_config)
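The `_F = TypeVar("_F", bound=Callable[..., Any])` alias from the typing module is what lets `config_doc` return a decorator that preserves the decorated function's exact signature: the wrapper produced by `wraps` is `cast` back to `_F`, so callers of `set_config` and friends keep full type information. A reduced sketch of the same pattern (generic names, not from the commit):

    from functools import wraps
    from typing import Any, Callable, TypeVar, cast

    _F = TypeVar("_F", bound=Callable[..., Any])


    def attach_doc(doc: str) -> Callable[[_F], _F]:
        """Decorator factory that rewrites __doc__ without changing the signature."""
        def decorator(func: _F) -> _F:
            func.__doc__ = doc

            @wraps(func)
            def wrap(*args: Any, **kwargs: Any) -> Any:
                return func(*args, **kwargs)
            # wraps() yields a plain Callable; the cast restores the precise type.
            return cast(_F, wrap)
        return decorator


    @attach_doc("Configure the thing.")
    def set_thing(value: int) -> None:
        ...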
@@ -30,10 +30,12 @@ from ._typing import (
     ArrayLike,
     CFloatPtr,
     NumpyOrCupy,
-    FeatureNames,
+    FeatureInfo,
     FeatureTypes,
+    FeatureNames,
     _T,
     CupyT,
+    BoosterParam
 )


@@ -273,7 +275,7 @@ def ctypes2numpy(cptr: CNumericPtr, length: int, dtype: Type[np.number]) -> np.n
     if not isinstance(cptr, ctypes.POINTER(ctype)):
         raise RuntimeError(f"expected {ctype} pointer")
     res = np.zeros(length, dtype=dtype)
-    if not ctypes.memmove(res.ctypes.data, cptr, length * res.strides[0]):
+    if not ctypes.memmove(res.ctypes.data, cptr, length * res.strides[0]):  # type: ignore
         raise RuntimeError("memmove failed")
     return res
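The only change to `ctypes2numpy` (and to `ctypes2buffer` in the next hunk) is a `# type: ignore` on the `ctypes.memmove` call. This appears to be a stub limitation rather than a real bug: the call mixes a raw integer address (`res.ctypes.data`) with a `POINTER` object, which the ctypes type stubs cannot fully express, so the commit silences the line instead of restructuring working code. Roughly the situation being annotated, as a standalone sketch:

    import ctypes
    import numpy as np


    def copy_from_ptr(cptr: "ctypes._Pointer", length: int) -> np.ndarray:
        """Copy `length` float32 values from a C pointer into a fresh array.

        Sketch of the ctypes2numpy pattern; the ignore comment acknowledges
        that the ctypes stubs cannot fully type this call.
        """
        res = np.zeros(length, dtype=np.float32)
        if not ctypes.memmove(res.ctypes.data, cptr, length * res.strides[0]):  # type: ignore
            raise RuntimeError("memmove failed")
        return res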
@@ -310,7 +312,7 @@ def ctypes2buffer(cptr: CStrPtr, length: int) -> bytearray:
         raise RuntimeError('expected char pointer')
     res = bytearray(length)
     rptr = (ctypes.c_char * length).from_buffer(res)
-    if not ctypes.memmove(rptr, cptr, length):
+    if not ctypes.memmove(rptr, cptr, length):  # type: ignore
         raise RuntimeError('memmove failed')
     return res

@@ -434,8 +436,8 @@ class DataIter(ABC):  # pylint: disable=too-many-instance-attributes
         def data_handle(
             data: Any,
             *,
-            feature_names: FeatureNames = None,
-            feature_types: Optional[List[str]] = None,
+            feature_names: Optional[FeatureNames] = None,
+            feature_types: Optional[FeatureTypes] = None,
             **kwargs: Any,
         ) -> None:
             from .data import dispatch_proxy_set_data
@@ -555,8 +557,8 @@ class DMatrix:  # pylint: disable=too-many-instance-attributes
         base_margin: Optional[ArrayLike] = None,
         missing: Optional[float] = None,
         silent: bool = False,
-        feature_names: FeatureNames = None,
-        feature_types: FeatureTypes = None,
+        feature_names: Optional[FeatureNames] = None,
+        feature_types: Optional[FeatureTypes] = None,
         nthread: Optional[int] = None,
         group: Optional[ArrayLike] = None,
         qid: Optional[ArrayLike] = None,
@@ -718,8 +720,8 @@ class DMatrix:  # pylint: disable=too-many-instance-attributes
         qid: Optional[ArrayLike] = None,
         label_lower_bound: Optional[ArrayLike] = None,
         label_upper_bound: Optional[ArrayLike] = None,
-        feature_names: FeatureNames = None,
-        feature_types: Optional[List[str]] = None,
+        feature_names: Optional[FeatureNames] = None,
+        feature_types: Optional[FeatureTypes] = None,
         feature_weights: Optional[ArrayLike] = None
     ) -> None:
         """Set meta info for DMatrix.  See doc string for :py:obj:`xgboost.DMatrix`."""
@@ -1000,7 +1002,7 @@ class DMatrix:  # pylint: disable=too-many-instance-attributes
         return res

     @property
-    def feature_names(self) -> Optional[List[str]]:
+    def feature_names(self) -> Optional[FeatureNames]:
         """Get feature names (column labels).

         Returns
@@ -1023,7 +1025,7 @@ class DMatrix:  # pylint: disable=too-many-instance-attributes
         return feature_names

     @feature_names.setter
-    def feature_names(self, feature_names: FeatureNames) -> None:
+    def feature_names(self, feature_names: Optional[FeatureNames]) -> None:
         """Set feature names (column labels).

         Parameters
@@ -1039,7 +1041,7 @@ class DMatrix:  # pylint: disable=too-many-instance-attributes
             else:
                 feature_names = [feature_names]
         except TypeError:
-            feature_names = [feature_names]
+            feature_names = [cast(str, feature_names)]

         if len(feature_names) != len(set(feature_names)):
             raise ValueError('feature_names must be unique')
@@ -1069,8 +1071,13 @@ class DMatrix:  # pylint: disable=too-many-instance-attributes
             self.feature_types = None

     @property
-    def feature_types(self) -> Optional[List[str]]:
-        """Get feature types.  See :py:class:`DMatrix` for details."""
+    def feature_types(self) -> Optional[FeatureTypes]:
+        """Get feature types (column types).
+
+        Returns
+        -------
+        feature_types : list or None
+        """
         length = c_bst_ulong()
         sarr = ctypes.POINTER(ctypes.c_char_p)()
         _check_call(_LIB.XGDMatrixGetStrFeatureInfo(self.handle,
@@ -1111,7 +1118,7 @@ class DMatrix:  # pylint: disable=too-many-instance-attributes
             else:
                 feature_types = [feature_types]
         except TypeError:
-            feature_types = [feature_types]
+            feature_types = [cast(str, feature_types)]
         feature_types_bytes = [bytes(f, encoding='utf-8')
                                for f in feature_types]
         c_feature_types = (ctypes.c_char_p *
@@ -1203,8 +1210,8 @@ class DeviceQuantileDMatrix(DMatrix):
         base_margin: Optional[ArrayLike] = None,
         missing: Optional[float] = None,
         silent: bool = False,
-        feature_names: FeatureNames = None,
-        feature_types: Optional[List[str]] = None,
+        feature_names: Optional[FeatureNames] = None,
+        feature_types: Optional[FeatureTypes] = None,
         nthread: Optional[int] = None,
         max_bin: int = 256,
         group: Optional[ArrayLike] = None,
@@ -1323,7 +1330,7 @@ def _get_booster_layer_trees(model: "Booster") -> Tuple[int, int]:
     return num_parallel_tree, num_groups


-def _configure_metrics(params: Union[Dict, List]) -> Union[Dict, List]:
+def _configure_metrics(params: BoosterParam) -> BoosterParam:
     if (
         isinstance(params, dict)
         and "eval_metric" in params
@@ -1349,7 +1356,7 @@ class Booster:

     def __init__(
         self,
-        params: Optional[Dict] = None,
+        params: Optional[BoosterParam] = None,
         cache: Optional[Sequence[DMatrix]] = None,
         model_file: Optional[Union["Booster", bytearray, os.PathLike, str]] = None
     ) -> None:
@@ -1444,7 +1451,7 @@ class Booster:
                 "Constrained features are not a subset of training data feature names"
             ) from e

-    def _configure_constraints(self, params: Union[List, Dict]) -> Union[List, Dict]:
+    def _configure_constraints(self, params: BoosterParam) -> BoosterParam:
         if isinstance(params, dict):
             value = params.get("monotone_constraints")
             if value is not None:
@@ -1607,7 +1614,7 @@ class Booster:
             return py_str(ret.value)
         return None

-    def attributes(self) -> Dict[str, str]:
+    def attributes(self) -> Dict[str, Optional[str]]:
         """Get attributes stored in the Booster as a dictionary.

         Returns
@@ -1639,7 +1646,7 @@ class Booster:
         _check_call(_LIB.XGBoosterSetAttr(
             self.handle, c_str(key), value))

-    def _get_feature_info(self, field: str) -> Optional[List[str]]:
+    def _get_feature_info(self, field: str) -> Optional[FeatureInfo]:
         length = c_bst_ulong()
         sarr = ctypes.POINTER(ctypes.c_char_p)()
         if not hasattr(self, "handle") or self.handle is None:
@@ -1652,7 +1659,7 @@ class Booster:
         feature_info = from_cstr_to_pystr(sarr, length)
         return feature_info if feature_info else None

-    def _set_feature_info(self, features: Optional[Sequence[str]], field: str) -> None:
+    def _set_feature_info(self, features: Optional[FeatureInfo], field: str) -> None:
         if features is not None:
             assert isinstance(features, list)
             feature_info_bytes = [bytes(f, encoding="utf-8") for f in features]
@@ -1670,7 +1677,7 @@ class Booster:
         )

     @property
-    def feature_types(self) -> Optional[List[str]]:
+    def feature_types(self) -> Optional[FeatureTypes]:
         """Feature types for this booster.  Can be directly set by input data or by
         assignment.  See :py:class:`DMatrix` for details.

@@ -1678,11 +1685,11 @@ class Booster:
         return self._get_feature_info("feature_type")

     @feature_types.setter
-    def feature_types(self, features: Optional[List[str]]) -> None:
+    def feature_types(self, features: Optional[FeatureTypes]) -> None:
         self._set_feature_info(features, "feature_type")

     @property
-    def feature_names(self) -> Optional[List[str]]:
+    def feature_names(self) -> Optional[FeatureNames]:
         """Feature names for this booster.  Can be directly set by input data or by
         assignment.

@@ -1690,7 +1697,7 @@ class Booster:
         return self._get_feature_info("feature_name")

     @feature_names.setter
-    def feature_names(self, features: FeatureNames) -> None:
+    def feature_names(self, features: Optional[FeatureNames]) -> None:
         self._set_feature_info(features, "feature_name")

     def set_param(
@@ -1711,7 +1718,7 @@ class Booster:
             params = params.items()
         elif isinstance(params, str) and value is not None:
             params = [(params, value)]
-        for key, val in params:
+        for key, val in cast(Iterable[Tuple[str, str]], params):
             if val is not None:
                 _check_call(_LIB.XGBoosterSetParam(self.handle, c_str(key),
                                                    c_str(str(val))))
@@ -2564,8 +2571,10 @@ class Booster:
         )
         # Booster can't accept data with different feature names
         if self.feature_names != data.feature_names:
-            dat_missing = set(self.feature_names) - set(data.feature_names)
-            my_missing = set(data.feature_names) - set(self.feature_names)
+            dat_missing = set(cast(FeatureNames, self.feature_names)) - \
+                set(cast(FeatureNames, data.feature_names))
+            my_missing = set(cast(FeatureNames, data.feature_names)) - \
+                set(cast(FeatureNames, self.feature_names))

             msg = 'feature_names mismatch: {0} {1}'
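The `feature_names` mismatch hunk shows the cost of the property now returning `Optional[FeatureNames]`: inside the `if self.feature_names != data.feature_names` branch mypy still considers either side possibly `None`, so the commit uses `cast` to assert non-None before building the sets. `cast` is purely a static assertion with no runtime effect; an equivalent alternative is binding the values to locals and narrowing with an explicit check, as in this sketch:

    from typing import Optional, Sequence, Set

    FeatureNames = Sequence[str]


    def missing_features(
        mine: Optional[FeatureNames], theirs: Optional[FeatureNames]
    ) -> Set[str]:
        # Testing the locals lets mypy narrow Optional without cast();
        # this is the same non-None fact the cast() in the hunk asserts.
        if mine is None or theirs is None:
            return set()
        return set(mine) - set(theirs)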
@@ -318,7 +318,7 @@ class DaskDMatrix:
         base_margin: Optional[_DaskCollection] = None,
         missing: float = None,
         silent: bool = False,  # pylint: disable=unused-argument
-        feature_names: FeatureNames = None,
+        feature_names: Optional[FeatureNames] = None,
         feature_types: FeatureTypes = None,
         group: Optional[_DaskCollection] = None,
         qid: Optional[_DaskCollection] = None,
@@ -594,7 +594,7 @@ class DaskPartitionIter(DataIter):  # pylint: disable=R0902
         qid: Optional[List[Any]] = None,
         label_lower_bound: Optional[List[Any]] = None,
         label_upper_bound: Optional[List[Any]] = None,
-        feature_names: FeatureNames = None,
+        feature_names: Optional[FeatureNames] = None,
         feature_types: Optional[Union[Any, List[Any]]] = None,
     ) -> None:
         self._data = data
@@ -637,7 +637,7 @@ class DaskPartitionIter(DataIter):  # pylint: disable=R0902
         if self._iter == len(self._data):
             # Return 0 when there's no more batch.
             return 0
-        feature_names: FeatureNames = None
+        feature_names: Optional[FeatureNames] = None
        if self._feature_names:
             feature_names = self._feature_names
         else:
@@ -688,7 +688,7 @@ class DaskDeviceQuantileDMatrix(DaskDMatrix):
         base_margin: Optional[_DaskCollection] = None,
         missing: float = None,
         silent: bool = False,  # disable=unused-argument
-        feature_names: FeatureNames = None,
+        feature_names: Optional[FeatureNames] = None,
         feature_types: Optional[Union[Any, List[Any]]] = None,
         max_bin: int = 256,
         group: Optional[_DaskCollection] = None,
@@ -725,7 +725,7 @@ class DaskDeviceQuantileDMatrix(DaskDMatrix):


 def _create_device_quantile_dmatrix(
-    feature_names: FeatureNames,
+    feature_names: Optional[FeatureNames],
     feature_types: Optional[Union[Any, List[Any]]],
     feature_weights: Optional[Any],
     missing: float,
@@ -766,7 +766,7 @@ def _create_device_quantile_dmatrix(


 def _create_dmatrix(
-    feature_names: FeatureNames,
+    feature_names: Optional[FeatureNames],
     feature_types: Optional[Union[Any, List[Any]]],
     feature_weights: Optional[Any],
     missing: float,
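One annotation in these dask hunks is left untouched and is worth flagging: `missing: float = None` is not valid under PEP 484, since `None` is not a `float`; under mypy's no-implicit-optional behavior (the default in newer releases) it would have to be spelled `Optional[float]`. The commit only wraps `feature_names` here. The general rule, as a sketch:

    from typing import Optional

    # Rejected by strict mypy: default None does not match the declared type.
    # def make_matrix(missing: float = None) -> None: ...

    # What strict typing requires instead:
    def make_matrix(missing: Optional[float] = None) -> None:
        # Treat None as "use the library default".
        ...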
@@ -5,17 +5,26 @@ import ctypes
 import json
 import warnings
 import os
-from typing import Any, Tuple, Callable, Optional, List, Union, Iterator, Type
+from typing import Any, Tuple, Callable, Optional, List, Union, Iterator, Sequence, cast

 import numpy as np

 from .core import c_array, _LIB, _check_call, c_str
 from .core import _cuda_array_interface
-from .core import DataIter, _ProxyDMatrix, DMatrix, FeatureNames
-from ._typing import FeatureTypes
+from .core import DataIter, _ProxyDMatrix, DMatrix
 from .compat import lazy_isinstance, DataFrame
+from ._typing import (
+    c_bst_ulong,
+    DataType,
+    FeatureTypes,
+    FeatureNames,
+    NumpyDType,
+    CupyT,
+    FloatCompatible, PandasDType
+)

-c_bst_ulong = ctypes.c_uint64  # pylint: disable=invalid-name
+DispatchedDataBackendReturnType = Tuple[
+    ctypes.c_void_p, Optional[FeatureNames], Optional[FeatureTypes]]

 CAT_T = "c"
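The new `DispatchedDataBackendReturnType` alias pins down the contract shared by every `_from_*` constructor in this module: each returns the raw ctypes handle of the native DMatrix plus the (possibly `None`) feature names and types it inferred from the input. A toy backend showing the shape of that contract (the function body is illustrative only):

    import ctypes
    from typing import Optional, Sequence, Tuple

    FeatureNames = Sequence[str]
    FeatureTypes = Sequence[str]
    DispatchedDataBackendReturnType = Tuple[
        ctypes.c_void_p, Optional[FeatureNames], Optional[FeatureTypes]]


    def _from_toy_backend(
        feature_names: Optional[FeatureNames],
        feature_types: Optional[FeatureTypes],
    ) -> DispatchedDataBackendReturnType:
        handle = ctypes.c_void_p()  # would be filled in by a C API call
        return handle, feature_names, feature_types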
@@ -23,14 +32,14 @@ CAT_T = "c"
 _matrix_meta = {"base_margin", "label"}


-def _warn_unused_missing(data, missing):
+def _warn_unused_missing(data: DataType, missing: Optional[FloatCompatible]) -> None:
     if (missing is not None) and (not np.isnan(missing)):
         warnings.warn(
             '`missing` is not used for current input data type:' +
             str(type(data)), UserWarning)


-def _check_complex(data):
+def _check_complex(data: DataType) -> None:
     '''Test whether data is complex using `dtype` attribute.'''
     complex_dtypes = (np.complex128, np.complex64,
                       np.cfloat, np.cdouble, np.clongdouble)
@@ -38,16 +47,15 @@ def _check_complex(data):
         raise ValueError('Complex data not supported')


-def _check_data_shape(data: Any) -> None:
+def _check_data_shape(data: DataType) -> None:
     if hasattr(data, "shape") and len(data.shape) != 2:
         raise ValueError("Please reshape the input data into 2-dimensional matrix.")


-def _is_scipy_csr(data):
+def _is_scipy_csr(data: DataType) -> bool:
     try:
-        import scipy
+        import scipy.sparse
     except ImportError:
-        scipy = None
         return False
     return isinstance(data, scipy.sparse.csr_matrix)
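The `_is_scipy_*` predicates also change shape slightly: `import scipy` plus a dummy `scipy = None` fallback becomes a direct `import scipy.sparse` with an early `return False`. That satisfies the type checker (a module name is never rebound to `None`) and drops the unused binding. The pattern in isolation:

    def _is_scipy_csr_sketch(data: object) -> bool:
        # Import inside the function so scipy stays an optional dependency.
        try:
            import scipy.sparse
        except ImportError:
            return False  # scipy absent: data cannot be a scipy matrix
        return isinstance(data, scipy.sparse.csr_matrix)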
@@ -64,12 +72,12 @@ def _array_interface(data: np.ndarray) -> bytes:


 def _from_scipy_csr(
-    data,
-    missing,
-    nthread,
-    feature_names: FeatureNames,
-    feature_types: FeatureTypes,
-):
+    data: DataType,
+    missing: FloatCompatible,
+    nthread: int,
+    feature_names: Optional[FeatureNames],
+    feature_types: Optional[FeatureTypes],
+) -> DispatchedDataBackendReturnType:
     """Initialize data from a CSR matrix."""
     if len(data.indices) != len(data.data):
         raise ValueError(
@@ -94,21 +102,20 @@ def _from_scipy_csr(
     return handle, feature_names, feature_types


-def _is_scipy_csc(data):
+def _is_scipy_csc(data: DataType) -> bool:
     try:
-        import scipy
+        import scipy.sparse
     except ImportError:
-        scipy = None
         return False
     return isinstance(data, scipy.sparse.csc_matrix)


 def _from_scipy_csc(
-    data,
-    missing,
-    feature_names: FeatureNames,
-    feature_types: FeatureTypes,
-):
+    data: DataType,
+    missing: Optional[FloatCompatible],
+    feature_names: Optional[FeatureNames],
+    feature_types: Optional[FeatureTypes],
+) -> DispatchedDataBackendReturnType:
     if len(data.indices) != len(data.data):
         raise ValueError(f"length mismatch: {len(data.indices)} vs {len(data.data)}")
     _warn_unused_missing(data, missing)
@@ -124,27 +131,29 @@ def _from_scipy_csc(
     return handle, feature_names, feature_types


-def _is_scipy_coo(data):
+def _is_scipy_coo(data: DataType) -> bool:
     try:
-        import scipy
+        import scipy.sparse
     except ImportError:
-        scipy = None
         return False
     return isinstance(data, scipy.sparse.coo_matrix)


-def _is_numpy_array(data):
+def _is_numpy_array(data: DataType) -> bool:
     return isinstance(data, (np.ndarray, np.matrix))


-def _ensure_np_dtype(data, dtype) -> Tuple[np.ndarray, np.dtype]:
+def _ensure_np_dtype(
+    data: DataType,
+    dtype: Optional[NumpyDType]
+) -> Tuple[np.ndarray, Optional[NumpyDType]]:
     if data.dtype.hasobject or data.dtype in [np.float16, np.bool_]:
         data = data.astype(np.float32, copy=False)
         dtype = np.float32
     return data, dtype


-def _maybe_np_slice(data: np.ndarray, dtype) -> np.ndarray:
+def _maybe_np_slice(data: DataType, dtype: Optional[NumpyDType]) -> np.ndarray:
     '''Handle numpy slice.  This can be removed if we use __array_interface__.
     '''
     try:
@@ -159,12 +168,12 @@ def _maybe_np_slice(data: np.ndarray, dtype) -> np.ndarray:


 def _from_numpy_array(
-    data,
-    missing,
-    nthread,
-    feature_names: FeatureNames,
-    feature_types: FeatureTypes,
-):
+    data: DataType,
+    missing: FloatCompatible,
+    nthread: int,
+    feature_names: Optional[FeatureNames],
+    feature_types: Optional[FeatureTypes],
+) -> DispatchedDataBackendReturnType:
     """Initialize data from a 2-D numpy matrix.

     """
@@ -189,7 +198,7 @@ def _from_numpy_array(
     return handle, feature_names, feature_types


-def _is_pandas_df(data):
+def _is_pandas_df(data: DataType) -> bool:
     try:
         import pandas as pd
     except ImportError:
@@ -197,7 +206,7 @@ def _is_pandas_df(data):
     return isinstance(data, pd.DataFrame)


-def _is_modin_df(data):
+def _is_modin_df(data: DataType) -> bool:
     try:
         import modin.pandas as pd
     except ImportError:
@@ -232,7 +241,7 @@ _ENABLE_CAT_ERR = (
 )


-def _invalid_dataframe_dtype(data: Any) -> None:
+def _invalid_dataframe_dtype(data: DataType) -> None:
     # pandas series has `dtypes` but it's just a single object
     # cudf series doesn't have `dtypes`.
     if hasattr(data, "dtypes") and hasattr(data.dtypes, "__iter__"):
@@ -253,10 +262,10 @@ def _invalid_dataframe_dtype(data: Any) -> None:
 def _pandas_feature_info(
     data: DataFrame,
     meta: Optional[str],
-    feature_names: FeatureNames,
-    feature_types: FeatureTypes,
+    feature_names: Optional[FeatureNames],
+    feature_types: Optional[FeatureTypes],
     enable_categorical: bool,
-) -> Tuple[FeatureNames, FeatureTypes]:
+) -> Tuple[Optional[FeatureNames], Optional[FeatureTypes]]:
     import pandas as pd
     from pandas.api.types import (
         is_sparse,
@@ -285,13 +294,13 @@ def _pandas_feature_info(
     return feature_names, feature_types


-def is_nullable_dtype(dtype: Any) -> bool:
+def is_nullable_dtype(dtype: PandasDType) -> bool:
     """Wether dtype is a pandas nullable type."""
     from pandas.api.types import is_integer_dtype, is_bool_dtype
     # dtype: pd.core.arrays.numeric.NumericDtype
     nullable_alias = {"Int16", "Int32", "Int64"}
     is_int = is_integer_dtype(dtype) and dtype.name in nullable_alias
-    # np.bool has alias `bool`, while pd.BooleanDtype has `boolean`.
+    # np.bool has alias `bool`, while pd.BooleanDtype has `bzoolean`.
     is_bool = is_bool_dtype(dtype) and dtype.name == "boolean"
     return is_int or is_bool

@@ -331,11 +340,11 @@ def _pandas_cat_null(data: DataFrame) -> DataFrame:
 def _transform_pandas_df(
     data: DataFrame,
     enable_categorical: bool,
-    feature_names: FeatureNames = None,
-    feature_types: FeatureTypes = None,
+    feature_names: Optional[FeatureNames] = None,
+    feature_types: Optional[FeatureTypes] = None,
     meta: Optional[str] = None,
-    meta_type: Optional[str] = None,
-) -> Tuple[np.ndarray, FeatureNames, FeatureTypes]:
+    meta_type: Optional[NumpyDType] = None,
+) -> Tuple[np.ndarray, Optional[FeatureNames], Optional[FeatureTypes]]:
     from pandas.api.types import (
         is_sparse,
         is_categorical_dtype,
@@ -359,7 +368,7 @@ def _transform_pandas_df(
     if meta and len(data.columns) > 1 and meta not in _matrix_meta:
         raise ValueError(f"DataFrame for {meta} cannot have multiple columns")

-    dtype: Union[Type[np.floating], str] = meta_type if meta_type else np.float32
+    dtype = meta_type if meta_type else np.float32
     arr: np.ndarray = transformed.values
     if meta_type:
         arr = arr.astype(dtype)
@@ -369,18 +378,18 @@ def _transform_pandas_df(
 def _from_pandas_df(
     data: DataFrame,
     enable_categorical: bool,
-    missing: float,
+    missing: FloatCompatible,
     nthread: int,
-    feature_names: FeatureNames,
-    feature_types: FeatureTypes,
-) -> Tuple[ctypes.c_void_p, FeatureNames, FeatureTypes]:
+    feature_names: Optional[FeatureNames],
+    feature_types: Optional[FeatureTypes],
+) -> DispatchedDataBackendReturnType:
     data, feature_names, feature_types = _transform_pandas_df(
         data, enable_categorical, feature_names, feature_types
     )
     return _from_numpy_array(data, missing, nthread, feature_names, feature_types)


-def _is_pandas_series(data):
+def _is_pandas_series(data: DataType) -> bool:
     try:
         import pandas as pd
     except ImportError:
@@ -389,18 +398,21 @@ def _is_pandas_series(data):


 def _meta_from_pandas_series(
-    data, name: str, dtype: Optional[str], handle: ctypes.c_void_p
+    data: DataType,
+    name: str,
+    dtype: Optional[NumpyDType],
+    handle: ctypes.c_void_p
 ) -> None:
     """Help transform pandas series for meta data like labels"""
     data = data.values.astype('float')
     from pandas.api.types import is_sparse
     if is_sparse(data):
-        data = data.to_dense()
+        data = data.to_dense()  # type: ignore
     assert len(data.shape) == 1 or data.shape[1] == 0 or data.shape[1] == 1
     _meta_from_numpy(data, name, dtype, handle)


-def _is_modin_series(data):
+def _is_modin_series(data: DataType) -> bool:
     try:
         import modin.pandas as pd
     except ImportError:
@@ -409,13 +421,13 @@ def _is_modin_series(data):


 def _from_pandas_series(
-    data,
-    missing: float,
+    data: DataType,
+    missing: FloatCompatible,
     nthread: int,
     enable_categorical: bool,
-    feature_names: FeatureNames,
-    feature_types: FeatureTypes,
-):
+    feature_names: Optional[FeatureNames],
+    feature_types: Optional[FeatureTypes],
+) -> DispatchedDataBackendReturnType:
     from pandas.api.types import is_categorical_dtype

     if (data.dtype.name not in _pandas_dtype_mapper) and not (
@@ -433,7 +445,7 @@ def _from_pandas_series(
     )


-def _is_dt_df(data):
+def _is_dt_df(data: DataType) -> bool:
     return lazy_isinstance(data, 'datatable', 'Frame') or \
         lazy_isinstance(data, 'datatable', 'DataTable')

@@ -443,12 +455,12 @@ _dt_type_mapper2 = {'bool': 'i', 'int': 'int', 'real': 'float'}


 def _transform_dt_df(
-    data,
-    feature_names: FeatureNames,
-    feature_types: FeatureTypes,
-    meta=None,
-    meta_type=None,
-):
+    data: DataType,
+    feature_names: Optional[FeatureNames],
+    feature_types: Optional[FeatureTypes],
+    meta: Optional[str] = None,
+    meta_type: Optional[NumpyDType] = None,
+) -> Tuple[np.ndarray, Optional[FeatureNames], Optional[FeatureTypes]]:
     """Validate feature names and types if data table"""
     if meta and data.shape[1] > 1:
         raise ValueError('DataTable for meta info cannot have multiple columns')
@@ -482,13 +494,13 @@ def _transform_dt_df(


 def _from_dt_df(
-    data,
-    missing,
-    nthread,
-    feature_names: FeatureNames,
-    feature_types: FeatureTypes,
+    data: DataType,
+    missing: Optional[FloatCompatible],
+    nthread: int,
+    feature_names: Optional[FeatureNames],
+    feature_types: Optional[FeatureTypes],
     enable_categorical: bool,
-) -> Tuple[ctypes.c_void_p, FeatureNames, FeatureTypes]:
+) -> DispatchedDataBackendReturnType:
     if enable_categorical:
         raise ValueError("categorical data in datatable is not supported yet.")
     data, feature_names, feature_types = _transform_dt_df(
@@ -525,7 +537,7 @@ def _from_dt_df(
     return handle, feature_names, feature_types


-def _is_arrow(data) -> bool:
+def _is_arrow(data: DataType) -> bool:
     try:
         import pyarrow as pa
         from pyarrow import dataset as arrow_dataset
@@ -571,13 +583,13 @@ def record_batch_data_iter(data_iter: Iterator) -> Callable:


 def _from_arrow(
-    data,
-    missing: float,
+    data: DataType,
+    missing: FloatCompatible,
     nthread: int,
-    feature_names: FeatureNames,
-    feature_types: FeatureTypes,
+    feature_names: Optional[FeatureNames],
+    feature_types: Optional[FeatureTypes],
     enable_categorical: bool,
-) -> Tuple[ctypes.c_void_p, FeatureNames, FeatureTypes]:
+) -> DispatchedDataBackendReturnType:
     import pyarrow as pa

     if not all(
@@ -605,11 +617,11 @@ def _from_arrow(
     return handle, feature_names, feature_types


-def _is_cudf_df(data) -> bool:
+def _is_cudf_df(data: DataType) -> bool:
     return lazy_isinstance(data, "cudf.core.dataframe", "DataFrame")


-def _cudf_array_interfaces(data, cat_codes: list) -> bytes:
+def _cudf_array_interfaces(data: DataType, cat_codes: list) -> bytes:
     """Extract CuDF __cuda_array_interface__.  This is special as it returns a new list of
     data and a list of array interfaces.  The data is list of categorical codes that
     caller can safely ignore, but have to keep their reference alive until usage of array
@@ -645,11 +657,11 @@ def _cudf_array_interfaces(data, cat_codes: list) -> bytes:


 def _transform_cudf_df(
-    data,
-    feature_names: FeatureNames,
-    feature_types: FeatureTypes,
+    data: DataType,
+    feature_names: Optional[FeatureNames],
+    feature_types: Optional[FeatureTypes],
     enable_categorical: bool,
-):
+) -> Tuple[ctypes.c_void_p, list, Optional[FeatureNames], Optional[FeatureTypes]]:
     try:
         from cudf.api.types import is_categorical_dtype
     except ImportError:
@@ -709,13 +721,13 @@ def _transform_cudf_df(


 def _from_cudf_df(
-    data,
-    missing,
-    nthread,
-    feature_names: FeatureNames,
-    feature_types: FeatureTypes,
+    data: DataType,
+    missing: FloatCompatible,
+    nthread: int,
+    feature_names: Optional[FeatureNames],
+    feature_types: Optional[FeatureTypes],
     enable_categorical: bool,
-) -> Tuple[ctypes.c_void_p, Any, Any]:
+) -> DispatchedDataBackendReturnType:
     data, cat_codes, feature_names, feature_types = _transform_cudf_df(
         data, feature_names, feature_types, enable_categorical
     )
@@ -732,7 +744,7 @@ def _from_cudf_df(
     return handle, feature_names, feature_types


-def _is_cudf_ser(data):
+def _is_cudf_ser(data: DataType) -> bool:
     try:
         import cudf
     except ImportError:
@@ -740,13 +752,13 @@ def _is_cudf_ser(data):
     return isinstance(data, cudf.Series)


-def _is_cupy_array(data: Any) -> bool:
+def _is_cupy_array(data: DataType) -> bool:
     return lazy_isinstance(data, "cupy.core.core", "ndarray") or lazy_isinstance(
         data, "cupy._core.core", "ndarray"
     )


-def _transform_cupy_array(data):
+def _transform_cupy_array(data: DataType) -> CupyT:
     import cupy  # pylint: disable=import-error
     if not hasattr(data, '__cuda_array_interface__') and hasattr(
             data, '__array__'):
@@ -757,12 +769,12 @@ def _transform_cupy_array(data):


 def _from_cupy_array(
-    data,
-    missing,
-    nthread,
-    feature_names: FeatureNames,
-    feature_types: FeatureTypes,
-):
+    data: DataType,
+    missing: FloatCompatible,
+    nthread: int,
+    feature_names: Optional[FeatureNames],
+    feature_types: Optional[FeatureTypes],
+) -> DispatchedDataBackendReturnType:
     """Initialize DMatrix from cupy ndarray."""
     data = _transform_cupy_array(data)
     interface_str = _cuda_array_interface(data)
@@ -776,7 +788,7 @@ def _from_cupy_array(
     return handle, feature_names, feature_types


-def _is_cupy_csr(data):
+def _is_cupy_csr(data: DataType) -> bool:
     try:
         import cupyx
     except ImportError:
@@ -784,7 +796,7 @@ def _is_cupy_csr(data):
     return isinstance(data, cupyx.scipy.sparse.csr_matrix)


-def _is_cupy_csc(data):
+def _is_cupy_csc(data: DataType) -> bool:
     try:
         import cupyx
     except ImportError:
@@ -792,11 +804,11 @@ def _is_cupy_csc(data):
     return isinstance(data, cupyx.scipy.sparse.csc_matrix)


-def _is_dlpack(data):
+def _is_dlpack(data: DataType) -> bool:
     return 'PyCapsule' in str(type(data)) and "dltensor" in str(data)


-def _transform_dlpack(data):
+def _transform_dlpack(data: DataType) -> bool:
     from cupy import fromDlpack  # pylint: disable=E0401
     assert 'used_dltensor' not in str(data)
     data = fromDlpack(data)
@@ -804,27 +816,27 @@ def _transform_dlpack(data):


 def _from_dlpack(
-    data,
-    missing,
-    nthread,
-    feature_names: FeatureNames,
-    feature_types: FeatureTypes,
-):
+    data: DataType,
+    missing: FloatCompatible,
+    nthread: int,
+    feature_names: Optional[FeatureNames],
+    feature_types: Optional[FeatureTypes],
+) -> DispatchedDataBackendReturnType:
     data = _transform_dlpack(data)
     return _from_cupy_array(data, missing, nthread, feature_names,
                             feature_types)


-def _is_uri(data):
+def _is_uri(data: DataType) -> bool:
     return isinstance(data, (str, os.PathLike))


 def _from_uri(
-    data,
-    missing,
-    feature_names: FeatureNames,
-    feature_types: FeatureTypes,
-):
+    data: DataType,
+    missing: Optional[FloatCompatible],
+    feature_names: Optional[FeatureNames],
+    feature_types: Optional[FeatureTypes],
+) -> DispatchedDataBackendReturnType:
     _warn_unused_missing(data, missing)
     handle = ctypes.c_void_p()
     data = os.fspath(os.path.expanduser(data))
@@ -834,51 +846,51 @@ def _from_uri(
     return handle, feature_names, feature_types


-def _is_list(data):
+def _is_list(data: DataType) -> bool:
     return isinstance(data, list)


 def _from_list(
-    data,
-    missing,
-    n_threads,
-    feature_names: FeatureNames,
-    feature_types: FeatureTypes,
-):
+    data: Sequence,
+    missing: FloatCompatible,
+    n_threads: int,
+    feature_names: Optional[FeatureNames],
+    feature_types: Optional[FeatureTypes],
+) -> DispatchedDataBackendReturnType:
     array = np.array(data)
     _check_data_shape(data)
     return _from_numpy_array(array, missing, n_threads, feature_names, feature_types)


-def _is_tuple(data):
+def _is_tuple(data: DataType) -> bool:
     return isinstance(data, tuple)


 def _from_tuple(
-    data,
-    missing,
-    n_threads,
-    feature_names: FeatureNames,
-    feature_types: FeatureTypes,
-):
+    data: Sequence,
+    missing: FloatCompatible,
+    n_threads: int,
+    feature_names: Optional[FeatureNames],
+    feature_types: Optional[FeatureTypes],
+) -> DispatchedDataBackendReturnType:
     return _from_list(data, missing, n_threads, feature_names, feature_types)


-def _is_iter(data):
+def _is_iter(data: DataType) -> bool:
     return isinstance(data, DataIter)


-def _has_array_protocol(data):
+def _has_array_protocol(data: DataType) -> bool:
     return hasattr(data, '__array__')


-def _convert_unknown_data(data):
+def _convert_unknown_data(data: DataType) -> DataType:
     warnings.warn(
         f'Unknown data type: {type(data)}, trying to convert it to csr_matrix',
         UserWarning
     )
     try:
         import scipy
         import scipy.sparse
     except ImportError:
         return None

@@ -891,13 +903,13 @@ def _convert_unknown_data(data):


 def dispatch_data_backend(
-    data,
-    missing,
-    threads,
-    feature_names: FeatureNames,
-    feature_types: FeatureTypes,
+    data: DataType,
+    missing: FloatCompatible,  # Or Optional[Float]
+    threads: int,
+    feature_names: Optional[FeatureNames],
+    feature_types: Optional[FeatureTypes],
     enable_categorical: bool = False,
-):
+) -> DispatchedDataBackendReturnType:
     '''Dispatch data for DMatrix.'''
     if not _is_cudf_ser(data) and not _is_pandas_series(data):
         _check_data_shape(data)
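`dispatch_data_backend` is the fan-out point: it walks the `_is_*` predicates in order and forwards to the matching `_from_*` constructor, so annotating its parameters (`data: DataType`, `missing: FloatCompatible`, `threads: int`) effectively types the whole ingestion path at once. A heavily compressed sketch of the dispatch structure (only two of the many branches, using the predicate and constructor names defined above):

    def dispatch_sketch(data, missing, threads, feature_names, feature_types):
        # Ordered checks; the first matching backend wins.
        if _is_scipy_csr(data):
            return _from_scipy_csr(data, missing, threads, feature_names, feature_types)
        if _is_numpy_array(data):
            return _from_numpy_array(data, missing, threads, feature_names, feature_types)
        raise TypeError('Not supported type for data.' + str(type(data)))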
@@ -964,7 +976,7 @@ def dispatch_data_backend(
     raise TypeError('Not supported type for data.' + str(type(data)))


-def _to_data_type(dtype: str, name: str):
+def _to_data_type(dtype: str, name: str) -> int:
     dtype_map = {'float32': 1, 'float64': 2, 'uint32': 3, 'uint64': 4}
     if dtype not in dtype_map:
         raise TypeError(
@@ -973,7 +985,7 @@ def _to_data_type(dtype: str, name: str):
     return dtype_map[dtype]


-def _validate_meta_shape(data: Any, name: str) -> None:
+def _validate_meta_shape(data: DataType, name: str) -> None:
     if hasattr(data, "shape"):
         msg = f"Invalid shape: {data.shape} for {name}"
         if name in _matrix_meta:
@@ -990,7 +1002,7 @@ def _validate_meta_shape(data: Any, name: str) -> None:
 def _meta_from_numpy(
     data: np.ndarray,
     field: str,
-    dtype: Optional[Union[np.dtype, str]],
+    dtype: Optional[NumpyDType],
     handle: ctypes.c_void_p,
 ) -> None:
     data, dtype = _ensure_np_dtype(data, dtype)
@@ -1001,16 +1013,26 @@ def _meta_from_numpy(
     _check_call(_LIB.XGDMatrixSetInfoFromInterface(handle, c_str(field), interface_str))


-def _meta_from_list(data, field, dtype, handle):
-    data = np.array(data)
-    _meta_from_numpy(data, field, dtype, handle)
+def _meta_from_list(
+    data: Sequence,
+    field: str,
+    dtype: Optional[NumpyDType],
+    handle: ctypes.c_void_p
+) -> None:
+    data_np = np.array(data)
+    _meta_from_numpy(data_np, field, dtype, handle)


-def _meta_from_tuple(data, field, dtype, handle):
+def _meta_from_tuple(
+    data: Sequence,
+    field: str,
+    dtype: Optional[NumpyDType],
+    handle: ctypes.c_void_p
+) -> None:
     return _meta_from_list(data, field, dtype, handle)


-def _meta_from_cudf_df(data, field: str, handle: ctypes.c_void_p) -> None:
+def _meta_from_cudf_df(data: DataType, field: str, handle: ctypes.c_void_p) -> None:
     if field not in _matrix_meta:
         _meta_from_cudf_series(data.iloc[:, 0], field, handle)
     else:
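A small but recurring mypy idiom appears in `_meta_from_list` above: the incoming `data: Sequence` is converted with `np.array` into a new variable `data_np` instead of being reassigned, because reassigning would give the parameter a type incompatible with its annotation. Sketch of the rule:

    from typing import Sequence

    import numpy as np


    def to_array(data: Sequence) -> np.ndarray:
        # `data = np.array(data)` would re-type the Sequence-annotated name
        # and fail type checking; a fresh name keeps both types precise.
        data_np = np.array(data)
        return data_np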
@ -1019,7 +1041,7 @@ def _meta_from_cudf_df(data, field: str, handle: ctypes.c_void_p) -> None:
|
||||
_check_call(_LIB.XGDMatrixSetInfoFromInterface(handle, c_str(field), interface))
|
||||
|
||||
|
||||
def _meta_from_cudf_series(data, field, handle):
|
||||
def _meta_from_cudf_series(data: DataType, field: str, handle: ctypes.c_void_p) -> None:
|
||||
interface = bytes(json.dumps([data.__cuda_array_interface__],
|
||||
indent=2), 'utf-8')
|
||||
_check_call(_LIB.XGDMatrixSetInfoFromInterface(handle,
|
||||
@ -1027,7 +1049,7 @@ def _meta_from_cudf_series(data, field, handle):
|
||||
interface))
|
||||
|
||||
|
||||
def _meta_from_cupy_array(data, field, handle):
|
||||
def _meta_from_cupy_array(data: DataType, field: str, handle: ctypes.c_void_p) -> None:
|
||||
data = _transform_cupy_array(data)
|
||||
interface = bytes(json.dumps([data.__cuda_array_interface__],
|
||||
indent=2), 'utf-8')
|
||||
@ -1036,14 +1058,22 @@ def _meta_from_cupy_array(data, field, handle):
|
||||
interface))
|
||||
|
||||
|
||||
def _meta_from_dt(data, field: str, dtype, handle: ctypes.c_void_p):
|
||||
def _meta_from_dt(
|
||||
data: DataType,
|
||||
field: str,
|
||||
dtype: Optional[NumpyDType],
|
||||
handle: ctypes.c_void_p
|
||||
) -> None:
|
||||
data, _, _ = _transform_dt_df(data, None, None, field, dtype)
|
||||
_meta_from_numpy(data, field, dtype, handle)
|
||||
|
||||
|
||||
def dispatch_meta_backend(
|
||||
matrix: DMatrix, data, name: str, dtype: Optional[Union[str, np.dtype]] = None
|
||||
):
|
||||
matrix: DMatrix,
|
||||
data: DataType,
|
||||
name: str,
|
||||
dtype: Optional[NumpyDType] = None
|
||||
) -> None:
|
||||
'''Dispatch for meta info.'''
|
||||
handle = matrix.handle
|
||||
assert handle is not None
|
||||
@ -1060,8 +1090,7 @@ def dispatch_meta_backend(
|
||||
_meta_from_numpy(data, name, dtype, handle)
|
||||
return
|
||||
if _is_pandas_df(data):
|
||||
data, _, _ = _transform_pandas_df(data, False, meta=name,
|
||||
meta_type=dtype)
|
||||
data, _, _ = _transform_pandas_df(data, False, meta=name, meta_type=dtype)
|
||||
_meta_from_numpy(data, name, dtype, handle)
|
||||
return
|
||||
if _is_pandas_series(data):
|
||||
@ -1107,7 +1136,7 @@ class SingleBatchInternalIter(DataIter):  # pylint: disable=R0902
    area for meta info.

    '''
    def __init__(self, **kwargs: Any):
    def __init__(self, **kwargs: Any) -> None:
        self.kwargs = kwargs
        self.it = 0  # pylint: disable=invalid-name
        super().__init__()
@ -1124,11 +1153,13 @@ class SingleBatchInternalIter(DataIter):  # pylint: disable=R0902


def _proxy_transform(
    data,
    feature_names: FeatureNames,
    feature_types: FeatureTypes,
    data: DataType,
    feature_names: Optional[FeatureNames],
    feature_types: Optional[FeatureTypes],
    enable_categorical: bool,
):
) -> Tuple[
    Union[bool, ctypes.c_void_p, np.ndarray],
    Optional[list], Optional[FeatureNames], Optional[FeatureTypes]]:
    if _is_cudf_df(data) or _is_cudf_ser(data):
        return _transform_cudf_df(
            data, feature_names, feature_types, enable_categorical
@ -1152,7 +1183,7 @@ def _proxy_transform(

def dispatch_proxy_set_data(
    proxy: _ProxyDMatrix,
    data: Any,
    data: DataType,
    cat_codes: Optional[list],
    allow_host: bool,
) -> None:
@ -1162,11 +1193,11 @@ def dispatch_proxy_set_data(

    if _is_cudf_df(data):
        # pylint: disable=W0212
        proxy._set_data_from_cuda_columnar(data, cat_codes)
        proxy._set_data_from_cuda_columnar(data, cast(List, cat_codes))
        return
    if _is_cudf_ser(data):
        # pylint: disable=W0212
        proxy._set_data_from_cuda_columnar(data, cat_codes)
        proxy._set_data_from_cuda_columnar(data, cast(List, cat_codes))
        return
    if _is_cupy_array(data):
        proxy._set_data_from_cuda_interface(data)  # pylint: disable=W0212
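The cast(List, cat_codes) calls are a mypy idiom, not a runtime conversion: callers guarantee cat_codes is not None by this point, and cast() only informs the checker. A self-contained sketch of the pattern (names here are illustrative, not from the diff):

from typing import List, Optional, cast

def consume(codes: List[int]) -> int:
    return sum(codes)

def dispatch(codes: Optional[List[int]]) -> int:
    # codes is known to be non-None on this path; cast() costs nothing at runtime.
    return consume(cast(List[int], codes))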
@ -4,16 +4,34 @@
"""Plotting Library."""
from io import BytesIO
import json
from typing import Optional, Any

import numpy as np

from ._typing import PathLike
from .core import Booster
from .sklearn import XGBModel

Axes = Any  # real type is matplotlib.axes.Axes
GraphvizSource = Any  # real type is graphviz.Source


def plot_importance(booster, ax=None, height=0.2,
                    xlim=None, ylim=None, title='Feature importance',
                    xlabel='F score', ylabel='Features', fmap='',
                    importance_type='weight', max_num_features=None,
                    grid=True, show_values=True, **kwargs):
def plot_importance(
    booster: Booster,
    ax: Optional[Axes] = None,
    height: float = 0.2,
    xlim: Optional[tuple] = None,
    ylim: Optional[tuple] = None,
    title: str = "Feature importance",
    xlabel: str = "F score",
    ylabel: str = "Features",
    fmap: PathLike = "",
    importance_type: str = "weight",
    max_num_features: Optional[int] = None,
    grid: bool = True,
    show_values: bool = True,
    **kwargs: Any
) -> Axes:
    """Plot importance based on fitted trees.

    Parameters
@ -78,9 +96,9 @@ def plot_importance(booster, ax=None, height=0.2,
    tuples = [(k, importance[k]) for k in importance]
    if max_num_features is not None:
        # pylint: disable=invalid-unary-operand-type
        tuples = sorted(tuples, key=lambda x: x[1])[-max_num_features:]
        tuples = sorted(tuples, key=lambda _x: _x[1])[-max_num_features:]
    else:
        tuples = sorted(tuples, key=lambda x: x[1])
        tuples = sorted(tuples, key=lambda _x: _x[1])
    labels, values = zip(*tuples)

    if ax is None:
@ -120,9 +138,17 @@ def plot_importance(booster, ax=None, height=0.2,
    return ax
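With the annotations in place, a typical call type-checks as written below. A usage sketch, assuming matplotlib is installed:

import numpy as np
import matplotlib.pyplot as plt
import xgboost as xgb

X, y = np.random.rand(100, 5), np.random.randint(0, 2, 100)
bst = xgb.train({"objective": "binary:logistic"}, xgb.DMatrix(X, label=y), num_boost_round=5)
# The return value is a matplotlib Axes; max_num_features is Optional[int].
ax = xgb.plot_importance(bst, importance_type="weight", max_num_features=10)
plt.show()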
def to_graphviz(booster, fmap='', num_trees=0, rankdir=None,
                yes_color=None, no_color=None,
                condition_node_params=None, leaf_node_params=None, **kwargs):
def to_graphviz(
    booster: Booster,
    fmap: PathLike = "",
    num_trees: int = 0,
    rankdir: Optional[str] = None,
    yes_color: Optional[str] = None,
    no_color: Optional[str] = None,
    condition_node_params: Optional[dict] = None,
    leaf_node_params: Optional[dict] = None,
    **kwargs: Any
) -> GraphvizSource:
    """Convert specified tree to graphviz instance. IPython can automatically plot
    the returned graphviz instance. Otherwise, you should call the .render() method
    of the returned graphviz instance.
@ -212,7 +238,14 @@ def to_graphviz(booster, fmap='', num_trees=0, rankdir=None,
    return g
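GraphvizSource = Any exists only because graphviz is an optional dependency; at runtime the returned object is a graphviz.Source. Continuing the sketch above (assumes the graphviz package is installed):

src = xgb.to_graphviz(bst, num_trees=0)
src.render("tree0")  # writes tree0 and tree0.pdf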
def plot_tree(booster, fmap='', num_trees=0, rankdir=None, ax=None, **kwargs):
def plot_tree(
    booster: Booster,
    fmap: PathLike = "",
    num_trees: int = 0,
    rankdir: Optional[str] = None,
    ax: Optional[Axes] = None,
    **kwargs: Any
) -> Axes:
    """Plot specified tree.

    Parameters
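plot_tree draws the same graphviz output onto a matplotlib Axes, so the two helpers compose. Continuing the sketch:

fig, ax = plt.subplots(figsize=(10, 6))
xgb.plot_tree(bst, num_trees=0, ax=ax)
plt.show()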
@ -4,8 +4,19 @@ import copy
import warnings
import json
import os
from typing import Union, Optional, List, Dict, Callable, Tuple, Any, TypeVar, Type, cast
from typing import Sequence
from typing import (
    Union,
    Optional,
    List,
    Dict,
    Callable,
    Sequence,
    Tuple,
    Any,
    TypeVar,
    Type,
    cast,
)
import numpy as np

from .core import Booster, DMatrix, XGBoostError
@ -14,7 +25,7 @@ from .core import Metric
from .training import train
from .callback import TrainingCallback
from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_array
from ._typing import ArrayLike, FeatureTypes
from ._typing import ArrayLike, FeatureNames, FeatureTypes

# Do not use class names on scikit-learn directly.  Re-define the classes on
# .compat to guarantee the behavior without scikit-learn
@ -401,7 +412,7 @@ def _wrap_evaluation_matrices(
    eval_qid: Optional[Sequence[Any]],
    create_dmatrix: Callable,
    enable_categorical: bool,
    feature_types: FeatureTypes,
    feature_types: Optional[FeatureTypes],
) -> Tuple[Any, List[Tuple[Any, str]]]:
    """Convert array_like evaluation matrices into DMatrix. Perform validation on the way.

@ -717,7 +728,7 @@ class XGBModel(XGBModelBase):
        return self._estimator_type  # pylint: disable=no-member

    def save_model(self, fname: Union[str, os.PathLike]) -> None:
        meta = {}
        meta: Dict[str, Any] = {}
        for k, v in self.__dict__.items():
            if k == '_le':
                meta['_le'] = self._le.to_json()
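The Dict[str, Any] annotation on meta matters because the scikit-learn attributes stored there mix strings, numbers, and JSON blobs. Usage is unchanged; a sketch reusing the toy data above:

clf = xgb.XGBClassifier(n_estimators=5)
clf.fit(X, y)
clf.save_model("clf.json")  # booster plus sklearn metadata in one file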
@ -1231,7 +1242,7 @@ class XGBModel(XGBModelBase):
            importance_type=self.importance_type if self.importance_type else dft()
        )
        if b.feature_names is None:
            feature_names = [f"f{i}" for i in range(self.n_features_in_)]
            feature_names: FeatureNames = [f"f{i}" for i in range(self.n_features_in_)]
        else:
            feature_names = b.feature_names
        # gblinear returns all features so the `get` in next line is only for gbtree.
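The annotated fallback covers boosters trained without names: importances are then keyed f0, f1, and so on. A quick check, continuing the sketch:

print(clf.get_booster().feature_names)  # None when fit on a bare numpy array
print(clf.feature_importances_.shape)   # (5,): one entry per f0..f4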
@ -5,20 +5,24 @@
import copy
import os
import warnings
from typing import Optional, Dict, Any, Union, Tuple, Sequence
from typing import Optional, Dict, Any, Union, Tuple, Sequence, List, cast, Iterable

import numpy as np

from .callback import TrainingCallback, CallbackContainer, EvaluationMonitor, EarlyStopping
from .core import Booster, DMatrix, XGBoostError, _deprecate_positional_args
from .core import Metric, Objective
from .compat import (SKLEARN_INSTALLED, XGBStratifiedKFold)
from . import callback
from .compat import SKLEARN_INSTALLED, XGBStratifiedKFold, DataFrame
from ._typing import _F, FPreProcCallable, BoosterParam

_CVFolds = Sequence["CVPack"]


def _assert_new_callback(
    callbacks: Optional[Sequence[callback.TrainingCallback]]
    callbacks: Optional[Sequence[TrainingCallback]]
) -> None:
    is_new_callback: bool = not callbacks or all(
        isinstance(c, callback.TrainingCallback) for c in callbacks
        isinstance(c, TrainingCallback) for c in callbacks
    )
    if not is_new_callback:
        link = "https://xgboost.readthedocs.io/en/latest/python/callbacks.html"
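_assert_new_callback rejects the legacy function-style callbacks, so anything passed in must subclass TrainingCallback. A minimal sketch of a conforming callback (the class name is illustrative):

class PrintIteration(xgb.callback.TrainingCallback):
    """Print each epoch; returning False never stops training early."""
    def after_iteration(self, model, epoch, evals_log):
        print(f"finished epoch {epoch}")
        return False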
@ -56,10 +60,10 @@ def train(
    feval: Optional[Metric] = None,
    maximize: Optional[bool] = None,
    early_stopping_rounds: Optional[int] = None,
    evals_result: callback.TrainingCallback.EvalsLog = None,
    evals_result: TrainingCallback.EvalsLog = None,
    verbose_eval: Optional[Union[bool, int]] = True,
    xgb_model: Optional[Union[str, os.PathLike, Booster, bytearray]] = None,
    callbacks: Optional[Sequence[callback.TrainingCallback]] = None,
    callbacks: Optional[Sequence[TrainingCallback]] = None,
    custom_metric: Optional[Metric] = None,
) -> Booster:
    """Train a booster with given parameters.
@ -159,12 +163,12 @@ def train(
    _assert_new_callback(callbacks)
    if verbose_eval:
        verbose_eval = 1 if verbose_eval is True else verbose_eval
        callbacks.append(callback.EvaluationMonitor(period=verbose_eval))
        callbacks.append(EvaluationMonitor(period=verbose_eval))
    if early_stopping_rounds:
        callbacks.append(
            callback.EarlyStopping(rounds=early_stopping_rounds, maximize=maximize)
            EarlyStopping(rounds=early_stopping_rounds, maximize=maximize)
        )
    cb_container = callback.CallbackContainer(
    cb_container = CallbackContainer(
        callbacks,
        metric=metric_fn,
        # For old `feval` parameter, the behavior is unchanged.  For the new
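A usage sketch of the typed train entry point, reusing the toy data and the callback defined above; EvalsLog is the alias declared on TrainingCallback:

dtrain = xgb.DMatrix(X, label=y)
evals_result: xgb.callback.TrainingCallback.EvalsLog = {}
bst = xgb.train(
    {"objective": "binary:logistic"},
    dtrain,
    num_boost_round=20,
    evals=[(dtrain, "train")],
    evals_result=evals_result,
    callbacks=[PrintIteration(), xgb.callback.EarlyStopping(rounds=5)],
)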
@ -194,71 +198,73 @@ def train(

class CVPack:
    """Auxiliary datastruct to hold one fold of CV."""
    def __init__(self, dtrain, dtest, param):
    def __init__(self, dtrain: DMatrix, dtest: DMatrix, param: Optional[Union[Dict, List]]) -> None:
        """Initialize the CVPack."""
        self.dtrain = dtrain
        self.dtest = dtest
        self.watchlist = [(dtrain, 'train'), (dtest, 'test')]
        self.bst = Booster(param, [dtrain, dtest])

    def __getattr__(self, name):
        def _inner(*args, **kwargs):
    def __getattr__(self, name: str) -> _F:
        def _inner(*args: Any, **kwargs: Any) -> Any:
            return getattr(self.bst, name)(*args, **kwargs)
        return _inner
        return cast(_F, _inner)

    def update(self, iteration, fobj):
    def update(self, iteration: int, fobj: Optional[Objective]) -> None:
        """Update the boosters for one iteration."""
        self.bst.update(self.dtrain, iteration, fobj)

    def eval(self, iteration, feval, output_margin):
    def eval(self, iteration: int, feval: Optional[Metric], output_margin: bool) -> str:
        """Evaluate the CVPack for one iteration."""
        return self.bst.eval_set(self.watchlist, iteration, feval, output_margin)
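The __getattr__ trick leans on _F = TypeVar("_F", bound=Callable[..., Any]) from _typing; the cast has no runtime effect and only tells the checker the forwarded attribute is callable. A standalone sketch of the pattern (class and names are illustrative):

from typing import Any, Callable, TypeVar, cast

_F = TypeVar("_F", bound=Callable[..., Any])

class _Forwarder:
    def __init__(self, target: Any) -> None:
        self._target = target

    def __getattr__(self, name: str) -> _F:
        def _inner(*args: Any, **kwargs: Any) -> Any:
            # Delegate the call to the wrapped object.
            return getattr(self._target, name)(*args, **kwargs)
        return cast(_F, _inner)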
class _PackedBooster:
    def __init__(self, cvfolds) -> None:
    def __init__(self, cvfolds: _CVFolds) -> None:
        self.cvfolds = cvfolds

    def update(self, iteration, obj):
    def update(self, iteration: int, obj: Optional[Objective]) -> None:
        '''Iterate through folds for update'''
        for fold in self.cvfolds:
            fold.update(iteration, obj)

    def eval(self, iteration, feval, output_margin):
    def eval(self, iteration: int, feval: Optional[Metric], output_margin: bool) -> List[str]:
        '''Iterate through folds for eval'''
        result = [f.eval(iteration, feval, output_margin) for f in self.cvfolds]
        return result

    def set_attr(self, **kwargs):
    def set_attr(self, **kwargs: Optional[str]) -> Any:
        '''Iterate through folds for setting attributes'''
        for f in self.cvfolds:
            f.bst.set_attr(**kwargs)

    def attr(self, key):
    def attr(self, key: str) -> Optional[str]:
        '''Redirect to booster attr.'''
        return self.cvfolds[0].bst.attr(key)

    def set_param(self, params, value=None):
    def set_param(self,
                  params: Union[Dict, Iterable[Tuple[str, Any]], str],
                  value: Optional[str] = None) -> None:
        """Iterate through folds for set_param"""
        for f in self.cvfolds:
            f.bst.set_param(params, value)

    def num_boosted_rounds(self):
    def num_boosted_rounds(self) -> int:
        '''Number of boosted rounds.'''
        return self.cvfolds[0].num_boosted_rounds()

    @property
    def best_iteration(self):
    def best_iteration(self) -> int:
        '''Get best_iteration'''
        return int(self.cvfolds[0].bst.attr("best_iteration"))
        return int(cast(int, self.cvfolds[0].bst.attr("best_iteration")))

    @property
    def best_score(self):
    def best_score(self) -> float:
        """Get best_score."""
        return float(self.cvfolds[0].bst.attr("best_score"))
        return float(cast(float, self.cvfolds[0].bst.attr("best_score")))
def groups_to_rows(groups, boundaries):
def groups_to_rows(groups: List[np.ndarray], boundaries: np.ndarray) -> np.ndarray:
    """
    Given group row boundaries, convert group indexes to row indexes
    :param groups: list of groups for testing
@ -268,7 +274,9 @@ def groups_to_rows(groups, boundaries):
    return np.concatenate([np.arange(boundaries[g], boundaries[g+1]) for g in groups])
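A worked example of the boundary arithmetic: group g spans rows [boundaries[g], boundaries[g+1]), so selecting groups 0 and 2 below yields rows 0-2 and 5-8. The helper is module-level in xgboost.training; it is called directly here only for illustration:

import numpy as np
from xgboost.training import groups_to_rows

boundaries = np.array([0, 3, 5, 9])
print(groups_to_rows(np.array([0, 2]), boundaries))  # [0 1 2 5 6 7 8]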
def mkgroupfold(dall, nfold, param, evals=(), fpreproc=None, shuffle=True):
def mkgroupfold(dall: DMatrix, nfold: int, param: BoosterParam,
                evals: Sequence[str] = (), fpreproc: FPreProcCallable = None,
                shuffle: bool = True) -> List[CVPack]:
    """
    Make n folds for cross-validation maintaining groups
    :return: cross-validation folds
@ -308,8 +316,10 @@ def mkgroupfold(dall, nfold, param, evals=(), fpreproc=None, shuffle=True):
    return ret


def mknfold(dall, nfold, param, seed, evals=(), fpreproc=None, stratified=False,
            folds=None, shuffle=True):
def mknfold(dall: DMatrix, nfold: int, param: BoosterParam, seed: int,
            evals: Sequence[str] = (), fpreproc: FPreProcCallable = None,
            stratified: bool = False, folds: XGBStratifiedKFold = None, shuffle: bool = True
) -> List[CVPack]:
    """
    Make an n-fold list of CVPack from random indices.
    """
@ -362,11 +372,27 @@ def mknfold(dall, nfold, param, seed, evals=(), fpreproc=None, stratified=False,
    return ret
def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None,
       metrics=(), obj: Optional[Objective] = None,
       feval=None, maximize=None, early_stopping_rounds=None,
       fpreproc=None, as_pandas=True, verbose_eval=None, show_stdv=True,
       seed=0, callbacks=None, shuffle=True, custom_metric: Optional[Metric] = None):
def cv(
    params: BoosterParam,
    dtrain: DMatrix,
    num_boost_round: int = 10,
    nfold: int = 3,
    stratified: bool = False,
    folds: XGBStratifiedKFold = None,
    metrics: Sequence[str] = (),
    obj: Optional[Objective] = None,
    feval: Optional[Metric] = None,
    maximize: bool = None,
    early_stopping_rounds: int = None,
    fpreproc: FPreProcCallable = None,
    as_pandas: bool = True,
    verbose_eval: Optional[Union[int, bool]] = None,
    show_stdv: bool = True,
    seed: int = 0,
    callbacks: Sequence[TrainingCallback] = None,
    shuffle: bool = True,
    custom_metric: Optional[Metric] = None,
) -> Union[Dict[str, float], DataFrame]:
    # pylint: disable = invalid-name
    """Cross-validation with given parameters.
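The Union[Dict[str, float], DataFrame] return tracks as_pandas: with pandas installed, the per-round means and standard deviations come back as a DataFrame. A usage sketch with the toy DMatrix from above:

res = xgb.cv(
    {"objective": "binary:logistic"},
    dtrain,
    num_boost_round=10,
    nfold=3,
    metrics=("logloss",),
    seed=0,
)
print(res["test-logloss-mean"].iloc[-1])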
@ -477,7 +503,7 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None

    params.pop("eval_metric", None)

    results = {}
    results: Dict[str, List[float]] = {}
    cvfolds = mknfold(dtrain, nfold, params, seed, metrics, fpreproc,
                      stratified, folds, shuffle)

@ -490,13 +516,13 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
    if verbose_eval:
        verbose_eval = 1 if verbose_eval is True else verbose_eval
        callbacks.append(
            callback.EvaluationMonitor(period=verbose_eval, show_stdv=show_stdv)
            EvaluationMonitor(period=verbose_eval, show_stdv=show_stdv)
        )
    if early_stopping_rounds:
        callbacks.append(
            callback.EarlyStopping(rounds=early_stopping_rounds, maximize=maximize)
            EarlyStopping(rounds=early_stopping_rounds, maximize=maximize)
        )
    callbacks = callback.CallbackContainer(
    callbacks_container = CallbackContainer(
        callbacks,
        metric=metric_fn,
        is_cv=True,
@ -504,16 +530,16 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
    )

    booster = _PackedBooster(cvfolds)
    callbacks.before_training(booster)
    callbacks_container.before_training(booster)

    for i in range(num_boost_round):
        if callbacks.before_iteration(booster, i, dtrain, None):
        if callbacks_container.before_iteration(booster, i, dtrain, None):
            break
        booster.update(i, obj)

        should_break = callbacks.after_iteration(booster, i, dtrain, None)
        res = callbacks.aggregated_cv
        for key, mean, std in res:
        should_break = callbacks_container.after_iteration(booster, i, dtrain, None)
        res = callbacks_container.aggregated_cv
        for key, mean, std in cast(List[Tuple[str, float, float]], res):
            if key + '-mean' not in results:
                results[key + '-mean'] = []
            if key + '-std' not in results:
@ -532,6 +558,6 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
        except ImportError:
            pass

    callbacks.after_training(booster)
    callbacks_container.after_training(booster)

    return results