diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 4fa958446..bc73bd0d2 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -243,7 +243,7 @@ jobs: architecture: 'x64' - name: Install Python packages run: | - python -m pip install wheel setuptools mypy dask[complete] distributed + python -m pip install wheel setuptools mypy pandas dask[complete] distributed - name: Run mypy run: | make mypy diff --git a/Makefile b/Makefile index f8ee45d3a..85adfa2c7 100644 --- a/Makefile +++ b/Makefile @@ -91,8 +91,9 @@ endif # If any of the dask tests failed, contributor won't see the other error. mypy: cd python-package; \ - mypy ./xgboost/dask.py ../tests/python/test_with_dask.py --follow-imports=silent; \ - mypy ../tests/python-gpu/test_gpu_with_dask.py --follow-imports=silent; \ + mypy ./xgboost/dask.py && \ + mypy ../tests/python-gpu/test_gpu_with_dask.py && \ + mypy ./xgboost/sklearn.py || exit 1; \ mypy . || true ; clean: diff --git a/python-package/setup.cfg b/python-package/setup.cfg index 0edf1e560..9337cad1d 100644 --- a/python-package/setup.cfg +++ b/python-package/setup.cfg @@ -3,4 +3,5 @@ description-file = README.rst [mypy] ignore_missing_imports = True -disallow_untyped_defs = True \ No newline at end of file +disallow_untyped_defs = True +follow_imports = silent \ No newline at end of file diff --git a/python-package/xgboost/callback.py b/python-package/xgboost/callback.py index 0be6a9a37..e267abe37 100644 --- a/python-package/xgboost/callback.py +++ b/python-package/xgboost/callback.py @@ -276,6 +276,9 @@ class TrainingCallback(ABC): .. versionadded:: 1.3.0 ''' + + EvalsLog = Dict[str, Dict[str, Union[List[float], List[Tuple[float, float]]]]] + def __init__(self): pass @@ -287,13 +290,11 @@ class TrainingCallback(ABC): '''Run after training is finished.''' return model - def before_iteration(self, model, epoch: int, - evals_log: 'CallbackContainer.EvalsLog') -> bool: + def before_iteration(self, model, epoch: int, evals_log: EvalsLog) -> bool: '''Run before each iteration. Return True when training should stop.''' return False - def after_iteration(self, model, epoch: int, - evals_log: 'CallbackContainer.EvalsLog') -> bool: + def after_iteration(self, model, epoch: int, evals_log: EvalsLog) -> bool: '''Run after each iteration. 
Return True when training should stop.''' return False @@ -351,7 +352,7 @@ class CallbackContainer: ''' - EvalsLog = Dict[str, Dict[str, Union[List[float], List[Tuple[float, float]]]]] + EvalsLog = TrainingCallback.EvalsLog def __init__(self, callbacks: List[TrainingCallback], diff --git a/python-package/xgboost/compat.py b/python-package/xgboost/compat.py index bfb4d4091..c0b589018 100644 --- a/python-package/xgboost/compat.py +++ b/python-package/xgboost/compat.py @@ -1,6 +1,7 @@ # coding: utf-8 # pylint: disable= invalid-name, unused-import """For compatibility and optional dependencies.""" +from typing import Any import sys import types import importlib.util @@ -36,7 +37,7 @@ except ImportError: MultiIndex = object Int64Index = object - DataFrame = object + DataFrame: Any = object Series = object pandas_concat = None PANDAS_INSTALLED = False @@ -109,10 +110,12 @@ except pkg_resources.DistributionNotFound: try: import sparse import scipy.sparse as scipy_sparse + from scipy.sparse import csr_matrix as scipy_csr SCIPY_INSTALLED = True except ImportError: sparse = False scipy_sparse = False + scipy_csr: Any = object SCIPY_INSTALLED = False diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index eac3497b8..8430d5acd 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -96,7 +96,11 @@ def from_cstr_to_pystr(data, length) -> List[str]: return res -def _convert_ntree_limit(booster, ntree_limit, iteration_range): +def _convert_ntree_limit( + booster: "Booster", + ntree_limit: Optional[int], + iteration_range: Optional[Tuple[int, int]] +) -> Optional[Tuple[int, int]]: if ntree_limit is not None and ntree_limit != 0: warnings.warn( "ntree_limit is deprecated, use `iteration_range` or model " @@ -1234,7 +1238,7 @@ class Booster(object): params += [('eval_metric', eval_metric)] return params - def _transform_monotone_constrains(self, value: Union[dict, str]) -> str: + def _transform_monotone_constrains(self, value: Union[Dict[str, int], str]) -> str: if isinstance(value, str): return value @@ -1246,7 +1250,9 @@ class Booster(object): return '(' + ','.join([str(value.get(feature_name, 0)) for feature_name in self.feature_names]) + ')' - def _transform_interaction_constraints(self, value: Union[list, str]) -> str: + def _transform_interaction_constraints( + self, value: Union[List[Tuple[str]], str] + ) -> str: if isinstance(value, str): return value @@ -1447,7 +1453,7 @@ class Booster(object): attr_names = from_cstr_to_pystr(sarr, length) return {n: self.attr(n) for n in attr_names} - def set_attr(self, **kwargs): + def set_attr(self, **kwargs: Optional[str]) -> None: """Set the attribute of the Booster. Parameters @@ -1971,7 +1977,7 @@ class Booster(object): "Data type:" + str(type(data)) + " not supported by inplace prediction." ) - def save_model(self, fname): + def save_model(self, fname: Union[str, os.PathLike]): """Save the model to a file. The model is saved in an XGBoost internal format which is universal among the diff --git a/python-package/xgboost/dask.py b/python-package/xgboost/dask.py index f916848a8..c8dda3ae9 100644 --- a/python-package/xgboost/dask.py +++ b/python-package/xgboost/dask.py @@ -1028,7 +1028,8 @@ async def _direct_predict_impl( # pylint: disable=too-many-branches # Somehow dask fail to infer output shape change for 2-dim prediction, and # `chunks = (None, output_shape[1])` doesn't work due to None is not # supported in map_blocks. 
- chunks = list(data.chunks) + chunks: Optional[List[Tuple]] = list(data.chunks) + assert isinstance(chunks, list) chunks[1] = (output_shape[1], ) else: chunks = None @@ -1633,7 +1634,7 @@ class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase): ) if callable(self.objective): - obj = _objective_decorator(self.objective) + obj: Optional[Callable] = _objective_decorator(self.objective) else: obj = None model, metric, params = self._configure_fit( @@ -1734,7 +1735,7 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase): params["objective"] = "binary:logistic" if callable(self.objective): - obj = _objective_decorator(self.objective) + obj: Optional[Callable] = _objective_decorator(self.objective) else: obj = None model, metric, params = self._configure_fit( diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index a41ad0cb6..b9b7916a8 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -1,31 +1,55 @@ # coding: utf-8 -# pylint: disable=too-many-arguments, too-many-locals, invalid-name, fixme, E0012, R0912, C0302 +# pylint: disable=too-many-arguments, too-many-locals, invalid-name, fixme, R0912, C0302 """Scikit-Learn Wrapper interface for XGBoost.""" import copy import warnings import json import os -from typing import Union, Optional, List, Dict, Callable, Tuple, Any, TypeVar +from typing import Union, Optional, List, Dict, Callable, Tuple, Any, TypeVar, Type import numpy as np + from .core import Booster, DMatrix, XGBoostError from .core import _deprecate_positional_args, _convert_ntree_limit from .core import Metric from .training import train +from .callback import TrainingCallback from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_array # Do not use class names on scikit-learn directly. Re-define the classes on # .compat to guarantee the behavior without scikit-learn -from .compat import (SKLEARN_INSTALLED, XGBModelBase, - XGBClassifierBase, XGBRegressorBase, XGBoostLabelEncoder) +from .compat import ( + SKLEARN_INSTALLED, + XGBModelBase, + XGBClassifierBase, + XGBRegressorBase, + XGBoostLabelEncoder, + DataFrame, + scipy_csr, +) + +# Actually XGBoost supports a lot more data types including `scipy.sparse.csr_matrix` and +# many others. See `data.py` for a complete list. The `array_like` here is just for +# easier type checks. +array_like = TypeVar("array_like", bound=Union[np.ndarray, DataFrame, scipy_csr]) -class XGBRankerMixIn: # pylint: disable=too-few-public-methods +class XGBRankerMixIn: # pylint: disable=too-few-public-methods """MixIn for ranking, defines the _estimator_type usually defined in scikit-learn base classes.""" + _estimator_type = "ranker" -def _objective_decorator(func): +_SklObjective = Optional[ + Union[ + str, Callable[[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]] + ] +] + + +def _objective_decorator( + func: Callable[[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]] +) -> Callable[[np.ndarray, DMatrix], Tuple[np.ndarray, np.ndarray]]: """Decorate an objective function Converts an objective function using the typical sklearn metrics @@ -33,7 +57,7 @@ def _objective_decorator(func): Parameters ---------- - func: callable + func: Expects a callable with signature ``func(y_true, y_pred)``: y_true: array_like of shape [n_samples] @@ -43,7 +67,7 @@ def _objective_decorator(func): Returns ------- - new_func: callable + new_func: The new objective function as expected by ``xgboost.training.train``. 
The signature is ``new_func(preds, dmatrix)``: @@ -53,7 +77,7 @@ def _objective_decorator(func): The training set from which the labels will be extracted using ``dmatrix.get_label()`` """ - def inner(preds, dmatrix): + def inner(preds: np.ndarray, dmatrix: DMatrix) -> Tuple[np.ndarray, np.ndarray]: """internal function""" labels = dmatrix.get_label() return func(labels, preds) @@ -66,52 +90,52 @@ __estimator_doc = ''' rounds. ''' -__model_doc = ''' - max_depth : int +__model_doc = f''' + max_depth : Optional[int] Maximum tree depth for base learners. - learning_rate : float + learning_rate : Optional[float] Boosting learning rate (xgb's "eta") - verbosity : int + verbosity : Optional[int] The degree of verbosity. Valid values are 0 (silent) - 3 (debug). - objective : string or callable + objective : {_SklObjective} Specify the learning task and the corresponding learning objective or a custom objective function to be used (see note below). - booster: string + booster: Optional[str] Specify which booster to use: gbtree, gblinear or dart. - tree_method: string + tree_method: Optional[str] Specify which tree method to use. Default to auto. If this parameter is set to default, XGBoost will choose the most conservative option available. It's recommended to study this option from parameters document. - n_jobs : int + n_jobs : Optional[int] Number of parallel threads used to run xgboost. When used with other Scikit-Learn algorithms like grid search, you may choose which algorithm to parallelize and balance the threads. Creating thread contention will significantly slow down both algorithms. - gamma : float + gamma : Optional[float] Minimum loss reduction required to make a further partition on a leaf node of the tree. - min_child_weight : float + min_child_weight : Optional[float] Minimum sum of instance weight(hessian) needed in a child. - max_delta_step : float + max_delta_step : Optional[float] Maximum delta step we allow each tree's weight estimation to be. - subsample : float + subsample : Optional[float] Subsample ratio of the training instance. - colsample_bytree : float + colsample_bytree : Optional[float] Subsample ratio of columns when constructing each tree. - colsample_bylevel : float + colsample_bylevel : Optional[float] Subsample ratio of columns for each level. - colsample_bynode : float + colsample_bynode : Optional[float] Subsample ratio of columns for each split. - reg_alpha : float (xgb's alpha) - L1 regularization term on weights - reg_lambda : float (xgb's lambda) - L2 regularization term on weights - scale_pos_weight : float + reg_alpha : Optional[float] + L1 regularization term on weights (xgb's alpha). + reg_lambda : Optional[float] + L2 regularization term on weights (xgb's lambda). + scale_pos_weight : Optional[float] Balancing of positive and negative weights. - base_score: + base_score : Optional[float] The initial prediction score of all instances, global bias. - random_state : int + random_state : Optional[Union[numpy.random.RandomState, int]] Random number seed. .. note:: @@ -121,12 +145,12 @@ __model_doc = ''' missing : float, default np.nan Value in the data which needs to be present as a missing value. - num_parallel_tree: int + num_parallel_tree: Optional[int] Used for boosting random forest. - monotone_constraints : str + monotone_constraints : Optional[Union[Dict[str, int], str]] Constraint of variable monotonicity. See tutorial for more information. 
- interaction_constraints : str + interaction_constraints : Optional[Union[str, List[Tuple[str]]]] Constraints for interaction representing permitted interactions. The constraints must be specified in the form of a nest list, e.g. [[0, 1], [2, 3, 4]], where each inner list is a group of indices of features @@ -135,12 +159,12 @@ __model_doc = ''' importance_type: string, default "gain" The feature importance type for the feature_importances\\_ property: either "gain", "weight", "cover", "total_gain" or "total_cover". - gpu_id : + gpu_id : Optional[int] Device ordinal. - validate_parameters : + validate_parameters : Optional[bool] Give warnings for unknown parameter. - \\*\\*kwargs : dict, optional + kwargs : dict, optional Keyword arguments for XGBoost Booster object. Full documentation of parameters can be found here: https://github.com/dmlc/xgboost/blob/master/doc/parameter.rst. @@ -173,7 +197,11 @@ __custom_obj_note = ''' ''' -def xgboost_model_doc(header, items, extra_parameters=None, end_note=None): +def xgboost_model_doc( + header: str, items: List[str], + extra_parameters: Optional[str] = None, + end_note: Optional[str] = None +) -> Callable[[Type], Type]: '''Obtain documentation for Scikit-Learn wrappers Parameters @@ -190,14 +218,14 @@ def xgboost_model_doc(header, items, extra_parameters=None, end_note=None): end_note: str Extra notes put to the end. ''' - def get_doc(item): + def get_doc(item: str) -> str: '''Return selected item''' __doc = {'estimators': __estimator_doc, 'model': __model_doc, 'objective': __custom_obj_note} return __doc[item] - def adddoc(cls): + def adddoc(cls: Type) -> Type: doc = [''' Parameters ---------- @@ -245,13 +273,15 @@ def _wrap_evaluation_matrices( missing=missing, ) + n_validation = 0 if eval_set is None else len(eval_set) + def validate_or_none(meta: Optional[List], name: str) -> List: if meta is None: - return [None] * len(eval_set) - if len(meta) != len(eval_set): + return [None] * n_validation + if len(meta) != n_validation: raise ValueError( f"{name}'s length does not eqaul to `eval_set`, " + - f"expecting {len(eval_set)}, got {len(meta)}" + f"expecting {n_validation}, got {len(meta)}" ) return meta @@ -317,35 +347,35 @@ class XGBModel(XGBModelBase): # pylint: disable=too-many-arguments, too-many-instance-attributes, missing-docstring def __init__( self, - max_depth=None, - learning_rate=None, - n_estimators=100, - verbosity=None, - objective=None, - booster=None, - tree_method=None, - n_jobs=None, - gamma=None, - min_child_weight=None, - max_delta_step=None, - subsample=None, - colsample_bytree=None, - colsample_bylevel=None, - colsample_bynode=None, - reg_alpha=None, - reg_lambda=None, - scale_pos_weight=None, - base_score=None, - random_state=None, - missing=np.nan, - num_parallel_tree=None, - monotone_constraints=None, - interaction_constraints=None, - importance_type="gain", - gpu_id=None, - validate_parameters=None, - **kwargs - ): + max_depth: Optional[int] = None, + learning_rate: Optional[float] = None, + n_estimators: int = 100, + verbosity: Optional[int] = None, + objective: _SklObjective = None, + booster: Optional[str] = None, + tree_method: Optional[str] = None, + n_jobs: Optional[int] = None, + gamma: Optional[float] = None, + min_child_weight: Optional[float] = None, + max_delta_step: Optional[float] = None, + subsample: Optional[float] = None, + colsample_bytree: Optional[float] = None, + colsample_bylevel: Optional[float] = None, + colsample_bynode: Optional[float] = None, + reg_alpha: Optional[float] = None, + reg_lambda: 
Optional[float] = None, + scale_pos_weight: Optional[float] = None, + base_score: Optional[float] = None, + random_state: Optional[Union[np.random.RandomState, int]] = None, + missing: float = np.nan, + num_parallel_tree: Optional[int] = None, + monotone_constraints: Optional[Union[Dict[str, int], str]] = None, + interaction_constraints: Optional[Union[str, List[Tuple[str]]]] = None, + importance_type: str = "gain", + gpu_id: Optional[int] = None, + validate_parameters: Optional[bool] = None, + **kwargs: Any + ) -> None: if not SKLEARN_INSTALLED: raise XGBoostError( "sklearn needs to be installed in order to use this module" @@ -380,11 +410,11 @@ class XGBModel(XGBModelBase): self.gpu_id = gpu_id self.validate_parameters = validate_parameters - def _more_tags(self): + def _more_tags(self) -> Dict[str, bool]: '''Tags used for scikit-learn data validation.''' return {'allow_nan': True, 'no_validation': True} - def get_booster(self): + def get_booster(self) -> Booster: """Get the underlying xgboost Booster of this model. This will raise an exception when fit was not called @@ -398,7 +428,7 @@ class XGBModel(XGBModelBase): raise NotFittedError('need to call fit or load_model beforehand') return self._Booster - def set_params(self, **params): + def set_params(self, **params: Any) -> "XGBModel": """Set the parameters of this estimator. Modification of the sklearn method to allow unknown kwargs. This allows using the full range of xgboost parameters that are not defined as member variables in sklearn grid @@ -427,7 +457,7 @@ class XGBModel(XGBModelBase): return self - def get_params(self, deep=True): + def get_params(self, deep: bool = True) -> Dict[str, Any]: # pylint: disable=attribute-defined-outside-init """Get parameters.""" # Based on: https://stackoverflow.com/questions/59248211 @@ -447,7 +477,7 @@ class XGBModel(XGBModelBase): params['random_state'] = params['random_state'].randint( np.iinfo(np.int32).max) - def parse_parameter(value): + def parse_parameter(value: Any) -> Optional[Union[int, float, str]]: for t in (int, float, str): try: ret = t(value) @@ -477,7 +507,7 @@ class XGBModel(XGBModelBase): pass return params - def get_xgb_params(self): + def get_xgb_params(self) -> Dict[str, Any]: """Get xgboost specific parameters.""" params = self.get_params() # Parameters that should not go into native learner. @@ -489,7 +519,7 @@ class XGBModel(XGBModelBase): filtered[k] = v return filtered - def get_num_boosting_rounds(self): + def get_num_boosting_rounds(self) -> int: """Gets the number of xgboost boosting rounds.""" return self.n_estimators @@ -501,7 +531,7 @@ class XGBModel(XGBModelBase): ) return self._estimator_type # pylint: disable=no-member - def save_model(self, fname: Union[str, os.PathLike]): + def save_model(self, fname: Union[str, os.PathLike]) -> None: meta = dict() for k, v in self.__dict__.items(): if k == '_le': @@ -532,15 +562,15 @@ class XGBModel(XGBModelBase): if not hasattr(self, '_Booster'): self._Booster = Booster({'n_jobs': self.n_jobs}) self.get_booster().load_model(fname) - meta = self.get_booster().attr('scikit_learn') - if meta is None: + meta_str = self.get_booster().attr('scikit_learn') + if meta_str is None: # FIXME(jiaming): This doesn't have to be a problem as most of the needed # information like num_class and objective is in Learner class. warnings.warn( 'Loading a native XGBoost model with Scikit-Learn interface.' 
) return - meta = json.loads(meta) + meta = json.loads(meta_str) states = dict() for k, v in meta.items(): if k == '_le': @@ -568,14 +598,17 @@ class XGBModel(XGBModelBase): def _configure_fit( self, - booster: Optional[Union[Booster, "XGBModel"]], + booster: Optional[Union[Booster, "XGBModel", str]], eval_metric: Optional[Union[Callable, str, List[str]]], params: Dict[str, Any], - ) -> Tuple[Booster, Optional[Metric], Dict[str, Any]]: + ) -> Tuple[Optional[Union[Booster, str]], Optional[Metric], Dict[str, Any]]: # pylint: disable=protected-access, no-self-use - model = booster - if hasattr(model, '_Booster'): - model = model._Booster # Handle the case when xgb_model is a sklearn model object + if isinstance(booster, XGBModel): + # Handle the case when xgb_model is a sklearn model object + model: Optional[Union[Booster, str]] = booster._Booster + else: + model = booster + feval = eval_metric if callable(eval_metric) else None if eval_metric is not None: if callable(eval_metric): @@ -584,7 +617,7 @@ class XGBModel(XGBModelBase): params.update({"eval_metric": eval_metric}) return model, feval, params - def _set_evaluation_result(self, evals_result: Optional[dict]) -> None: + def _set_evaluation_result(self, evals_result: TrainingCallback.EvalsLog) -> None: if evals_result: for val in evals_result.items(): evals_result_key = list(val[1].keys())[0] @@ -594,21 +627,21 @@ class XGBModel(XGBModelBase): @_deprecate_positional_args def fit( self, - X, - y, + X: array_like, + y: array_like, *, - sample_weight=None, - base_margin=None, - eval_set=None, - eval_metric=None, - early_stopping_rounds=None, - verbose=True, + sample_weight: Optional[array_like] = None, + base_margin: Optional[array_like] = None, + eval_set: Optional[List[Tuple[array_like, array_like]]] = None, + eval_metric: Optional[Union[str, List[str], Metric]] = None, + early_stopping_rounds: Optional[int] = None, + verbose: Optional[bool] = True, xgb_model: Optional[Union[Booster, str, "XGBModel"]] = None, - sample_weight_eval_set=None, - base_margin_eval_set=None, - feature_weights=None, - callbacks=None - ): + sample_weight_eval_set: Optional[List[array_like]] = None, + base_margin_eval_set: Optional[List[array_like]] = None, + feature_weights: Optional[array_like] = None, + callbacks: Optional[List[TrainingCallback]] = None + ) -> "XGBModel": # pylint: disable=invalid-name,attribute-defined-outside-init """Fit gradient boosting model. @@ -618,57 +651,61 @@ class XGBModel(XGBModelBase): Parameters ---------- - X : array_like + X : Feature matrix - y : array_like + y : Labels - sample_weight : array_like + sample_weight : instance weights - base_margin : array_like + base_margin : global bias for each instance. - eval_set : list, optional + eval_set : A list of (X, y) tuple pairs to use as validation sets, for which metrics will be computed. Validation metrics will help us track the performance of the model. - eval_metric : str, list of str, or callable, optional - If a str, should be a built-in evaluation metric to use. See - doc/parameter.rst. + eval_metric : + If a str, should be a built-in evaluation metric to use. See doc/parameter.rst. + If a list of str, should be the list of multiple built-in evaluation metrics to use. + If callable, a custom evaluation metric. The call signature is ``func(y_predicted, y_true)`` where ``y_true`` will be a DMatrix object such that you may need to call the ``get_label`` method. 
It must return a str, value pair where the str is a name for the evaluation and value is the value of the evaluation function. The callable custom objective is always minimized. - early_stopping_rounds : int + early_stopping_rounds : Activates early stopping. Validation metric needs to improve at least once in every **early_stopping_rounds** round(s) to continue training. Requires at least one item in **eval_set**. + The method returns the model from the last iteration (not the best one). If there's more than one item in **eval_set**, the last entry will be used for early stopping. + If there's more than one metric in **eval_metric**, the last metric will be used for early stopping. + If early stopping occurs, the model will have three additional fields: ``clf.best_score``, ``clf.best_iteration`` and ``clf.best_ntree_limit``. - verbose : bool + verbose : If `verbose` and an evaluation set is used, writes the evaluation metric measured on the validation set to stderr. xgb_model : file name of stored XGBoost model or 'Booster' instance XGBoost model to be loaded before training (allows training continuation). - sample_weight_eval_set : list, optional + sample_weight_eval_set : A list of the form [L_1, L_2, ..., L_n], where each L_i is an array like object storing instance weights for the i-th validation set. - base_margin_eval_set : list, optional + base_margin_eval_set : A list of the form [M_1, M_2, ..., M_n], where each M_i is an array like object storing base margin for the i-th validation set. - feature_weights: array_like + feature_weights : Weight for each feature, defines the probability of each feature being selected when colsample is being used. All values must be greater than 0, otherwise a `ValueError` is thrown. Only available for `hist`, `gpu_hist` and `exact` tree methods. - callbacks : list of callback functions + callbacks : List of callback functions that are applied at end of each iteration. It is possible to use predefined callbacks by using :ref:`callback_api`. Example: @@ -679,7 +716,7 @@ class XGBModel(XGBModelBase): save_best=True)] """ - evals_result = {} + evals_result: TrainingCallback.EvalsLog = {} train_dmatrix, evals = _wrap_evaluation_matrices( missing=self.missing, @@ -700,7 +737,9 @@ class XGBModel(XGBModelBase): params = self.get_xgb_params() if callable(self.objective): - obj = _objective_decorator(self.objective) + obj: Optional[ + Callable[[np.ndarray, DMatrix], Tuple[np.ndarray, np.ndarray]] + ] = _objective_decorator(self.objective) params["objective"] = "reg:squarederror" else: obj = None @@ -748,13 +787,13 @@ class XGBModel(XGBModelBase): def predict( self, - X, - output_margin=False, - ntree_limit=None, - validate_features=True, - base_margin=None, - iteration_range=None, - ): + X: array_like, + output_margin: bool = False, + ntree_limit: Optional[int] = None, + validate_features: bool = True, + base_margin: Optional[array_like] = None, + iteration_range: Optional[Tuple[int, int]] = None, + ) -> np.ndarray: """ Predict with `X`. @@ -762,16 +801,16 @@ class XGBModel(XGBModelBase): Parameters ---------- - X : array_like + X : Data to predict with. - output_margin : bool + output_margin : Whether to output the raw untransformed margin value. - ntree_limit : int + ntree_limit : Deprecated, use `iteration_range` instead. - validate_features : bool + validate_features : When this is True, validate that the Booster's and data's feature_names are identical. Otherwise, it is assumed that the feature_names are the same. 
- base_margin : array_like + base_margin : Margin added to prediction. iteration_range : Specifies which layer of trees are used in prediction. For example, if a @@ -782,7 +821,7 @@ class XGBModel(XGBModelBase): .. versionadded:: 1.4.0 Returns ------- - prediction : numpy array + prediction """ iteration_range = _convert_ntree_limit( self.get_booster(), ntree_limit, iteration_range @@ -817,7 +856,9 @@ class XGBModel(XGBModelBase): ) def apply( - self, X, ntree_limit: int = 0, iteration_range: Optional[Tuple[int, int]] = None + self, X: array_like, + ntree_limit: int = 0, + iteration_range: Optional[Tuple[int, int]] = None ) -> np.ndarray: """Return the predicted leaf every tree for each sample. @@ -850,7 +891,7 @@ class XGBModel(XGBModelBase): iteration_range=iteration_range ) - def evals_result(self): + def evals_result(self) -> TrainingCallback.EvalsLog: """Return the evaluation results. If **eval_set** is passed to the `fit` function, you can call @@ -883,12 +924,14 @@ class XGBModel(XGBModelBase): .. code-block:: python {'validation_0': {'logloss': ['0.604835', '0.531479']}, - 'validation_1': {'logloss': ['0.41965', '0.17686']}} + 'validation_1': {'logloss': ['0.41965', '0.17686']}} """ - if self.evals_result_: + if getattr(self, "evals_result_", None) is not None: evals_result = self.evals_result_ else: - raise XGBoostError('No results.') + raise XGBoostError( + "No evaluation result, `eval_set` is not used during training." + ) return evals_result @@ -919,7 +962,7 @@ class XGBModel(XGBModelBase): return int(self._early_stopping_attr('best_ntree_limit')) @property - def feature_importances_(self): + def feature_importances_(self) -> np.ndarray: """ Feature importances property @@ -945,14 +988,14 @@ class XGBModel(XGBModelBase): else: feature_names = b.feature_names all_features = [score.get(f, 0.) 
for f in feature_names] - all_features = np.array(all_features, dtype=np.float32) - total = all_features.sum() + all_features_arr = np.array(all_features, dtype=np.float32) + total = all_features_arr.sum() if total == 0: - return all_features - return all_features / total + return all_features_arr + return all_features_arr / total @property - def coef_(self): + def coef_(self) -> np.ndarray: """ Coefficients property @@ -982,7 +1025,7 @@ class XGBModel(XGBModelBase): return coef @property - def intercept_(self): + def intercept_(self) -> np.ndarray: """ Intercept (bias) property @@ -1004,7 +1047,7 @@ class XGBModel(XGBModelBase): return np.array(json.loads(b.get_dump(dump_format='json')[0])['bias']) -PredtT = TypeVar("PredtT") +PredtT = TypeVar("PredtT", bound=np.ndarray) def _cls_predict_proba(n_classes: int, prediction: PredtT, vstack: Callable) -> PredtT: @@ -1029,28 +1072,34 @@ def _cls_predict_proba(n_classes: int, prediction: PredtT, vstack: Callable) -> class XGBClassifier(XGBModel, XGBClassifierBase): # pylint: disable=missing-docstring,invalid-name,too-many-instance-attributes @_deprecate_positional_args - def __init__(self, *, objective="binary:logistic", use_label_encoder=True, **kwargs): + def __init__( + self, + *, + objective: _SklObjective = "binary:logistic", + use_label_encoder: bool = True, + **kwargs: Any + ) -> None: self.use_label_encoder = use_label_encoder super().__init__(objective=objective, **kwargs) @_deprecate_positional_args def fit( self, - X, - y, + X: array_like, + y: array_like, *, - sample_weight=None, - base_margin=None, - eval_set=None, - eval_metric=None, - early_stopping_rounds=None, - verbose=True, - xgb_model=None, - sample_weight_eval_set=None, - base_margin_eval_set=None, - feature_weights=None, - callbacks=None - ): + sample_weight: Optional[array_like] = None, + base_margin: Optional[array_like] = None, + eval_set: Optional[List[Tuple[array_like, array_like]]] = None, + eval_metric: Optional[Union[str, List[str], Metric]] = None, + early_stopping_rounds: Optional[int] = None, + verbose: Optional[bool] = True, + xgb_model: Optional[Union[Booster, str, XGBModel]] = None, + sample_weight_eval_set: Optional[List[array_like]] = None, + base_margin_eval_set: Optional[List[array_like]] = None, + feature_weights: Optional[array_like] = None, + callbacks: Optional[List[TrainingCallback]] = None + ) -> "XGBClassifier": # pylint: disable = attribute-defined-outside-init,too-many-statements can_use_label_encoder = True label_encoding_check_error = ( @@ -1065,7 +1114,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase): "starting with 0, i.e. 0, 1, 2, ..., [num_class - 1]." ) - evals_result = {} + evals_result: TrainingCallback.EvalsLog = {} if _is_cudf_df(y) or _is_cudf_ser(y): import cupy as cp # pylint: disable=E0401 @@ -1101,7 +1150,9 @@ class XGBClassifier(XGBModel, XGBClassifierBase): params = self.get_xgb_params() if callable(self.objective): - obj = _objective_decorator(self.objective) + obj: Optional[ + Callable[[np.ndarray, DMatrix], Tuple[np.ndarray, np.ndarray]] + ] = _objective_decorator(self.objective) # Use default value. Is it really not used ? 
params["objective"] = "binary:logistic" else: @@ -1169,19 +1220,20 @@ class XGBClassifier(XGBModel, XGBClassifierBase): self._set_evaluation_result(evals_result) return self + assert XGBModel.fit.__doc__ is not None fit.__doc__ = XGBModel.fit.__doc__.replace( 'Fit gradient boosting model', 'Fit gradient boosting classifier', 1) def predict( self, - X, - output_margin=False, - ntree_limit=None, - validate_features=True, - base_margin=None, + X: array_like, + output_margin: bool = False, + ntree_limit: Optional[int] = None, + validate_features: bool = True, + base_margin: Optional[array_like] = None, iteration_range: Optional[Tuple[int, int]] = None, - ): + ) -> np.ndarray: class_probs = super().predict( X=X, output_margin=output_margin, @@ -1196,7 +1248,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase): if len(class_probs.shape) > 1: # turns softprob into softmax - column_indexes = np.argmax(class_probs, axis=1) + column_indexes: np.ndarray = np.argmax(class_probs, axis=1) # type: ignore else: # turns soft logit into class label column_indexes = np.repeat(0, class_probs.shape[0]) @@ -1208,10 +1260,10 @@ class XGBClassifier(XGBModel, XGBClassifierBase): def predict_proba( self, - X, - ntree_limit=None, - validate_features=False, - base_margin=None, + X: array_like, + ntree_limit: Optional[int] = None, + validate_features: bool = False, + base_margin: Optional[array_like] = None, iteration_range: Optional[Tuple[int, int]] = None, ) -> np.ndarray: """ Predict the probability of each `X` example being of a given class. @@ -1237,7 +1289,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase): Returns ------- - prediction : numpy array + prediction : a numpy array of shape array-like of shape (n_samples, n_classes) with the probability of each data example being of a given class. """ @@ -1259,7 +1311,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase): getattr(self, "n_classes_", None), class_probs, np.vstack ) - def evals_result(self): + def evals_result(self) -> TrainingCallback.EvalsLog: """Return the evaluation results. 
If **eval_set** is passed to the `fit` function, you can call @@ -1315,13 +1367,15 @@ class XGBClassifier(XGBModel, XGBClassifierBase): class XGBRFClassifier(XGBClassifier): # pylint: disable=missing-docstring @_deprecate_positional_args - def __init__(self, *, - learning_rate=1, - subsample=0.8, - colsample_bynode=0.8, - reg_lambda=1e-5, - use_label_encoder=True, - **kwargs): + def __init__( + self, *, + learning_rate: float = 1.0, + subsample: float = 0.8, + colsample_bynode: float = 0.8, + reg_lambda: float = 1e-5, + use_label_encoder: bool = True, + **kwargs: Any + ): super().__init__(learning_rate=learning_rate, subsample=subsample, colsample_bynode=colsample_bynode, @@ -1329,12 +1383,12 @@ class XGBRFClassifier(XGBClassifier): use_label_encoder=use_label_encoder, **kwargs) - def get_xgb_params(self): + def get_xgb_params(self) -> Dict[str, Any]: params = super().get_xgb_params() params['num_parallel_tree'] = self.n_estimators return params - def get_num_boosting_rounds(self): + def get_num_boosting_rounds(self) -> int: return 1 @@ -1344,7 +1398,9 @@ class XGBRFClassifier(XGBClassifier): class XGBRegressor(XGBModel, XGBRegressorBase): # pylint: disable=missing-docstring @_deprecate_positional_args - def __init__(self, *, objective="reg:squarederror", **kwargs): + def __init__( + self, *, objective: _SklObjective = "reg:squarederror", **kwargs: Any + ) -> None: super().__init__(objective=objective, **kwargs) @@ -1357,18 +1413,25 @@ class XGBRegressor(XGBModel, XGBRegressorBase): class XGBRFRegressor(XGBRegressor): # pylint: disable=missing-docstring @_deprecate_positional_args - def __init__(self, *, learning_rate=1, subsample=0.8, colsample_bynode=0.8, - reg_lambda=1e-5, **kwargs): + def __init__( + self, + *, + learning_rate: float = 1.0, + subsample: float = 0.8, + colsample_bynode: float = 0.8, + reg_lambda: float = 1e-5, + **kwargs: Any + ) -> None: super().__init__(learning_rate=learning_rate, subsample=subsample, colsample_bynode=colsample_bynode, reg_lambda=reg_lambda, **kwargs) - def get_xgb_params(self): + def get_xgb_params(self) -> Dict[str, Any]: params = super().get_xgb_params() params['num_parallel_tree'] = self.n_estimators return params - def get_num_boosting_rounds(self): + def get_num_boosting_rounds(self) -> int: return 1 @@ -1416,34 +1479,34 @@ class XGBRFRegressor(XGBRegressor): class XGBRanker(XGBModel, XGBRankerMixIn): # pylint: disable=missing-docstring,too-many-arguments,invalid-name @_deprecate_positional_args - def __init__(self, *, objective="rank:pairwise", **kwargs): + def __init__(self, *, objective: str = "rank:pairwise", **kwargs: Any): super().__init__(objective=objective, **kwargs) if callable(self.objective): raise ValueError("custom objective function not supported by XGBRanker") - if "rank:" not in self.objective: + if "rank:" not in objective: raise ValueError("please use XGBRanker for ranking task") @_deprecate_positional_args def fit( self, - X, - y, + X: array_like, + y: array_like, *, - group=None, - qid=None, - sample_weight=None, - base_margin=None, - eval_set=None, - eval_group=None, - eval_qid=None, - eval_metric=None, - early_stopping_rounds=None, - verbose=False, + group: Optional[array_like] = None, + qid: Optional[array_like] = None, + sample_weight: Optional[array_like] = None, + base_margin: Optional[array_like] = None, + eval_set: Optional[List[Tuple[array_like, array_like]]] = None, + eval_group: Optional[List[array_like]] = None, + eval_qid: Optional[List[array_like]] = None, + eval_metric: Optional[Union[str, List[str], Metric]] 
= None, + early_stopping_rounds: Optional[int] = None, + verbose: Optional[bool] = False, xgb_model: Optional[Union[Booster, str, XGBModel]] = None, - sample_weight_eval_set=None, - base_margin_eval_set=None, - feature_weights=None, - callbacks=None + sample_weight_eval_set: Optional[List[array_like]] = None, + base_margin_eval_set: Optional[List[array_like]] = None, + feature_weights: Optional[array_like] = None, + callbacks: Optional[List[TrainingCallback]] = None ) -> "XGBRanker": # pylint: disable = attribute-defined-outside-init,arguments-differ """Fit gradient boosting ranker @@ -1454,18 +1517,18 @@ class XGBRanker(XGBModel, XGBRankerMixIn): Parameters ---------- - X : array_like + X : Feature matrix - y : array_like + y : Labels - group : array_like + group : Size of each query group of training data. Should have as many elements as the query groups in the training data. If this is set to None, then user must provide qid. - qid : array_like + qid : Query ID for each training sample. Should have the size of n_samples. If this is set to None, then user must provide group. - sample_weight : array_like + sample_weight : Query group weights .. note:: Weights are per-group for ranking tasks @@ -1474,24 +1537,24 @@ class XGBRanker(XGBModel, XGBRankerMixIn): data point). This is because we only care about the relative ordering of data points within each group, so it doesn't make sense to assign weights to individual data points. - base_margin : array_like + base_margin : Global bias for each instance. - eval_set : list, optional + eval_set : A list of (X, y) tuple pairs to use as validation sets, for which metrics will be computed. Validation metrics will help us track the performance of the model. - eval_group : list of arrays, optional + eval_group : A list in which ``eval_group[i]`` is the list containing the sizes of all query groups in the ``i``-th pair in **eval_set**. - eval_qid : list of array_like, optional + eval_qid : A list in which ``eval_qid[i]`` is the array containing query ID of ``i``-th pair in **eval_set**. - eval_metric : str, list of str, optional + eval_metric : If a str, should be a built-in evaluation metric to use. See doc/parameter.rst. If a list of str, should be the list of multiple built-in evaluation metrics to use. The custom evaluation metric is not yet supported for the ranker. - early_stopping_rounds : int + early_stopping_rounds : Activates early stopping. Validation metric needs to improve at least once in every **early_stopping_rounds** round(s) to continue training. Requires at least one item in **eval_set**. @@ -1502,13 +1565,13 @@ class XGBRanker(XGBModel, XGBRankerMixIn): used for early stopping. If early stopping occurs, the model will have three additional fields: ``clf.best_score``, ``clf.best_iteration`` and ``clf.best_ntree_limit``. - verbose : bool + verbose : If `verbose` and an evaluation set is used, writes the evaluation metric measured on the validation set to stderr. xgb_model : file name of stored XGBoost model or 'Booster' instance XGBoost model to be loaded before training (allows training continuation). - sample_weight_eval_set : list, optional + sample_weight_eval_set : A list of the form [L_1, L_2, ..., L_n], where each L_i is a list of group weights on the i-th validation set. @@ -1518,15 +1581,15 @@ class XGBRanker(XGBModel, XGBRankerMixIn): data point). This is because we only care about the relative ordering of data points within each group, so it doesn't make sense to assign weights to individual data points. 
- base_margin_eval_set : list, optional + base_margin_eval_set : A list of the form [M_1, M_2, ..., M_n], where each M_i is an array like object storing base margin for the i-th validation set. - feature_weights: array_like + feature_weights : Weight for each feature, defines the probability of each feature being selected when colsample is being used. All values must be greater than 0, otherwise a `ValueError` is thrown. Only available for `hist`, `gpu_hist` and `exact` tree methods. - callbacks : list of callback functions + callbacks : List of callback functions that are applied at end of each iteration. It is possible to use predefined callbacks by using :ref:`callback_api`. Example: @@ -1562,7 +1625,7 @@ class XGBRanker(XGBModel, XGBRankerMixIn): create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs), ) - evals_result = {} + evals_result: TrainingCallback.EvalsLog = {} params = self.get_xgb_params() model, feval, params = self._configure_fit(xgb_model, eval_metric, params) diff --git a/tests/python-gpu/test_gpu_with_dask.py b/tests/python-gpu/test_gpu_with_dask.py index cfdd9db12..38895062a 100644 --- a/tests/python-gpu/test_gpu_with_dask.py +++ b/tests/python-gpu/test_gpu_with_dask.py @@ -269,7 +269,9 @@ class TestDistributedGPU: @pytest.mark.skipif(**tm.no_dask()) @pytest.mark.skipif(**tm.no_dask_cuda()) @pytest.mark.parametrize("model", ["boosting"]) - def test_dask_classifier(self, model, local_cuda_cluster: LocalCUDACluster) -> None: + def test_dask_classifier( + self, model: str, local_cuda_cluster: LocalCUDACluster + ) -> None: import dask_cudf with Client(local_cuda_cluster) as client: X_, y_, w_ = generate_array(with_weights=True) diff --git a/tests/python/test_interaction_constraints.py b/tests/python/test_interaction_constraints.py index 6c3a442a6..c582614c8 100644 --- a/tests/python/test_interaction_constraints.py +++ b/tests/python/test_interaction_constraints.py @@ -60,25 +60,25 @@ class TestInteractionConstraints: def test_interaction_constraints_feature_names(self): with pytest.raises(ValueError): constraints = [('feature_0', 'feature_1')] - self.run_interaction_constraints(tree_method='exact', + self.run_interaction_constraints(tree_method='exact', interaction_constraints=constraints) with pytest.raises(ValueError): constraints = [('feature_0', 'feature_3')] feature_names = ['feature_0', 'feature_1', 'feature_2'] - self.run_interaction_constraints(tree_method='exact', - feature_names=feature_names, + self.run_interaction_constraints(tree_method='exact', + feature_names=feature_names, interaction_constraints=constraints) - constraints = [('feature_0', 'feature_1')] feature_names = ['feature_0', 'feature_1', 'feature_2'] - self.run_interaction_constraints(tree_method='exact', - feature_names=feature_names, + self.run_interaction_constraints(tree_method='exact', + feature_names=feature_names, interaction_constraints=constraints) @pytest.mark.skipif(**tm.no_sklearn()) def training_accuracy(self, tree_method): + """Test accuracy, reused by GPU tests.""" from sklearn.metrics import accuracy_score dtrain = xgboost.DMatrix(dpath + 'agaricus.txt.train?indexing_mode=1') dtest = xgboost.DMatrix(dpath + 'agaricus.txt.test?indexing_mode=1') @@ -101,11 +101,6 @@ class TestInteractionConstraints: pred_dtest = (bst.predict(dtest) < 0.5) assert accuracy_score(dtest.get_label(), pred_dtest) < 0.1 - def test_hist_training_accuracy(self): - self.training_accuracy(tree_method='hist') - - def test_exact_training_accuracy(self): - self.training_accuracy(tree_method='exact') - 
- def test_approx_training_accuracy(self): - self.training_accuracy(tree_method='approx') + @pytest.mark.parametrize("tree_method", ["hist", "approx", "exact"]) + def test_hist_training_accuracy(self, tree_method): + self.training_accuracy(tree_method=tree_method) diff --git a/tests/python/test_monotone_constraints.py b/tests/python/test_monotone_constraints.py index 8e29a53fa..21fba7348 100644 --- a/tests/python/test_monotone_constraints.py +++ b/tests/python/test_monotone_constraints.py @@ -22,14 +22,14 @@ def is_correctly_constrained(learner, feature_names=None): for i in range(n): fixed_x = fixed_xs_values[i] * np.ones((n, 1)) monotonically_increasing_x = np.column_stack((variable_x, fixed_x)) - monotonically_increasing_dset = xgb.DMatrix(monotonically_increasing_x, + monotonically_increasing_dset = xgb.DMatrix(monotonically_increasing_x, feature_names=feature_names) monotonically_increasing_y = learner.predict( monotonically_increasing_dset ) monotonically_decreasing_x = np.column_stack((fixed_x, variable_x)) - monotonically_decreasing_dset = xgb.DMatrix(monotonically_decreasing_x, + monotonically_decreasing_dset = xgb.DMatrix(monotonically_decreasing_x, feature_names=feature_names) monotonically_decreasing_y = learner.predict( monotonically_decreasing_dset @@ -105,7 +105,7 @@ class TestMonotoneConstraints: @pytest.mark.parametrize('format', [dict, list]) def test_monotone_constraints_feature_names(self, format): - + # next check monotonicity when initializing monotone_constraints by feature names params = { 'tree_method': 'hist', 'verbosity': 1, @@ -119,13 +119,13 @@ class TestMonotoneConstraints: with pytest.raises(ValueError): xgb.train(params, training_dset) - feature_names =[ 'feature_0', 'feature_2'] + feature_names = ['feature_0', 'feature_2'] training_dset_w_feature_names = xgb.DMatrix(x, label=y, feature_names=feature_names) with pytest.raises(ValueError): xgb.train(params, training_dset_w_feature_names) - - feature_names =[ 'feature_0', 'feature_1'] + + feature_names = ['feature_0', 'feature_1'] training_dset_w_feature_names = xgb.DMatrix(x, label=y, feature_names=feature_names) constrained_learner = xgb.train(
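
For reference, a minimal usage sketch (assuming NumPy and scikit-learn are available; the dataset and estimator names are illustrative, not part of the patch) of the ``func(y_true, y_pred)`` custom-objective signature that ``_SklObjective`` and ``_objective_decorator`` describe above:

```python
from typing import Tuple

import numpy as np
from sklearn.datasets import make_regression
import xgboost as xgb


def squared_error_objective(
    y_true: np.ndarray, y_pred: np.ndarray
) -> Tuple[np.ndarray, np.ndarray]:
    """Gradient and hessian of squared error, matching ``func(y_true, y_pred)``."""
    grad = y_pred - y_true          # first-order gradient per sample
    hess = np.ones_like(y_pred)     # second-order gradient (hessian) per sample
    return grad, hess


X, y = make_regression(n_samples=256, n_features=8, random_state=0)
reg = xgb.XGBRegressor(
    objective=squared_error_objective, n_estimators=8, tree_method="hist"
)
reg.fit(X, y, eval_set=[(X, y)], eval_metric="rmse", verbose=False)
print(reg.evals_result()["validation_0"]["rmse"][-1])
```

A function annotated like this type-checks against the new signatures under the ``disallow_untyped_defs``/``follow_imports = silent`` settings added to ``setup.cfg``.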
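
Similarly, a small sketch of a user callback annotated against the ``TrainingCallback.EvalsLog`` alias introduced above (the ``validation_0`` key assumes the scikit-learn ``fit`` path with a single ``eval_set`` entry):

```python
from xgboost.callback import TrainingCallback


class LastMetricPrinter(TrainingCallback):
    """Print the most recent metric value after every boosting round."""

    def after_iteration(
        self, model, epoch: int, evals_log: TrainingCallback.EvalsLog
    ) -> bool:
        # evals_log maps data name -> metric name -> history of recorded values
        for data_name, metrics in evals_log.items():
            for metric_name, history in metrics.items():
                print(f"[{epoch}] {data_name}-{metric_name}: {history[-1]}")
        return False  # returning True would request early stopping


# Reusing ``reg``, ``X`` and ``y`` from the snippet above:
reg.fit(
    X, y,
    eval_set=[(X, y)],
    eval_metric="rmse",
    callbacks=[LastMetricPrinter()],
    verbose=False,
)
```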