Require black formatter for the python package. (#8748)

2023-02-07 01:53:33 +08:00
parent a2e433a089
commit 0f37a01dd9
10 changed files with 707 additions and 574 deletions
--- a/python-package/xgboost/collective.py
+++ b/python-package/xgboost/collective.py
@@ -152,42 +152,52 @@ def broadcast(data: _T, root: int) -> _T:
    rank = get_rank()
    length = ctypes.c_ulong()
    if root == rank:
-        assert data is not None, 'need to pass in data when broadcasting'
+        assert data is not None, "need to pass in data when broadcasting"
        s = pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL)
        length.value = len(s)
    # run first broadcast
-    _check_call(_LIB.XGCommunicatorBroadcast(ctypes.byref(length),
-                                             ctypes.sizeof(ctypes.c_ulong), root))
+    _check_call(
+        _LIB.XGCommunicatorBroadcast(
+            ctypes.byref(length), ctypes.sizeof(ctypes.c_ulong), root
+        )
+    )
    if root != rank:
        dptr = (ctypes.c_char * length.value)()
        # run second
-        _check_call(_LIB.XGCommunicatorBroadcast(ctypes.cast(dptr, ctypes.c_void_p),
-                                                 length.value, root))
+        _check_call(
+            _LIB.XGCommunicatorBroadcast(
+                ctypes.cast(dptr, ctypes.c_void_p), length.value, root
+            )
+        )
        data = pickle.loads(dptr.raw)
        del dptr
    else:
-        _check_call(_LIB.XGCommunicatorBroadcast(ctypes.cast(ctypes.c_char_p(s), ctypes.c_void_p),
-                                                 length.value, root))
+        _check_call(
+            _LIB.XGCommunicatorBroadcast(
+                ctypes.cast(ctypes.c_char_p(s), ctypes.c_void_p), length.value, root
+            )
+        )
        del s
    return data


 # enumeration of dtypes
 DTYPE_ENUM__ = {
-    np.dtype('int8'): 0,
-    np.dtype('uint8'): 1,
-    np.dtype('int32'): 2,
-    np.dtype('uint32'): 3,
-    np.dtype('int64'): 4,
-    np.dtype('uint64'): 5,
-    np.dtype('float32'): 6,
-    np.dtype('float64'): 7
+    np.dtype("int8"): 0,
+    np.dtype("uint8"): 1,
+    np.dtype("int32"): 2,
+    np.dtype("uint32"): 3,
+    np.dtype("int64"): 4,
+    np.dtype("uint64"): 5,
+    np.dtype("float32"): 6,
+    np.dtype("float64"): 7,
 }


@unique
 class Op(IntEnum):
    """Supported operations for allreduce."""
+
    MAX = 0
    MIN = 1
    SUM = 2
@@ -196,9 +206,7 @@ class Op(IntEnum):
    BITWISE_XOR = 5


-def allreduce(  # pylint:disable=invalid-name
-        data: np.ndarray, op: Op
-) -> np.ndarray:
+def allreduce(data: np.ndarray, op: Op) -> np.ndarray:  # pylint:disable=invalid-name
    """Perform allreduce, return the result.

    Parameters
@@ -218,15 +226,22 @@ def allreduce(  # pylint:disable=invalid-name
    This function is not thread-safe.
    """
    if not isinstance(data, np.ndarray):
-        raise TypeError('allreduce only takes in numpy.ndarray')
+        raise TypeError("allreduce only takes in numpy.ndarray")
    buf = data.ravel()
    if buf.base is data.base:
        buf = buf.copy()
    if buf.dtype not in DTYPE_ENUM__:
        raise Exception(f"data type {buf.dtype} not supported")
-    _check_call(_LIB.XGCommunicatorAllreduce(buf.ctypes.data_as(ctypes.c_void_p),
-                                             buf.size, DTYPE_ENUM__[buf.dtype],
-                                             int(op), None, None))
+    _check_call(
+        _LIB.XGCommunicatorAllreduce(
+            buf.ctypes.data_as(ctypes.c_void_p),
+            buf.size,
+            DTYPE_ENUM__[buf.dtype],
+            int(op),
+            None,
+            None,
+        )
+    )
    return buf


--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
--- a/python-package/xgboost/data.py
+++ b/python-package/xgboost/data.py
@@ -1,6 +1,6 @@
 # pylint: disable=too-many-arguments, too-many-branches, too-many-lines
 # pylint: disable=too-many-return-statements, import-error
-'''Data dispatching for DMatrix.'''
+"""Data dispatching for DMatrix."""
 import ctypes
 import json
 import os
@@ -108,6 +108,7 @@ def _from_scipy_csr(
    feature_types: Optional[FeatureTypes],
 ) -> DispatchedDataBackendReturnType:
    """Initialize data from a CSR matrix."""
+
    handle = ctypes.c_void_p()
    data = transform_scipy_sparse(data, True)
    _check_call(
@@ -178,8 +179,7 @@ def _ensure_np_dtype(


 def _maybe_np_slice(data: DataType, dtype: Optional[NumpyDType]) -> np.ndarray:
-    '''Handle numpy slice.  This can be removed if we use __array_interface__.
-    '''
+    """Handle numpy slice.  This can be removed if we use __array_interface__."""
    try:
        if not data.flags.c_contiguous:
            data = np.array(data, copy=True, dtype=dtype)
@@ -653,6 +653,7 @@ def _is_arrow(data: DataType) -> bool:
    try:
        import pyarrow as pa
        from pyarrow import dataset as arrow_dataset
+
        return isinstance(data, (pa.Table, arrow_dataset.Dataset))
    except ImportError:
        return False
@@ -878,8 +879,8 @@ def _is_cupy_array(data: DataType) -> bool:

 def _transform_cupy_array(data: DataType) -> CupyT:
    import cupy  # pylint: disable=import-error
-    if not hasattr(data, '__cuda_array_interface__') and hasattr(
-            data, '__array__'):
+
+    if not hasattr(data, "__cuda_array_interface__") and hasattr(data, "__array__"):
        data = cupy.array(data, copy=False)
    if data.dtype.hasobject or data.dtype in [cupy.float16, cupy.bool_]:
        data = data.astype(cupy.float32, copy=False)
@@ -900,9 +901,9 @@ def _from_cupy_array(
    config = bytes(json.dumps({"missing": missing, "nthread": nthread}), "utf-8")
    _check_call(
        _LIB.XGDMatrixCreateFromCudaArrayInterface(
-            interface_str,
-            config,
-            ctypes.byref(handle)))
+            interface_str, config, ctypes.byref(handle)
+        )
+    )
    return handle, feature_names, feature_types


@@ -923,12 +924,13 @@ def _is_cupy_csc(data: DataType) -> bool:


 def _is_dlpack(data: DataType) -> bool:
-    return 'PyCapsule' in str(type(data)) and "dltensor" in str(data)
+    return "PyCapsule" in str(type(data)) and "dltensor" in str(data)


 def _transform_dlpack(data: DataType) -> bool:
    from cupy import fromDlpack  # pylint: disable=E0401
-    assert 'used_dltensor' not in str(data)
+
+    assert "used_dltensor" not in str(data)
    data = fromDlpack(data)
    return data

@@ -941,8 +943,7 @@ def _from_dlpack(
    feature_types: Optional[FeatureTypes],
 ) -> DispatchedDataBackendReturnType:
    data = _transform_dlpack(data)
-    return _from_cupy_array(data, missing, nthread, feature_names,
-                            feature_types)
+    return _from_cupy_array(data, missing, nthread, feature_names, feature_types)


 def _is_uri(data: DataType) -> bool:
@@ -1003,13 +1004,13 @@ def _is_iter(data: DataType) -> bool:


 def _has_array_protocol(data: DataType) -> bool:
-    return hasattr(data, '__array__')
+    return hasattr(data, "__array__")


 def _convert_unknown_data(data: DataType) -> DataType:
    warnings.warn(
-        f'Unknown data type: {type(data)}, trying to convert it to csr_matrix',
-        UserWarning
+        f"Unknown data type: {type(data)}, trying to convert it to csr_matrix",
+        UserWarning,
    )
    try:
        import scipy.sparse
@@ -1018,7 +1019,7 @@ def _convert_unknown_data(data: DataType) -> DataType:

    try:
        data = scipy.sparse.csr_matrix(data)
-    except Exception:           # pylint: disable=broad-except
+    except Exception:  # pylint: disable=broad-except
        return None

    return data
@@ -1033,7 +1034,7 @@ def dispatch_data_backend(
    enable_categorical: bool = False,
    data_split_mode: DataSplitMode = DataSplitMode.ROW,
 ) -> DispatchedDataBackendReturnType:
-    '''Dispatch data for DMatrix.'''
+    """Dispatch data for DMatrix."""
    if not _is_cudf_ser(data) and not _is_pandas_series(data):
        _check_data_shape(data)
    if _is_scipy_csr(data):
@@ -1054,6 +1055,7 @@ def dispatch_data_backend(
        return _from_tuple(data, missing, threads, feature_names, feature_types)
    if _is_pandas_series(data):
        import pandas as pd
+
        data = pd.DataFrame(data)
    if _is_pandas_df(data):
        return _from_pandas_df(
@@ -1064,39 +1066,41 @@ def dispatch_data_backend(
            data, missing, threads, feature_names, feature_types, enable_categorical
        )
    if _is_cupy_array(data):
-        return _from_cupy_array(data, missing, threads, feature_names,
-                                feature_types)
+        return _from_cupy_array(data, missing, threads, feature_names, feature_types)
    if _is_cupy_csr(data):
-        raise TypeError('cupyx CSR is not supported yet.')
+        raise TypeError("cupyx CSR is not supported yet.")
    if _is_cupy_csc(data):
-        raise TypeError('cupyx CSC is not supported yet.')
+        raise TypeError("cupyx CSC is not supported yet.")
    if _is_dlpack(data):
-        return _from_dlpack(data, missing, threads, feature_names,
-                            feature_types)
+        return _from_dlpack(data, missing, threads, feature_names, feature_types)
    if _is_dt_df(data):
        _warn_unused_missing(data, missing)
        return _from_dt_df(
            data, missing, threads, feature_names, feature_types, enable_categorical
        )
    if _is_modin_df(data):
-        return _from_pandas_df(data, enable_categorical, missing, threads,
-                               feature_names, feature_types)
+        return _from_pandas_df(
+            data, enable_categorical, missing, threads, feature_names, feature_types
+        )
    if _is_modin_series(data):
        return _from_pandas_series(
            data, missing, threads, enable_categorical, feature_names, feature_types
        )
    if _is_arrow(data):
        return _from_arrow(
-            data, missing, threads, feature_names, feature_types, enable_categorical)
+            data, missing, threads, feature_names, feature_types, enable_categorical
+        )
    if _has_array_protocol(data):
        array = np.asarray(data)
        return _from_numpy_array(array, missing, threads, feature_names, feature_types)

    converted = _convert_unknown_data(data)
    if converted is not None:
-        return _from_scipy_csr(converted, missing, threads, feature_names, feature_types)
+        return _from_scipy_csr(
+            converted, missing, threads, feature_names, feature_types
+        )

-    raise TypeError('Not supported type for data.' + str(type(data)))
+    raise TypeError("Not supported type for data." + str(type(data)))


 def _validate_meta_shape(data: DataType, name: str) -> None:
@@ -1128,20 +1132,14 @@ def _meta_from_numpy(


 def _meta_from_list(
-    data: Sequence,
-    field: str,
-    dtype: Optional[NumpyDType],
-    handle: ctypes.c_void_p
+    data: Sequence, field: str, dtype: Optional[NumpyDType], handle: ctypes.c_void_p
 ) -> None:
    data_np = np.array(data)
    _meta_from_numpy(data_np, field, dtype, handle)


 def _meta_from_tuple(
-    data: Sequence,
-    field: str,
-    dtype: Optional[NumpyDType],
-    handle: ctypes.c_void_p
+    data: Sequence, field: str, dtype: Optional[NumpyDType], handle: ctypes.c_void_p
 ) -> None:
    return _meta_from_list(data, field, dtype, handle)

@@ -1156,39 +1154,27 @@ def _meta_from_cudf_df(data: DataType, field: str, handle: ctypes.c_void_p) -> N


 def _meta_from_cudf_series(data: DataType, field: str, handle: ctypes.c_void_p) -> None:
-    interface = bytes(json.dumps([data.__cuda_array_interface__],
-                                 indent=2), 'utf-8')
-    _check_call(_LIB.XGDMatrixSetInfoFromInterface(handle,
-                                                   c_str(field),
-                                                   interface))
+    interface = bytes(json.dumps([data.__cuda_array_interface__], indent=2), "utf-8")
+    _check_call(_LIB.XGDMatrixSetInfoFromInterface(handle, c_str(field), interface))


 def _meta_from_cupy_array(data: DataType, field: str, handle: ctypes.c_void_p) -> None:
    data = _transform_cupy_array(data)
-    interface = bytes(json.dumps([data.__cuda_array_interface__],
-                                 indent=2), 'utf-8')
-    _check_call(_LIB.XGDMatrixSetInfoFromInterface(handle,
-                                                   c_str(field),
-                                                   interface))
+    interface = bytes(json.dumps([data.__cuda_array_interface__], indent=2), "utf-8")
+    _check_call(_LIB.XGDMatrixSetInfoFromInterface(handle, c_str(field), interface))


 def _meta_from_dt(
-    data: DataType,
-    field: str,
-    dtype: Optional[NumpyDType],
-    handle: ctypes.c_void_p
+    data: DataType, field: str, dtype: Optional[NumpyDType], handle: ctypes.c_void_p
 ) -> None:
    data, _, _ = _transform_dt_df(data, None, None, field, dtype)
    _meta_from_numpy(data, field, dtype, handle)


 def dispatch_meta_backend(
-    matrix: DMatrix,
-    data: DataType,
-    name: str,
-    dtype: Optional[NumpyDType] = None
+    matrix: DMatrix, data: DataType, name: str, dtype: Optional[NumpyDType] = None
 ) -> None:
-    '''Dispatch for meta info.'''
+    """Dispatch for meta info."""
    handle = matrix.handle
    assert handle is not None
    _validate_meta_shape(data, name)
@@ -1231,7 +1217,7 @@ def dispatch_meta_backend(
        _meta_from_numpy(data, name, dtype, handle)
        return
    if _is_modin_series(data):
-        data = data.values.astype('float')
+        data = data.values.astype("float")
        assert len(data.shape) == 1 or data.shape[1] == 0 or data.shape[1] == 1
        _meta_from_numpy(data, name, dtype, handle)
        return
@@ -1240,19 +1226,20 @@ def dispatch_meta_backend(
        array = np.asarray(data)
        _meta_from_numpy(array, name, dtype, handle)
        return
-    raise TypeError('Unsupported type for ' + name, str(type(data)))
+    raise TypeError("Unsupported type for " + name, str(type(data)))


 class SingleBatchInternalIter(DataIter):  # pylint: disable=R0902
-    '''An iterator for single batch data to help creating device DMatrix.
+    """An iterator for single batch data to help creating device DMatrix.
    Transforming input directly to histogram with normal single batch data API
    can not access weight for sketching.  So this iterator acts as a staging
    area for meta info.

-    '''
+    """
+
    def __init__(self, **kwargs: Any) -> None:
        self.kwargs = kwargs
-        self.it = 0             # pylint: disable=invalid-name
+        self.it = 0  # pylint: disable=invalid-name

        # This does not necessarily increase memory usage as the data transformation
        # might use memory.
--- a/python-package/xgboost/libpath.py
+++ b/python-package/xgboost/libpath.py
@@ -22,45 +22,51 @@ def find_lib_path() -> List[str]:
    curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
    dll_path = [
        # normal, after installation `lib` is copied into Python package tree.
-        os.path.join(curr_path, 'lib'),
+        os.path.join(curr_path, "lib"),
        # editable installation, no copying is performed.
-        os.path.join(curr_path, os.path.pardir, os.path.pardir, 'lib'),
+        os.path.join(curr_path, os.path.pardir, os.path.pardir, "lib"),
        # use libxgboost from a system prefix, if available.  This should be the last
        # option.
-        os.path.join(sys.prefix, 'lib'),
+        os.path.join(sys.prefix, "lib"),
    ]

-    if sys.platform == 'win32':
-        if platform.architecture()[0] == '64bit':
-            dll_path.append(
-                os.path.join(curr_path, '../../windows/x64/Release/'))
+    if sys.platform == "win32":
+        if platform.architecture()[0] == "64bit":
+            dll_path.append(os.path.join(curr_path, "../../windows/x64/Release/"))
            # hack for pip installation when copy all parent source
            # directory here
-            dll_path.append(os.path.join(curr_path, './windows/x64/Release/'))
+            dll_path.append(os.path.join(curr_path, "./windows/x64/Release/"))
        else:
-            dll_path.append(os.path.join(curr_path, '../../windows/Release/'))
+            dll_path.append(os.path.join(curr_path, "../../windows/Release/"))
            # hack for pip installation when copy all parent source
            # directory here
-            dll_path.append(os.path.join(curr_path, './windows/Release/'))
-        dll_path = [os.path.join(p, 'xgboost.dll') for p in dll_path]
-    elif sys.platform.startswith(('linux', 'freebsd', 'emscripten')):
-        dll_path = [os.path.join(p, 'libxgboost.so') for p in dll_path]
-    elif sys.platform == 'darwin':
-        dll_path = [os.path.join(p, 'libxgboost.dylib') for p in dll_path]
-    elif sys.platform == 'cygwin':
-        dll_path = [os.path.join(p, 'cygxgboost.dll') for p in dll_path]
-    if platform.system() == 'OS400':
-        dll_path = [os.path.join(p, 'libxgboost.so') for p in dll_path]
+            dll_path.append(os.path.join(curr_path, "./windows/Release/"))
+        dll_path = [os.path.join(p, "xgboost.dll") for p in dll_path]
+    elif sys.platform.startswith(("linux", "freebsd", "emscripten")):
+        dll_path = [os.path.join(p, "libxgboost.so") for p in dll_path]
+    elif sys.platform == "darwin":
+        dll_path = [os.path.join(p, "libxgboost.dylib") for p in dll_path]
+    elif sys.platform == "cygwin":
+        dll_path = [os.path.join(p, "cygxgboost.dll") for p in dll_path]
+    if platform.system() == "OS400":
+        dll_path = [os.path.join(p, "libxgboost.so") for p in dll_path]

    lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)]

    # XGBOOST_BUILD_DOC is defined by sphinx conf.
-    if not lib_path and not os.environ.get('XGBOOST_BUILD_DOC', False):
-        link = 'https://xgboost.readthedocs.io/en/latest/build.html'
-        msg = 'Cannot find XGBoost Library in the candidate path.  ' + \
-            'List of candidates:\n- ' + ('\n- '.join(dll_path)) + \
-            '\nXGBoost Python package path: ' + curr_path + \
-            '\nsys.prefix: ' + sys.prefix + \
-            '\nSee: ' + link + ' for installing XGBoost.'
+    if not lib_path and not os.environ.get("XGBOOST_BUILD_DOC", False):
+        link = "https://xgboost.readthedocs.io/en/latest/build.html"
+        msg = (
+            "Cannot find XGBoost Library in the candidate path.  "
+            + "List of candidates:\n- "
+            + ("\n- ".join(dll_path))
+            + "\nXGBoost Python package path: "
+            + curr_path
+            + "\nsys.prefix: "
+            + sys.prefix
+            + "\nSee: "
+            + link
+            + " for installing XGBoost."
+        )
        raise XGBoostLibraryNotFound(msg)
    return lib_path
--- a/python-package/xgboost/plotting.py
+++ b/python-package/xgboost/plotting.py
@@ -81,22 +81,24 @@ def plot_importance(
    try:
        import matplotlib.pyplot as plt
    except ImportError as e:
-        raise ImportError('You must install matplotlib to plot importance') from e
+        raise ImportError("You must install matplotlib to plot importance") from e

    if isinstance(booster, XGBModel):
        importance = booster.get_booster().get_score(
-            importance_type=importance_type, fmap=fmap)
+            importance_type=importance_type, fmap=fmap
+        )
    elif isinstance(booster, Booster):
        importance = booster.get_score(importance_type=importance_type, fmap=fmap)
    elif isinstance(booster, dict):
        importance = booster
    else:
-        raise ValueError('tree must be Booster, XGBModel or dict instance')
+        raise ValueError("tree must be Booster, XGBModel or dict instance")

    if not importance:
        raise ValueError(
-            'Booster.get_score() results in empty.  ' +
-            'This maybe caused by having all trees as decision dumps.')
+            "Booster.get_score() results in empty.  "
+            + "This maybe caused by having all trees as decision dumps."
+        )

    tuples = [(k, importance[k]) for k in importance]
    if max_num_features is not None:
@@ -110,25 +112,25 @@ def plot_importance(
        _, ax = plt.subplots(1, 1)

    ylocs = np.arange(len(values))
-    ax.barh(ylocs, values, align='center', height=height, **kwargs)
+    ax.barh(ylocs, values, align="center", height=height, **kwargs)

    if show_values is True:
        for x, y in zip(values, ylocs):
-            ax.text(x + 1, y, values_format.format(v=x), va='center')
+            ax.text(x + 1, y, values_format.format(v=x), va="center")

    ax.set_yticks(ylocs)
    ax.set_yticklabels(labels)

    if xlim is not None:
        if not isinstance(xlim, tuple) or len(xlim) != 2:
-            raise ValueError('xlim must be a tuple of 2 elements')
+            raise ValueError("xlim must be a tuple of 2 elements")
    else:
        xlim = (0, max(values) * 1.1)
    ax.set_xlim(xlim)

    if ylim is not None:
        if not isinstance(ylim, tuple) or len(ylim) != 2:
-            raise ValueError('ylim must be a tuple of 2 elements')
+            raise ValueError("ylim must be a tuple of 2 elements")
    else:
        ylim = (-1, len(values))
    ax.set_ylim(ylim)
@@ -201,44 +203,42 @@ def to_graphviz(
    try:
        from graphviz import Source
    except ImportError as e:
-        raise ImportError('You must install graphviz to plot tree') from e
+        raise ImportError("You must install graphviz to plot tree") from e
    if isinstance(booster, XGBModel):
        booster = booster.get_booster()

    # squash everything back into kwargs again for compatibility
-    parameters = 'dot'
+    parameters = "dot"
    extra = {}
    for key, value in kwargs.items():
        extra[key] = value

    if rankdir is not None:
-        kwargs['graph_attrs'] = {}
-        kwargs['graph_attrs']['rankdir'] = rankdir
+        kwargs["graph_attrs"] = {}
+        kwargs["graph_attrs"]["rankdir"] = rankdir
    for key, value in extra.items():
        if kwargs.get("graph_attrs", None) is not None:
-            kwargs['graph_attrs'][key] = value
+            kwargs["graph_attrs"][key] = value
        else:
-            kwargs['graph_attrs'] = {}
+            kwargs["graph_attrs"] = {}
        del kwargs[key]

    if yes_color is not None or no_color is not None:
-        kwargs['edge'] = {}
+        kwargs["edge"] = {}
    if yes_color is not None:
-        kwargs['edge']['yes_color'] = yes_color
+        kwargs["edge"]["yes_color"] = yes_color
    if no_color is not None:
-        kwargs['edge']['no_color'] = no_color
+        kwargs["edge"]["no_color"] = no_color

    if condition_node_params is not None:
-        kwargs['condition_node_params'] = condition_node_params
+        kwargs["condition_node_params"] = condition_node_params
    if leaf_node_params is not None:
-        kwargs['leaf_node_params'] = leaf_node_params
+        kwargs["leaf_node_params"] = leaf_node_params

    if kwargs:
-        parameters += ':'
+        parameters += ":"
        parameters += json.dumps(kwargs)
-    tree = booster.get_dump(
-        fmap=fmap,
-        dump_format=parameters)[num_trees]
+    tree = booster.get_dump(fmap=fmap, dump_format=parameters)[num_trees]
    g = Source(tree)
    return g

@@ -277,19 +277,18 @@ def plot_tree(
        from matplotlib import image
        from matplotlib import pyplot as plt
    except ImportError as e:
-        raise ImportError('You must install matplotlib to plot tree') from e
+        raise ImportError("You must install matplotlib to plot tree") from e

    if ax is None:
        _, ax = plt.subplots(1, 1)

-    g = to_graphviz(booster, fmap=fmap, num_trees=num_trees, rankdir=rankdir,
-                    **kwargs)
+    g = to_graphviz(booster, fmap=fmap, num_trees=num_trees, rankdir=rankdir, **kwargs)

    s = BytesIO()
-    s.write(g.pipe(format='png'))
+    s.write(g.pipe(format="png"))
    s.seek(0)
    img = image.imread(s)

    ax.imshow(img)
-    ax.axis('off')
+    ax.axis("off")
    return ax
--- a/python-package/xgboost/rabit.py
+++ b/python-package/xgboost/rabit.py
@@ -24,7 +24,7 @@ def init(args: Optional[List[bytes]] = None) -> None:
    parsed = {}
    if args:
        for arg in args:
-            kv = arg.decode().split('=')
+            kv = arg.decode().split("=")
            if len(kv) == 2:
                parsed[kv[0]] = kv[1]
    collective.init(**parsed)
@@ -104,6 +104,7 @@ def broadcast(data: T, root: int) -> T:
@unique
 class Op(IntEnum):
    """Supported operations for rabit."""
+
    MAX = 0
    MIN = 1
    SUM = 2
@@ -111,7 +112,7 @@ class Op(IntEnum):


 def allreduce(  # pylint:disable=invalid-name
-        data: np.ndarray, op: Op, prepare_fun: Optional[Callable[[np.ndarray], None]] = None
+    data: np.ndarray, op: Op, prepare_fun: Optional[Callable[[np.ndarray], None]] = None
 ) -> np.ndarray:
    """Perform allreduce, return the result.
    Parameters
--- a/python-package/xgboost/tracker.py
+++ b/python-package/xgboost/tracker.py
@@ -53,7 +53,7 @@ class ExSocket:


 # magic number used to verify existence of data
-MAGIC_NUM = 0xff99
+MAGIC_NUM = 0xFF99


 def get_some_ip(host: str) -> str:
@@ -334,19 +334,19 @@ class RabitTracker:
        while len(shutdown) != n_workers:
            fd, s_addr = self.sock.accept()
            s = WorkerEntry(fd, s_addr)
-            if s.cmd == 'print':
+            if s.cmd == "print":
                s.print(self._use_logger)
                continue
-            if s.cmd == 'shutdown':
+            if s.cmd == "shutdown":
                assert s.rank >= 0 and s.rank not in shutdown
                assert s.rank not in wait_conn
                shutdown[s.rank] = s
-                logging.debug('Received %s signal from %d', s.cmd, s.rank)
+                logging.debug("Received %s signal from %d", s.cmd, s.rank)
                continue
            assert s.cmd in ("start", "recover")
            # lazily initialize the workers
            if tree_map is None:
-                assert s.cmd == 'start'
+                assert s.cmd == "start"
                if s.world_size > 0:
                    n_workers = s.world_size
                tree_map, parent_map, ring_map = self.get_link_map(n_workers)
@@ -354,7 +354,7 @@ class RabitTracker:
                todo_nodes = list(range(n_workers))
            else:
                assert s.world_size in (-1, n_workers)
-            if s.cmd == 'recover':
+            if s.cmd == "recover":
                assert s.rank >= 0

            rank = s.decide_rank(job_map)
@@ -410,24 +410,25 @@ def get_host_ip(host_ip: Optional[str] = None) -> str:
    returned as it's

    """
-    if host_ip is None or host_ip == 'auto':
-        host_ip = 'ip'
+    if host_ip is None or host_ip == "auto":
+        host_ip = "ip"

-    if host_ip == 'dns':
+    if host_ip == "dns":
        host_ip = socket.getfqdn()
-    elif host_ip == 'ip':
+    elif host_ip == "ip":
        from socket import gaierror
+
        try:
            host_ip = socket.gethostbyname(socket.getfqdn())
        except gaierror:
            logging.debug(
-                'gethostbyname(socket.getfqdn()) failed... trying on hostname()'
+                "gethostbyname(socket.getfqdn()) failed... trying on hostname()"
            )
            host_ip = socket.gethostbyname(socket.gethostname())
        if host_ip.startswith("127."):
            s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
            # doesn't have to be reachable
-            s.connect(('10.255.255.255', 1))
+            s.connect(("10.255.255.255", 1))
            host_ip = s.getsockname()[0]

    assert host_ip is not None
@@ -458,25 +459,41 @@ def start_rabit_tracker(args: argparse.Namespace) -> None:

 def main() -> None:
    """Main function if tracker is executed in standalone mode."""
-    parser = argparse.ArgumentParser(description='Rabit Tracker start.')
-    parser.add_argument('--num-workers', required=True, type=int,
-                        help='Number of worker process to be launched.')
+    parser = argparse.ArgumentParser(description="Rabit Tracker start.")
    parser.add_argument(
-        '--num-servers', default=0, type=int,
-        help='Number of server process to be launched. Only used in PS jobs.'
+        "--num-workers",
+        required=True,
+        type=int,
+        help="Number of worker process to be launched.",
+    )
+    parser.add_argument(
+        "--num-servers",
+        default=0,
+        type=int,
+        help="Number of server process to be launched. Only used in PS jobs.",
+    )
+    parser.add_argument(
+        "--host-ip",
+        default=None,
+        type=str,
+        help=(
+            "Host IP addressed, this is only needed "
+            + "if the host IP cannot be automatically guessed."
+        ),
+    )
+    parser.add_argument(
+        "--log-level",
+        default="INFO",
+        type=str,
+        choices=["INFO", "DEBUG"],
+        help="Logging level of the logger.",
    )
-    parser.add_argument('--host-ip', default=None, type=str,
-                        help=('Host IP addressed, this is only needed ' +
-                              'if the host IP cannot be automatically guessed.'))
-    parser.add_argument('--log-level', default='INFO', type=str,
-                        choices=['INFO', 'DEBUG'],
-                        help='Logging level of the logger.')
    args = parser.parse_args()

-    fmt = '%(asctime)s %(levelname)s %(message)s'
-    if args.log_level == 'INFO':
+    fmt = "%(asctime)s %(levelname)s %(message)s"
+    if args.log_level == "INFO":
        level = logging.INFO
-    elif args.log_level == 'DEBUG':
+    elif args.log_level == "DEBUG":
        level = logging.DEBUG
    else:
        raise RuntimeError(f"Unknown logging level {args.log_level}")
--- a/python-package/xgboost/training.py
+++ b/python-package/xgboost/training.py
@@ -205,25 +205,29 @@ def train(


 class CVPack:
-    """"Auxiliary datastruct to hold one fold of CV."""
-    def __init__(self, dtrain: DMatrix, dtest: DMatrix, param: Optional[Union[Dict, List]]) -> None:
-        """"Initialize the CVPack"""
+    """ "Auxiliary datastruct to hold one fold of CV."""
+
+    def __init__(
+        self, dtrain: DMatrix, dtest: DMatrix, param: Optional[Union[Dict, List]]
+    ) -> None:
+        """ "Initialize the CVPack"""
        self.dtrain = dtrain
        self.dtest = dtest
-        self.watchlist = [(dtrain, 'train'), (dtest, 'test')]
+        self.watchlist = [(dtrain, "train"), (dtest, "test")]
        self.bst = Booster(param, [dtrain, dtest])

    def __getattr__(self, name: str) -> Callable:
        def _inner(*args: Any, **kwargs: Any) -> Any:
            return getattr(self.bst, name)(*args, **kwargs)
+
        return _inner

    def update(self, iteration: int, fobj: Optional[Objective]) -> None:
-        """"Update the boosters for one iteration"""
+        """ "Update the boosters for one iteration"""
        self.bst.update(self.dtrain, iteration, fobj)

    def eval(self, iteration: int, feval: Optional[Metric], output_margin: bool) -> str:
-        """"Evaluate the CVPack for one iteration."""
+        """ "Evaluate the CVPack for one iteration."""
        return self.bst.eval_set(self.watchlist, iteration, feval, output_margin)


@@ -232,38 +236,42 @@ class _PackedBooster:
        self.cvfolds = cvfolds

    def update(self, iteration: int, obj: Optional[Objective]) -> None:
-        '''Iterate through folds for update'''
+        """Iterate through folds for update"""
        for fold in self.cvfolds:
            fold.update(iteration, obj)

-    def eval(self, iteration: int, feval: Optional[Metric], output_margin: bool) -> List[str]:
-        '''Iterate through folds for eval'''
+    def eval(
+        self, iteration: int, feval: Optional[Metric], output_margin: bool
+    ) -> List[str]:
+        """Iterate through folds for eval"""
        result = [f.eval(iteration, feval, output_margin) for f in self.cvfolds]
        return result

    def set_attr(self, **kwargs: Optional[str]) -> Any:
-        '''Iterate through folds for setting attributes'''
+        """Iterate through folds for setting attributes"""
        for f in self.cvfolds:
            f.bst.set_attr(**kwargs)

    def attr(self, key: str) -> Optional[str]:
-        '''Redirect to booster attr.'''
+        """Redirect to booster attr."""
        return self.cvfolds[0].bst.attr(key)

-    def set_param(self,
-                  params: Union[Dict, Iterable[Tuple[str, Any]], str],
-                  value: Optional[str] = None) -> None:
+    def set_param(
+        self,
+        params: Union[Dict, Iterable[Tuple[str, Any]], str],
+        value: Optional[str] = None,
+    ) -> None:
        """Iterate through folds for set_param"""
        for f in self.cvfolds:
            f.bst.set_param(params, value)

    def num_boosted_rounds(self) -> int:
-        '''Number of boosted rounds.'''
+        """Number of boosted rounds."""
        return self.cvfolds[0].num_boosted_rounds()

    @property
    def best_iteration(self) -> int:
-        '''Get best_iteration'''
+        """Get best_iteration"""
        return int(cast(int, self.cvfolds[0].bst.attr("best_iteration")))

    @property
@@ -279,7 +287,7 @@ def groups_to_rows(groups: List[np.ndarray], boundaries: np.ndarray) -> np.ndarr
    :param boundaries: rows index limits of each group
    :return: row in group
    """
-    return np.concatenate([np.arange(boundaries[g], boundaries[g+1]) for g in groups])
+    return np.concatenate([np.arange(boundaries[g], boundaries[g + 1]) for g in groups])


 def mkgroupfold(
@@ -305,11 +313,17 @@ def mkgroupfold(
    # list by fold of test group indexes
    out_group_idset = np.array_split(idx, nfold)
    # list by fold of train group indexes
-    in_group_idset = [np.concatenate([out_group_idset[i] for i in range(nfold) if k != i])
-                      for k in range(nfold)]
+    in_group_idset = [
+        np.concatenate([out_group_idset[i] for i in range(nfold) if k != i])
+        for k in range(nfold)
+    ]
    # from the group indexes, convert them to row indexes
-    in_idset = [groups_to_rows(in_groups, group_boundaries) for in_groups in in_group_idset]
-    out_idset = [groups_to_rows(out_groups, group_boundaries) for out_groups in out_group_idset]
+    in_idset = [
+        groups_to_rows(in_groups, group_boundaries) for in_groups in in_group_idset
+    ]
+    out_idset = [
+        groups_to_rows(out_groups, group_boundaries) for out_groups in out_group_idset
+    ]

    # build the folds by taking the appropriate slices
    ret = []
@@ -324,7 +338,7 @@ def mkgroupfold(
            dtrain, dtest, tparam = fpreproc(dtrain, dtest, param.copy())
        else:
            tparam = param
-        plst = list(tparam.items()) + [('eval_metric', itm) for itm in evals]
+        plst = list(tparam.items()) + [("eval_metric", itm) for itm in evals]
        ret.append(CVPack(dtrain, dtest, plst))
    return ret

@@ -348,16 +362,20 @@ def mknfold(

    if stratified is False and folds is None:
        # Do standard k-fold cross validation. Automatically determine the folds.
-        if len(dall.get_uint_info('group_ptr')) > 1:
-            return mkgroupfold(dall, nfold, param, evals=evals, fpreproc=fpreproc, shuffle=shuffle)
+        if len(dall.get_uint_info("group_ptr")) > 1:
+            return mkgroupfold(
+                dall, nfold, param, evals=evals, fpreproc=fpreproc, shuffle=shuffle
+            )

        if shuffle is True:
            idx = np.random.permutation(dall.num_row())
        else:
            idx = np.arange(dall.num_row())
        out_idset = np.array_split(idx, nfold)
-        in_idset = [np.concatenate([out_idset[i] for i in range(nfold) if k != i])
-                    for k in range(nfold)]
+        in_idset = [
+            np.concatenate([out_idset[i] for i in range(nfold) if k != i])
+            for k in range(nfold)
+        ]
    elif folds is not None:
        # Use user specified custom split using indices
        try:
@@ -387,7 +405,7 @@ def mknfold(
            dtrain, dtest, tparam = fpreproc(dtrain, dtest, param.copy())
        else:
            tparam = param
-        plst = list(tparam.items()) + [('eval_metric', itm) for itm in evals]
+        plst = list(tparam.items()) + [("eval_metric", itm) for itm in evals]
        ret.append(CVPack(dtrain, dtest, plst))
    return ret

@@ -502,29 +520,32 @@ def cv(
    evaluation history : list(string)
    """
    if stratified is True and not SKLEARN_INSTALLED:
-        raise XGBoostError('sklearn needs to be installed in order to use stratified cv')
+        raise XGBoostError(
+            "sklearn needs to be installed in order to use stratified cv"
+        )

    if isinstance(metrics, str):
        metrics = [metrics]

    params = params.copy()
    if isinstance(params, list):
-        _metrics = [x[1] for x in params if x[0] == 'eval_metric']
+        _metrics = [x[1] for x in params if x[0] == "eval_metric"]
        params = dict(params)
-        if 'eval_metric' in params:
-            params['eval_metric'] = _metrics
+        if "eval_metric" in params:
+            params["eval_metric"] = _metrics

-    if (not metrics) and 'eval_metric' in params:
-        if isinstance(params['eval_metric'], list):
-            metrics = params['eval_metric']
+    if (not metrics) and "eval_metric" in params:
+        if isinstance(params["eval_metric"], list):
+            metrics = params["eval_metric"]
        else:
-            metrics = [params['eval_metric']]
+            metrics = [params["eval_metric"]]

    params.pop("eval_metric", None)

    results: Dict[str, List[float]] = {}
-    cvfolds = mknfold(dtrain, nfold, params, seed, metrics, fpreproc,
-                      stratified, folds, shuffle)
+    cvfolds = mknfold(
+        dtrain, nfold, params, seed, metrics, fpreproc, stratified, folds, shuffle
+    )

    metric_fn = _configure_custom_metric(feval, custom_metric)

@@ -555,20 +576,21 @@ def cv(
        should_break = callbacks_container.after_iteration(booster, i, dtrain, None)
        res = callbacks_container.aggregated_cv
        for key, mean, std in cast(List[Tuple[str, float, float]], res):
-            if key + '-mean' not in results:
-                results[key + '-mean'] = []
-            if key + '-std' not in results:
-                results[key + '-std'] = []
-            results[key + '-mean'].append(mean)
-            results[key + '-std'].append(std)
+            if key + "-mean" not in results:
+                results[key + "-mean"] = []
+            if key + "-std" not in results:
+                results[key + "-std"] = []
+            results[key + "-mean"].append(mean)
+            results[key + "-std"].append(std)

        if should_break:
            for k in results.keys():  # pylint: disable=consider-iterating-dictionary
-                results[k] = results[k][:(booster.best_iteration + 1)]
+                results[k] = results[k][: (booster.best_iteration + 1)]
            break
    if as_pandas:
        try:
            import pandas as pd
+
            results = pd.DataFrame.from_dict(results)
        except ImportError:
            pass