Migrate pylint check to Python 3 (#4381)

* Migrate lint to Python 3
* Fix lint errors
* Use Miniconda3 to use Python 3.7
* Use latest pylint and astroid
2019-04-21 01:01:54 -07:00 · 2019-04-21 01:01:54 -07:00 · bbe0dbd7ec
commit bbe0dbd7ec
parent 5e97de6a41
11 changed files with 118 additions and 117 deletions
--- a/Makefile
+++ b/Makefile
@ -173,10 +173,10 @@ xgboost: $(CLI_OBJ) $(ALL_DEP)
 	$(CXX) $(CFLAGS) -o $@  $(filter %.o %.a, $^)  $(LDFLAGS)

 rcpplint:
-	python2 dmlc-core/scripts/lint.py xgboost ${LINT_LANG} R-package/src
+	python3 dmlc-core/scripts/lint.py xgboost ${LINT_LANG} R-package/src

 lint: rcpplint
-	python2 dmlc-core/scripts/lint.py xgboost ${LINT_LANG} include src plugin python-package
+	python3 dmlc-core/scripts/lint.py --pylint-rc ${PWD}/python-package/.pylintrc xgboost ${LINT_LANG} include src plugin python-package

 pylint:
 	flake8 --ignore E501 python-package
--- a/python-package/.pylintrc
+++ b/python-package/.pylintrc
@ -4,7 +4,7 @@ ignore=tests

 extension-pkg-whitelist=numpy

-disiable=unexpected-special-method-signature,too-many-nested-blocks
+disable=unexpected-special-method-signature,too-many-nested-blocks,useless-object-inheritance

 dummy-variables-rgx=(unused|)_.*

--- a/python-package/xgboost/callback.py
+++ b/python-package/xgboost/callback.py
@ -1,5 +1,5 @@
 # coding: utf-8
-# pylint: disable= invalid-name
+# pylint: disable=invalid-name, too-many-statements
 """Training Library containing training routines."""
 from __future__ import absolute_import

@ -20,12 +20,10 @@ def _fmt_metric(value, show_stdv=True):
    """format metric string"""
    if len(value) == 2:
        return '%s:%g' % (value[0], value[1])
-    elif len(value) == 3:
+    if len(value) == 3:
        if show_stdv:
            return '%s:%g+%g' % (value[0], value[1], value[2])
-        else:
        return '%s:%g' % (value[0], value[1])
-    else:
    raise ValueError("wrong metric value")


@ -50,10 +48,10 @@ def print_evaluation(period=1, show_stdv=True):
    """
    def callback(env):
        """internal function"""
-        if env.rank != 0 or len(env.evaluation_result_list) == 0 or period is False or period == 0:
+        if env.rank != 0 or (not env.evaluation_result_list) or period is False or period == 0:
            return
        i = env.iteration
-        if (i % period == 0 or i + 1 == env.begin_iteration or i + 1 == env.end_iteration):
+        if i % period == 0 or i + 1 == env.begin_iteration or i + 1 == env.end_iteration:
            msg = '\t'.join([_fmt_metric(x, show_stdv) for x in env.evaluation_result_list])
            rabit.tracker_print('[%d]\t%s\n' % (i, msg))
    return callback
@ -89,7 +87,7 @@ def record_evaluation(eval_result):

    def callback(env):
        """internal function"""
-        if len(eval_result) == 0:
+        if not eval_result:
            init(env)
        for k, v in env.evaluation_result_list:
            pos = k.index('-')
@ -182,14 +180,14 @@ def early_stop(stopping_rounds, maximize=False, verbose=True):
        """internal function"""
        bst = env.model

-        if len(env.evaluation_result_list) == 0:
+        if not env.evaluation_result_list:
            raise ValueError('For early stopping you need at least one set in evals.')
        if len(env.evaluation_result_list) > 1 and verbose:
            msg = ("Multiple eval metrics have been passed: "
                   "'{0}' will be used for early stopping.\n\n")
            rabit.tracker_print(msg.format(env.evaluation_result_list[-1][0]))
        maximize_metrics = ('auc', 'aucpr', 'map', 'ndcg')
-        maximize_at_n_metrics = ('auc@', 'aucpr@' 'map@', 'ndcg@')
+        maximize_at_n_metrics = ('auc@', 'aucpr@', 'map@', 'ndcg@')
        maximize_score = maximize
        metric_label = env.evaluation_result_list[-1][0]
        metric = metric_label.split('-', 1)[-1]
@ -225,7 +223,7 @@ def early_stop(stopping_rounds, maximize=False, verbose=True):
    def callback(env):
        """internal function"""
        score = env.evaluation_result_list[-1][1]
-        if len(state) == 0:
+        if not state:
            init(env)
        best_score = state['best_score']
        best_iteration = state['best_iteration']
--- a/python-package/xgboost/compat.py
+++ b/python-package/xgboost/compat.py
@ -11,14 +11,13 @@ PY3 = (sys.version_info[0] == 3)

 if PY3:
    # pylint: disable=invalid-name, redefined-builtin
-    STRING_TYPES = str,
+    STRING_TYPES = (str,)

    def py_str(x):
        """convert c string back to python string"""
        return x.decode('utf-8')
 else:
-    # pylint: disable=invalid-name
-    STRING_TYPES = basestring,
+    STRING_TYPES = (basestring,)  # pylint: disable=undefined-variable

    def py_str(x):
        """convert c string back to python string"""
@ -37,13 +36,13 @@ try:
    PANDAS_INSTALLED = True
 except ImportError:

+    # pylint: disable=too-few-public-methods
    class MultiIndex(object):
        """ dummy for pandas.MultiIndex """
-        pass

+    # pylint: disable=too-few-public-methods
    class DataFrame(object):
        """ dummy for pandas.DataFrame """
-        pass

    PANDAS_INSTALLED = False

@ -57,9 +56,9 @@ try:
    DT_INSTALLED = True
 except ImportError:

+    # pylint: disable=too-few-public-methods
    class DataTable(object):
        """ dummy for datatable.DataTable """
-        pass

    DT_INSTALLED = False

--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@ -1,6 +1,6 @@
 # coding: utf-8
 # pylint: disable=too-many-arguments, too-many-branches, invalid-name
-# pylint: disable=too-many-branches, too-many-lines, W0141
+# pylint: disable=too-many-branches, too-many-lines, too-many-locals
 """Core XGBoost Library."""
 from __future__ import absolute_import
 import collections
@ -30,7 +30,6 @@ c_bst_ulong = ctypes.c_uint64

 class XGBoostError(Exception):
    """Error thrown by xgboost trainer."""
-    pass


 class EarlyStopException(Exception):
@ -67,18 +66,16 @@ def from_pystr_to_cstr(data):
        list of str
    """

-    if isinstance(data, list):
+    if not isinstance(data, list):
+        raise NotImplementedError
    pointers = (ctypes.c_char_p * len(data))()
    if PY3:
        data = [bytes(d, 'utf-8') for d in data]
    else:
-            data = [d.encode('utf-8') if isinstance(d, unicode) else d
+        data = [d.encode('utf-8') if isinstance(d, unicode) else d  # pylint: disable=undefined-variable
                for d in data]
    pointers[:] = data
    return pointers
-    else:
-        # copy from above when we actually use it
-        raise NotImplementedError


 def from_cstr_to_pystr(data, length):
@ -104,6 +101,7 @@ def from_cstr_to_pystr(data, length):
            try:
                res.append(str(data[i].decode('ascii')))
            except UnicodeDecodeError:
+                # pylint: disable=undefined-variable
                res.append(unicode(data[i].decode('utf-8')))
    return res

@ -123,7 +121,7 @@ def _get_log_callback_func():
 def _load_lib():
    """Load xgboost Library."""
    lib_paths = find_lib_path()
-    if len(lib_paths) == 0:
+    if not lib_paths:
        return None
    try:
        pathBackup = os.environ['PATH'].split(os.pathsep)
@ -243,7 +241,7 @@ def _maybe_pandas_data(data, feature_names, feature_types):
    if feature_names is None:
        if isinstance(data.columns, MultiIndex):
            feature_names = [
-                ' '.join(map(str, i))
+                ' '.join([str(x) for x in i])
                for i in data.columns
            ]
        else:
@ -267,7 +265,6 @@ def _maybe_pandas_label(label):
        label_dtypes = label.dtypes
        if not all(dtype.name in PANDAS_DTYPE_MAPPER for dtype in label_dtypes):
            raise ValueError('DataFrame.dtypes for label must be int, float or bool')
-        else:
        label = label.values.astype('float')
    # pd.Series can be passed to xgb as it is

@ -301,7 +298,6 @@ def _maybe_dt_data(data, feature_names, feature_types):
        # always return stypes for dt ingestion
        if feature_types is not None:
            raise ValueError('DataTable has own feature types, cannot pass them in')
-        else:
        feature_types = np.vectorize(DT_TYPE_MAPPER2.get)(data_types_names)

    return data, feature_names, feature_types
@ -512,7 +508,7 @@ class DMatrix(object):
                ptrs[icol] = ctypes.c_void_p(ptr)
        else:
            # datatable<=0.8.0
-            from datatable.internal import frame_column_data_r
+            from datatable.internal import frame_column_data_r  # pylint: disable=no-name-in-module,import-error
            for icol in range(data.ncols):
                ptrs[icol] = frame_column_data_r(data, icol)

@ -1039,7 +1035,6 @@ class Booster(object):
            self.handle, c_str(key), ctypes.byref(ret), ctypes.byref(success)))
        if success.value != 0:
            return py_str(ret.value)
-        else:
        return None

    def attributes(self):
@ -1056,8 +1051,7 @@ class Booster(object):
                                               ctypes.byref(length),
                                               ctypes.byref(sarr)))
        attr_names = from_cstr_to_pystr(sarr, length)
-        res = dict([(n, self.attr(n)) for n in attr_names])
-        return res
+        return {n: self.attr(n) for n in attr_names}

    def set_attr(self, **kwargs):
        """Set the attribute of the Booster.
@ -1399,13 +1393,13 @@ class Booster(object):
        ret = self.get_dump(fmap, with_stats, dump_format)
        if dump_format == 'json':
            fout.write('[\n')
-            for i in range(len(ret)):
+            for i, _ in enumerate(ret):
                fout.write(ret[i])
                if i < len(ret) - 1:
                    fout.write(",\n")
            fout.write('\n]')
        else:
-            for i in range(len(ret)):
+            for i, _ in enumerate(ret):
                fout.write('booster[{}]:\n'.format(i))
                fout.write(ret[i])
        if need_close:
@ -1538,7 +1532,6 @@ class Booster(object):

            return fmap

-        else:
        average_over_splits = True
        if importance_type == 'total_gain':
            importance_type = 'gain'
@ -1721,9 +1714,9 @@ class Booster(object):
        xgdump = self.get_dump(fmap=fmap)
        values = []
        regexp = re.compile(r"\[{0}<([\d.Ee+-]+)\]".format(feature))
-        for i in range(len(xgdump)):
+        for i, _ in enumerate(xgdump):
            m = re.findall(regexp, xgdump[i])
-            values.extend(map(float, m))
+            values.extend([float(x) for x in m])

        n_unique = len(np.unique(values))
        bins = max(min(n_unique, bins) if bins is not None else n_unique, 1)
@ -1734,9 +1727,7 @@ class Booster(object):

        if as_pandas and PANDAS_INSTALLED:
            return DataFrame(nph, columns=['SplitValue', 'Count'])
-        elif as_pandas and not PANDAS_INSTALLED:
+        if as_pandas and not PANDAS_INSTALLED:
            sys.stderr.write(
                "Returning histogram as ndarray (as_pandas == True, but pandas is not installed).")
        return nph
-        else:
-            return nph
--- a/python-package/xgboost/libpath.py
+++ b/python-package/xgboost/libpath.py
@ -8,7 +8,6 @@ import sys

 class XGBoostLibraryNotFound(Exception):
    """Error thrown by when xgboost is not found"""
-    pass


 def find_lib_path():
--- a/python-package/xgboost/plotting.py
+++ b/python-package/xgboost/plotting.py
@ -55,7 +55,6 @@ def plot_importance(booster, ax=None, height=0.2,
    -------
    ax : matplotlib Axes
    """
-    # TODO: move this to compat.py
    try:
        import matplotlib.pyplot as plt
    except ImportError:
@ -70,11 +69,12 @@ def plot_importance(booster, ax=None, height=0.2,
    else:
        raise ValueError('tree must be Booster, XGBModel or dict instance')

-    if len(importance) == 0:
+    if not importance:
        raise ValueError('Booster.get_score() results in empty')

    tuples = [(k, importance[k]) for k in importance]
    if max_num_features is not None:
+        # pylint: disable=invalid-unary-operand-type
        tuples = sorted(tuples, key=lambda x: x[1])[-max_num_features:]
    else:
        tuples = sorted(tuples, key=lambda x: x[1])
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@ -3,9 +3,9 @@
 """Scikit-Learn Wrapper interface for XGBoost."""
 from __future__ import absolute_import

-import numpy as np
 import warnings
 import json
+import numpy as np
 from .core import Booster, DMatrix, XGBoostError
 from .training import train

@ -107,15 +107,15 @@ class XGBModel(XGBModelBase):
    importance_type: string, default "gain"
        The feature importance type for the feature_importances_ property: either "gain",
        "weight", "cover", "total_gain" or "total_cover".
-    \*\*kwargs : dict, optional
+    \\*\\*kwargs : dict, optional
        Keyword arguments for XGBoost Booster object.  Full documentation of parameters can
        be found here: https://github.com/dmlc/xgboost/blob/master/doc/parameter.rst.
-        Attempting to set a parameter via the constructor args and \*\*kwargs dict simultaneously
+        Attempting to set a parameter via the constructor args and \\*\\*kwargs dict simultaneously
        will result in a TypeError.

-        .. note:: \*\*kwargs unsupported by scikit-learn
+        .. note:: \\*\\*kwargs unsupported by scikit-learn

-            \*\*kwargs is unsupported by scikit-learn.  We do not guarantee that parameters
+            \\*\\*kwargs is unsupported by scikit-learn.  We do not guarantee that parameters
            passed via this argument will interact properly with scikit-learn.

    Note
@ -597,7 +597,7 @@ class XGBModel(XGBModelBase):


 class XGBClassifier(XGBModel, XGBClassifierBase):
-    # pylint: disable=missing-docstring,too-many-arguments,invalid-name
+    # pylint: disable=missing-docstring,too-many-arguments,invalid-name,too-many-instance-attributes
    __doc__ = "Implementation of the scikit-learn API for XGBoost classification.\n\n" \
        + '\n'.join(XGBModel.__doc__.split('\n')[2:])

@ -834,7 +834,6 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
                                                 validate_features=validate_features)
        if self.objective == "multi:softprob":
            return class_probs
-        else:
        classone_probs = class_probs
        classzero_probs = 1.0 - classone_probs
        return np.vstack((classzero_probs, classone_probs)).transpose()
@ -1008,15 +1007,15 @@ class XGBRanker(XGBModel):
        missing : float, optional
            Value in the data which needs to be present as a missing value. If
            None, defaults to np.nan.
-        \*\*kwargs : dict, optional
+        \\*\\*kwargs : dict, optional
            Keyword arguments for XGBoost Booster object.  Full documentation of parameters can
            be found here: https://github.com/dmlc/xgboost/blob/master/doc/parameter.rst.
-            Attempting to set a parameter via the constructor args and \*\*kwargs dict
+            Attempting to set a parameter via the constructor args and \\*\\*kwargs dict
            simultaneously will result in a TypeError.

-            .. note:: \*\*kwargs unsupported by scikit-learn
+            .. note:: \\*\\*kwargs unsupported by scikit-learn

-                \*\*kwargs is unsupported by scikit-learn.  We do not guarantee that parameters
+                \\*\\*kwargs is unsupported by scikit-learn.  We do not guarantee that parameters
                passed via this argument will interact properly with scikit-learn.

        Note
@ -1073,7 +1072,7 @@ class XGBRanker(XGBModel):
            random_state=random_state, seed=seed, missing=missing, **kwargs)
        if callable(self.objective):
            raise ValueError("custom objective function not supported by XGBRanker")
-        elif "rank:" not in self.objective:
+        if "rank:" not in self.objective:
            raise ValueError("please use XGBRanker for ranking task")

    def fit(self, X, y, group, sample_weight=None, eval_set=None, sample_weight_eval_set=None,
@ -1158,9 +1157,9 @@ class XGBRanker(XGBModel):
        if eval_set is not None:
            if eval_group is None:
                raise ValueError("eval_group is required if eval_set is not None")
-            elif len(eval_group) != len(eval_set):
+            if len(eval_group) != len(eval_set):
                raise ValueError("length of eval_group should match that of eval_set")
-            elif any(group is None for group in eval_group):
+            if any(group is None for group in eval_group):
                raise ValueError("group is required for all eval datasets for ranking task")

        def _dmat_init(group, **params):
--- a/python-package/xgboost/training.py
+++ b/python-package/xgboost/training.py
@ -49,7 +49,7 @@ def _train_internal(params, dtrain,

    # Distributed code: Load the checkpoint from rabit.
    version = bst.load_rabit_checkpoint()
-    assert(rabit.get_world_size() != 1 or version == 0)
+    assert rabit.get_world_size() != 1 or version == 0
    rank = rabit.get_rank()
    start_iteration = int(version / 2)
    nboost += start_iteration
@ -75,12 +75,12 @@ def _train_internal(params, dtrain,
            bst.save_rabit_checkpoint()
            version += 1

-        assert(rabit.get_world_size() == 1 or version == rabit.version_number())
+        assert rabit.get_world_size() == 1 or version == rabit.version_number()

        nboost += 1
        evaluation_result_list = []
        # check evaluation result.
-        if len(evals) != 0:
+        if evals:
            bst_eval_set = bst.eval_set(evals, i, feval)
            if isinstance(bst_eval_set, STRING_TYPES):
                msg = bst_eval_set
@ -402,7 +402,7 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
    else:
        params = dict((k, v) for k, v in params.items())

-    if len(metrics) == 0 and 'eval_metric' in params:
+    if (not metrics) and 'eval_metric' in params:
        if isinstance(params['eval_metric'], list):
            metrics = params['eval_metric']
        else:
@ -462,7 +462,7 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
                               rank=0,
                               evaluation_result_list=res))
        except EarlyStopException as e:
-            for k in results.keys():
+            for k in results:
                results[k] = results[k][:(e.best_iteration + 1)]
            break
    if as_pandas:
--- a/tests/travis/run_test.sh
+++ b/tests/travis/run_test.sh
@ -1,6 +1,9 @@
 #!/bin/bash

 if [ ${TASK} == "lint" ]; then
+    source activate python3
+    conda install numpy scipy
+    python -m pip install cpplint pylint astroid
    make lint || exit -1
    echo "Check documentations..."

--- a/tests/travis/setup.sh
+++ b/tests/travis/setup.sh
@ -1,7 +1,19 @@
 #!/bin/bash

 if [ ${TASK} == "lint" ]; then
-    pip install --user  cpplint 'pylint==1.4.4' 'astroid==1.3.6' 
+    if [ ${TRAVIS_OS_NAME} == "osx" ]; then
+        wget -O conda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
+    else
+        wget -O conda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
+    fi
+    bash conda.sh -b -p $HOME/miniconda
+    export PATH="$HOME/miniconda/bin:$PATH"
+    hash -r
+    conda config --set always_yes yes --set changeps1 no
+    conda update -q conda
+    # Useful for debugging any issues with conda
+    conda info -a
+    conda create -n python3 python=3.7
 fi


@ -18,6 +30,6 @@ if [ ${TASK} == "python_test" ] || [ ${TASK} == "python_lightweight_test" ] || [
    conda update -q conda
    # Useful for debugging any issues with conda
    conda info -a
-    conda create -n python3 python=3.5
+    conda create -n python3 python=3.7
    conda create -n python2 python=2.7
 fi