Migrate pylint check to Python 3 (#4381)

* Migrate lint to Python 3

* Fix lint errors

* Use Miniconda3 to install Python 3.7

* Use latest pylint and astroid
This commit is contained in:
Philip Hyunsu Cho 2019-04-21 01:01:54 -07:00 committed by GitHub
parent 5e97de6a41
commit bbe0dbd7ec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 118 additions and 117 deletions

View File

@ -173,10 +173,10 @@ xgboost: $(CLI_OBJ) $(ALL_DEP)
$(CXX) $(CFLAGS) -o $@ $(filter %.o %.a, $^) $(LDFLAGS)
rcpplint:
python2 dmlc-core/scripts/lint.py xgboost ${LINT_LANG} R-package/src
python3 dmlc-core/scripts/lint.py xgboost ${LINT_LANG} R-package/src
lint: rcpplint
python2 dmlc-core/scripts/lint.py xgboost ${LINT_LANG} include src plugin python-package
python3 dmlc-core/scripts/lint.py --pylint-rc ${PWD}/python-package/.pylintrc xgboost ${LINT_LANG} include src plugin python-package
pylint:
flake8 --ignore E501 python-package

View File

@ -4,7 +4,7 @@ ignore=tests
extension-pkg-whitelist=numpy
disiable=unexpected-special-method-signature,too-many-nested-blocks
disable=unexpected-special-method-signature,too-many-nested-blocks,useless-object-inheritance
dummy-variables-rgx=(unused|)_.*

View File

@ -1,5 +1,5 @@
# coding: utf-8
# pylint: disable= invalid-name
# pylint: disable=invalid-name, too-many-statements
"""Training Library containing training routines."""
from __future__ import absolute_import
@ -20,12 +20,10 @@ def _fmt_metric(value, show_stdv=True):
"""format metric string"""
if len(value) == 2:
return '%s:%g' % (value[0], value[1])
elif len(value) == 3:
if len(value) == 3:
if show_stdv:
return '%s:%g+%g' % (value[0], value[1], value[2])
else:
return '%s:%g' % (value[0], value[1])
else:
raise ValueError("wrong metric value")
@ -50,10 +48,10 @@ def print_evaluation(period=1, show_stdv=True):
"""
def callback(env):
"""internal function"""
if env.rank != 0 or len(env.evaluation_result_list) == 0 or period is False or period == 0:
if env.rank != 0 or (not env.evaluation_result_list) or period is False or period == 0:
return
i = env.iteration
if (i % period == 0 or i + 1 == env.begin_iteration or i + 1 == env.end_iteration):
if i % period == 0 or i + 1 == env.begin_iteration or i + 1 == env.end_iteration:
msg = '\t'.join([_fmt_metric(x, show_stdv) for x in env.evaluation_result_list])
rabit.tracker_print('[%d]\t%s\n' % (i, msg))
return callback
@ -89,7 +87,7 @@ def record_evaluation(eval_result):
def callback(env):
"""internal function"""
if len(eval_result) == 0:
if not eval_result:
init(env)
for k, v in env.evaluation_result_list:
pos = k.index('-')
@ -182,14 +180,14 @@ def early_stop(stopping_rounds, maximize=False, verbose=True):
"""internal function"""
bst = env.model
if len(env.evaluation_result_list) == 0:
if not env.evaluation_result_list:
raise ValueError('For early stopping you need at least one set in evals.')
if len(env.evaluation_result_list) > 1 and verbose:
msg = ("Multiple eval metrics have been passed: "
"'{0}' will be used for early stopping.\n\n")
rabit.tracker_print(msg.format(env.evaluation_result_list[-1][0]))
maximize_metrics = ('auc', 'aucpr', 'map', 'ndcg')
maximize_at_n_metrics = ('auc@', 'aucpr@' 'map@', 'ndcg@')
maximize_at_n_metrics = ('auc@', 'aucpr@', 'map@', 'ndcg@')
maximize_score = maximize
metric_label = env.evaluation_result_list[-1][0]
metric = metric_label.split('-', 1)[-1]
@ -225,7 +223,7 @@ def early_stop(stopping_rounds, maximize=False, verbose=True):
def callback(env):
"""internal function"""
score = env.evaluation_result_list[-1][1]
if len(state) == 0:
if not state:
init(env)
best_score = state['best_score']
best_iteration = state['best_iteration']

View File

@ -11,14 +11,13 @@ PY3 = (sys.version_info[0] == 3)
if PY3:
# pylint: disable=invalid-name, redefined-builtin
STRING_TYPES = str,
STRING_TYPES = (str,)
def py_str(x):
"""convert c string back to python string"""
return x.decode('utf-8')
else:
# pylint: disable=invalid-name
STRING_TYPES = basestring,
STRING_TYPES = (basestring,) # pylint: disable=undefined-variable
def py_str(x):
"""convert c string back to python string"""
@ -37,13 +36,13 @@ try:
PANDAS_INSTALLED = True
except ImportError:
# pylint: disable=too-few-public-methods
class MultiIndex(object):
""" dummy for pandas.MultiIndex """
pass
# pylint: disable=too-few-public-methods
class DataFrame(object):
""" dummy for pandas.DataFrame """
pass
PANDAS_INSTALLED = False
@ -57,9 +56,9 @@ try:
DT_INSTALLED = True
except ImportError:
# pylint: disable=too-few-public-methods
class DataTable(object):
""" dummy for datatable.DataTable """
pass
DT_INSTALLED = False

View File

@ -1,6 +1,6 @@
# coding: utf-8
# pylint: disable=too-many-arguments, too-many-branches, invalid-name
# pylint: disable=too-many-branches, too-many-lines, W0141
# pylint: disable=too-many-branches, too-many-lines, too-many-locals
"""Core XGBoost Library."""
from __future__ import absolute_import
import collections
@ -30,7 +30,6 @@ c_bst_ulong = ctypes.c_uint64
class XGBoostError(Exception):
"""Error thrown by xgboost trainer."""
pass
class EarlyStopException(Exception):
@ -67,18 +66,16 @@ def from_pystr_to_cstr(data):
list of str
"""
if isinstance(data, list):
if not isinstance(data, list):
raise NotImplementedError
pointers = (ctypes.c_char_p * len(data))()
if PY3:
data = [bytes(d, 'utf-8') for d in data]
else:
data = [d.encode('utf-8') if isinstance(d, unicode) else d
data = [d.encode('utf-8') if isinstance(d, unicode) else d # pylint: disable=undefined-variable
for d in data]
pointers[:] = data
return pointers
else:
# copy from above when we actually use it
raise NotImplementedError
def from_cstr_to_pystr(data, length):
@ -104,6 +101,7 @@ def from_cstr_to_pystr(data, length):
try:
res.append(str(data[i].decode('ascii')))
except UnicodeDecodeError:
# pylint: disable=undefined-variable
res.append(unicode(data[i].decode('utf-8')))
return res
@ -123,7 +121,7 @@ def _get_log_callback_func():
def _load_lib():
"""Load xgboost Library."""
lib_paths = find_lib_path()
if len(lib_paths) == 0:
if not lib_paths:
return None
try:
pathBackup = os.environ['PATH'].split(os.pathsep)
@ -243,7 +241,7 @@ def _maybe_pandas_data(data, feature_names, feature_types):
if feature_names is None:
if isinstance(data.columns, MultiIndex):
feature_names = [
' '.join(map(str, i))
' '.join([str(x) for x in i])
for i in data.columns
]
else:
@ -267,7 +265,6 @@ def _maybe_pandas_label(label):
label_dtypes = label.dtypes
if not all(dtype.name in PANDAS_DTYPE_MAPPER for dtype in label_dtypes):
raise ValueError('DataFrame.dtypes for label must be int, float or bool')
else:
label = label.values.astype('float')
# pd.Series can be passed to xgb as it is
@ -301,7 +298,6 @@ def _maybe_dt_data(data, feature_names, feature_types):
# always return stypes for dt ingestion
if feature_types is not None:
raise ValueError('DataTable has own feature types, cannot pass them in')
else:
feature_types = np.vectorize(DT_TYPE_MAPPER2.get)(data_types_names)
return data, feature_names, feature_types
@ -512,7 +508,7 @@ class DMatrix(object):
ptrs[icol] = ctypes.c_void_p(ptr)
else:
# datatable<=0.8.0
from datatable.internal import frame_column_data_r
from datatable.internal import frame_column_data_r # pylint: disable=no-name-in-module,import-error
for icol in range(data.ncols):
ptrs[icol] = frame_column_data_r(data, icol)
@ -1039,7 +1035,6 @@ class Booster(object):
self.handle, c_str(key), ctypes.byref(ret), ctypes.byref(success)))
if success.value != 0:
return py_str(ret.value)
else:
return None
def attributes(self):
@ -1056,8 +1051,7 @@ class Booster(object):
ctypes.byref(length),
ctypes.byref(sarr)))
attr_names = from_cstr_to_pystr(sarr, length)
res = dict([(n, self.attr(n)) for n in attr_names])
return res
return {n: self.attr(n) for n in attr_names}
def set_attr(self, **kwargs):
"""Set the attribute of the Booster.
@ -1399,13 +1393,13 @@ class Booster(object):
ret = self.get_dump(fmap, with_stats, dump_format)
if dump_format == 'json':
fout.write('[\n')
for i in range(len(ret)):
for i, _ in enumerate(ret):
fout.write(ret[i])
if i < len(ret) - 1:
fout.write(",\n")
fout.write('\n]')
else:
for i in range(len(ret)):
for i, _ in enumerate(ret):
fout.write('booster[{}]:\n'.format(i))
fout.write(ret[i])
if need_close:
@ -1538,7 +1532,6 @@ class Booster(object):
return fmap
else:
average_over_splits = True
if importance_type == 'total_gain':
importance_type = 'gain'
@ -1721,9 +1714,9 @@ class Booster(object):
xgdump = self.get_dump(fmap=fmap)
values = []
regexp = re.compile(r"\[{0}<([\d.Ee+-]+)\]".format(feature))
for i in range(len(xgdump)):
for i, _ in enumerate(xgdump):
m = re.findall(regexp, xgdump[i])
values.extend(map(float, m))
values.extend([float(x) for x in m])
n_unique = len(np.unique(values))
bins = max(min(n_unique, bins) if bins is not None else n_unique, 1)
@ -1734,9 +1727,7 @@ class Booster(object):
if as_pandas and PANDAS_INSTALLED:
return DataFrame(nph, columns=['SplitValue', 'Count'])
elif as_pandas and not PANDAS_INSTALLED:
if as_pandas and not PANDAS_INSTALLED:
sys.stderr.write(
"Returning histogram as ndarray (as_pandas == True, but pandas is not installed).")
return nph
else:
return nph

View File

@ -8,7 +8,6 @@ import sys
class XGBoostLibraryNotFound(Exception):
"""Error thrown by when xgboost is not found"""
pass
def find_lib_path():

View File

@ -55,7 +55,6 @@ def plot_importance(booster, ax=None, height=0.2,
-------
ax : matplotlib Axes
"""
# TODO: move this to compat.py
try:
import matplotlib.pyplot as plt
except ImportError:
@ -70,11 +69,12 @@ def plot_importance(booster, ax=None, height=0.2,
else:
raise ValueError('tree must be Booster, XGBModel or dict instance')
if len(importance) == 0:
if not importance:
raise ValueError('Booster.get_score() results in empty')
tuples = [(k, importance[k]) for k in importance]
if max_num_features is not None:
# pylint: disable=invalid-unary-operand-type
tuples = sorted(tuples, key=lambda x: x[1])[-max_num_features:]
else:
tuples = sorted(tuples, key=lambda x: x[1])

View File

@ -3,9 +3,9 @@
"""Scikit-Learn Wrapper interface for XGBoost."""
from __future__ import absolute_import
import numpy as np
import warnings
import json
import numpy as np
from .core import Booster, DMatrix, XGBoostError
from .training import train
@ -107,15 +107,15 @@ class XGBModel(XGBModelBase):
importance_type: string, default "gain"
The feature importance type for the feature_importances_ property: either "gain",
"weight", "cover", "total_gain" or "total_cover".
\*\*kwargs : dict, optional
\\*\\*kwargs : dict, optional
Keyword arguments for XGBoost Booster object. Full documentation of parameters can
be found here: https://github.com/dmlc/xgboost/blob/master/doc/parameter.rst.
Attempting to set a parameter via the constructor args and \*\*kwargs dict simultaneously
Attempting to set a parameter via the constructor args and \\*\\*kwargs dict simultaneously
will result in a TypeError.
.. note:: \*\*kwargs unsupported by scikit-learn
.. note:: \\*\\*kwargs unsupported by scikit-learn
\*\*kwargs is unsupported by scikit-learn. We do not guarantee that parameters
\\*\\*kwargs is unsupported by scikit-learn. We do not guarantee that parameters
passed via this argument will interact properly with scikit-learn.
Note
@ -597,7 +597,7 @@ class XGBModel(XGBModelBase):
class XGBClassifier(XGBModel, XGBClassifierBase):
# pylint: disable=missing-docstring,too-many-arguments,invalid-name
# pylint: disable=missing-docstring,too-many-arguments,invalid-name,too-many-instance-attributes
__doc__ = "Implementation of the scikit-learn API for XGBoost classification.\n\n" \
+ '\n'.join(XGBModel.__doc__.split('\n')[2:])
@ -834,7 +834,6 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
validate_features=validate_features)
if self.objective == "multi:softprob":
return class_probs
else:
classone_probs = class_probs
classzero_probs = 1.0 - classone_probs
return np.vstack((classzero_probs, classone_probs)).transpose()
@ -1008,15 +1007,15 @@ class XGBRanker(XGBModel):
missing : float, optional
Value in the data which needs to be present as a missing value. If
None, defaults to np.nan.
\*\*kwargs : dict, optional
\\*\\*kwargs : dict, optional
Keyword arguments for XGBoost Booster object. Full documentation of parameters can
be found here: https://github.com/dmlc/xgboost/blob/master/doc/parameter.rst.
Attempting to set a parameter via the constructor args and \*\*kwargs dict
Attempting to set a parameter via the constructor args and \\*\\*kwargs dict
simultaneously will result in a TypeError.
.. note:: \*\*kwargs unsupported by scikit-learn
.. note:: \\*\\*kwargs unsupported by scikit-learn
\*\*kwargs is unsupported by scikit-learn. We do not guarantee that parameters
\\*\\*kwargs is unsupported by scikit-learn. We do not guarantee that parameters
passed via this argument will interact properly with scikit-learn.
Note
@ -1073,7 +1072,7 @@ class XGBRanker(XGBModel):
random_state=random_state, seed=seed, missing=missing, **kwargs)
if callable(self.objective):
raise ValueError("custom objective function not supported by XGBRanker")
elif "rank:" not in self.objective:
if "rank:" not in self.objective:
raise ValueError("please use XGBRanker for ranking task")
def fit(self, X, y, group, sample_weight=None, eval_set=None, sample_weight_eval_set=None,
@ -1158,9 +1157,9 @@ class XGBRanker(XGBModel):
if eval_set is not None:
if eval_group is None:
raise ValueError("eval_group is required if eval_set is not None")
elif len(eval_group) != len(eval_set):
if len(eval_group) != len(eval_set):
raise ValueError("length of eval_group should match that of eval_set")
elif any(group is None for group in eval_group):
if any(group is None for group in eval_group):
raise ValueError("group is required for all eval datasets for ranking task")
def _dmat_init(group, **params):

View File

@ -49,7 +49,7 @@ def _train_internal(params, dtrain,
# Distributed code: Load the checkpoint from rabit.
version = bst.load_rabit_checkpoint()
assert(rabit.get_world_size() != 1 or version == 0)
assert rabit.get_world_size() != 1 or version == 0
rank = rabit.get_rank()
start_iteration = int(version / 2)
nboost += start_iteration
@ -75,12 +75,12 @@ def _train_internal(params, dtrain,
bst.save_rabit_checkpoint()
version += 1
assert(rabit.get_world_size() == 1 or version == rabit.version_number())
assert rabit.get_world_size() == 1 or version == rabit.version_number()
nboost += 1
evaluation_result_list = []
# check evaluation result.
if len(evals) != 0:
if evals:
bst_eval_set = bst.eval_set(evals, i, feval)
if isinstance(bst_eval_set, STRING_TYPES):
msg = bst_eval_set
@ -402,7 +402,7 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
else:
params = dict((k, v) for k, v in params.items())
if len(metrics) == 0 and 'eval_metric' in params:
if (not metrics) and 'eval_metric' in params:
if isinstance(params['eval_metric'], list):
metrics = params['eval_metric']
else:
@ -462,7 +462,7 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
rank=0,
evaluation_result_list=res))
except EarlyStopException as e:
for k in results.keys():
for k in results:
results[k] = results[k][:(e.best_iteration + 1)]
break
if as_pandas:

View File

@ -1,6 +1,9 @@
#!/bin/bash
if [ ${TASK} == "lint" ]; then
source activate python3
conda install numpy scipy
python -m pip install cpplint pylint astroid
make lint || exit -1
echo "Check documentations..."

View File

@ -1,7 +1,19 @@
#!/bin/bash
if [ ${TASK} == "lint" ]; then
pip install --user cpplint 'pylint==1.4.4' 'astroid==1.3.6'
if [ ${TRAVIS_OS_NAME} == "osx" ]; then
wget -O conda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
else
wget -O conda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
fi
bash conda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
hash -r
conda config --set always_yes yes --set changeps1 no
conda update -q conda
# Useful for debugging any issues with conda
conda info -a
conda create -n python3 python=3.7
fi
@ -18,6 +30,6 @@ if [ ${TASK} == "python_test" ] || [ ${TASK} == "python_lightweight_test" ] || [
conda update -q conda
# Useful for debugging any issues with conda
conda info -a
conda create -n python3 python=3.5
conda create -n python3 python=3.7
conda create -n python2 python=2.7
fi