Migrate pylint check to Python 3 (#4381)
* Migrate lint to Python 3
* Fix lint errors
* Use Miniconda3 to use Python 3.7
* Use latest pylint and astroid
This commit is contained in:
parent
5e97de6a41
commit
bbe0dbd7ec
4
Makefile
4
Makefile
@ -173,10 +173,10 @@ xgboost: $(CLI_OBJ) $(ALL_DEP)
|
||||
$(CXX) $(CFLAGS) -o $@ $(filter %.o %.a, $^) $(LDFLAGS)
|
||||
|
||||
rcpplint:
|
||||
python2 dmlc-core/scripts/lint.py xgboost ${LINT_LANG} R-package/src
|
||||
python3 dmlc-core/scripts/lint.py xgboost ${LINT_LANG} R-package/src
|
||||
|
||||
lint: rcpplint
|
||||
python2 dmlc-core/scripts/lint.py xgboost ${LINT_LANG} include src plugin python-package
|
||||
python3 dmlc-core/scripts/lint.py --pylint-rc ${PWD}/python-package/.pylintrc xgboost ${LINT_LANG} include src plugin python-package
|
||||
|
||||
pylint:
|
||||
flake8 --ignore E501 python-package
|
||||
|
||||
@ -4,7 +4,7 @@ ignore=tests
|
||||
|
||||
extension-pkg-whitelist=numpy
|
||||
|
||||
disiable=unexpected-special-method-signature,too-many-nested-blocks
|
||||
disable=unexpected-special-method-signature,too-many-nested-blocks,useless-object-inheritance
|
||||
|
||||
dummy-variables-rgx=(unused|)_.*
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
# coding: utf-8
|
||||
# pylint: disable= invalid-name
|
||||
# pylint: disable=invalid-name, too-many-statements
|
||||
"""Training Library containing training routines."""
|
||||
from __future__ import absolute_import
|
||||
|
||||
@ -20,12 +20,10 @@ def _fmt_metric(value, show_stdv=True):
|
||||
"""format metric string"""
|
||||
if len(value) == 2:
|
||||
return '%s:%g' % (value[0], value[1])
|
||||
elif len(value) == 3:
|
||||
if len(value) == 3:
|
||||
if show_stdv:
|
||||
return '%s:%g+%g' % (value[0], value[1], value[2])
|
||||
else:
|
||||
return '%s:%g' % (value[0], value[1])
|
||||
else:
|
||||
raise ValueError("wrong metric value")
|
||||
|
||||
|
||||
@ -50,10 +48,10 @@ def print_evaluation(period=1, show_stdv=True):
|
||||
"""
|
||||
def callback(env):
|
||||
"""internal function"""
|
||||
if env.rank != 0 or len(env.evaluation_result_list) == 0 or period is False or period == 0:
|
||||
if env.rank != 0 or (not env.evaluation_result_list) or period is False or period == 0:
|
||||
return
|
||||
i = env.iteration
|
||||
if (i % period == 0 or i + 1 == env.begin_iteration or i + 1 == env.end_iteration):
|
||||
if i % period == 0 or i + 1 == env.begin_iteration or i + 1 == env.end_iteration:
|
||||
msg = '\t'.join([_fmt_metric(x, show_stdv) for x in env.evaluation_result_list])
|
||||
rabit.tracker_print('[%d]\t%s\n' % (i, msg))
|
||||
return callback
|
||||
@ -89,7 +87,7 @@ def record_evaluation(eval_result):
|
||||
|
||||
def callback(env):
|
||||
"""internal function"""
|
||||
if len(eval_result) == 0:
|
||||
if not eval_result:
|
||||
init(env)
|
||||
for k, v in env.evaluation_result_list:
|
||||
pos = k.index('-')
|
||||
@ -182,14 +180,14 @@ def early_stop(stopping_rounds, maximize=False, verbose=True):
|
||||
"""internal function"""
|
||||
bst = env.model
|
||||
|
||||
if len(env.evaluation_result_list) == 0:
|
||||
if not env.evaluation_result_list:
|
||||
raise ValueError('For early stopping you need at least one set in evals.')
|
||||
if len(env.evaluation_result_list) > 1 and verbose:
|
||||
msg = ("Multiple eval metrics have been passed: "
|
||||
"'{0}' will be used for early stopping.\n\n")
|
||||
rabit.tracker_print(msg.format(env.evaluation_result_list[-1][0]))
|
||||
maximize_metrics = ('auc', 'aucpr', 'map', 'ndcg')
|
||||
maximize_at_n_metrics = ('auc@', 'aucpr@' 'map@', 'ndcg@')
|
||||
maximize_at_n_metrics = ('auc@', 'aucpr@', 'map@', 'ndcg@')
|
||||
maximize_score = maximize
|
||||
metric_label = env.evaluation_result_list[-1][0]
|
||||
metric = metric_label.split('-', 1)[-1]
|
||||
@ -225,7 +223,7 @@ def early_stop(stopping_rounds, maximize=False, verbose=True):
|
||||
def callback(env):
|
||||
"""internal function"""
|
||||
score = env.evaluation_result_list[-1][1]
|
||||
if len(state) == 0:
|
||||
if not state:
|
||||
init(env)
|
||||
best_score = state['best_score']
|
||||
best_iteration = state['best_iteration']
|
||||
|
||||
@ -11,14 +11,13 @@ PY3 = (sys.version_info[0] == 3)
|
||||
|
||||
if PY3:
|
||||
# pylint: disable=invalid-name, redefined-builtin
|
||||
STRING_TYPES = str,
|
||||
STRING_TYPES = (str,)
|
||||
|
||||
def py_str(x):
|
||||
"""convert c string back to python string"""
|
||||
return x.decode('utf-8')
|
||||
else:
|
||||
# pylint: disable=invalid-name
|
||||
STRING_TYPES = basestring,
|
||||
STRING_TYPES = (basestring,) # pylint: disable=undefined-variable
|
||||
|
||||
def py_str(x):
|
||||
"""convert c string back to python string"""
|
||||
@ -37,13 +36,13 @@ try:
|
||||
PANDAS_INSTALLED = True
|
||||
except ImportError:
|
||||
|
||||
# pylint: disable=too-few-public-methods
|
||||
class MultiIndex(object):
|
||||
""" dummy for pandas.MultiIndex """
|
||||
pass
|
||||
|
||||
# pylint: disable=too-few-public-methods
|
||||
class DataFrame(object):
|
||||
""" dummy for pandas.DataFrame """
|
||||
pass
|
||||
|
||||
PANDAS_INSTALLED = False
|
||||
|
||||
@ -57,9 +56,9 @@ try:
|
||||
DT_INSTALLED = True
|
||||
except ImportError:
|
||||
|
||||
# pylint: disable=too-few-public-methods
|
||||
class DataTable(object):
|
||||
""" dummy for datatable.DataTable """
|
||||
pass
|
||||
|
||||
DT_INSTALLED = False
|
||||
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
# coding: utf-8
|
||||
# pylint: disable=too-many-arguments, too-many-branches, invalid-name
|
||||
# pylint: disable=too-many-branches, too-many-lines, W0141
|
||||
# pylint: disable=too-many-branches, too-many-lines, too-many-locals
|
||||
"""Core XGBoost Library."""
|
||||
from __future__ import absolute_import
|
||||
import collections
|
||||
@ -30,7 +30,6 @@ c_bst_ulong = ctypes.c_uint64
|
||||
|
||||
class XGBoostError(Exception):
|
||||
"""Error thrown by xgboost trainer."""
|
||||
pass
|
||||
|
||||
|
||||
class EarlyStopException(Exception):
|
||||
@ -67,18 +66,16 @@ def from_pystr_to_cstr(data):
|
||||
list of str
|
||||
"""
|
||||
|
||||
if isinstance(data, list):
|
||||
if not isinstance(data, list):
|
||||
raise NotImplementedError
|
||||
pointers = (ctypes.c_char_p * len(data))()
|
||||
if PY3:
|
||||
data = [bytes(d, 'utf-8') for d in data]
|
||||
else:
|
||||
data = [d.encode('utf-8') if isinstance(d, unicode) else d
|
||||
data = [d.encode('utf-8') if isinstance(d, unicode) else d # pylint: disable=undefined-variable
|
||||
for d in data]
|
||||
pointers[:] = data
|
||||
return pointers
|
||||
else:
|
||||
# copy from above when we actually use it
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def from_cstr_to_pystr(data, length):
|
||||
@ -104,6 +101,7 @@ def from_cstr_to_pystr(data, length):
|
||||
try:
|
||||
res.append(str(data[i].decode('ascii')))
|
||||
except UnicodeDecodeError:
|
||||
# pylint: disable=undefined-variable
|
||||
res.append(unicode(data[i].decode('utf-8')))
|
||||
return res
|
||||
|
||||
@ -123,7 +121,7 @@ def _get_log_callback_func():
|
||||
def _load_lib():
|
||||
"""Load xgboost Library."""
|
||||
lib_paths = find_lib_path()
|
||||
if len(lib_paths) == 0:
|
||||
if not lib_paths:
|
||||
return None
|
||||
try:
|
||||
pathBackup = os.environ['PATH'].split(os.pathsep)
|
||||
@ -243,7 +241,7 @@ def _maybe_pandas_data(data, feature_names, feature_types):
|
||||
if feature_names is None:
|
||||
if isinstance(data.columns, MultiIndex):
|
||||
feature_names = [
|
||||
' '.join(map(str, i))
|
||||
' '.join([str(x) for x in i])
|
||||
for i in data.columns
|
||||
]
|
||||
else:
|
||||
@ -267,7 +265,6 @@ def _maybe_pandas_label(label):
|
||||
label_dtypes = label.dtypes
|
||||
if not all(dtype.name in PANDAS_DTYPE_MAPPER for dtype in label_dtypes):
|
||||
raise ValueError('DataFrame.dtypes for label must be int, float or bool')
|
||||
else:
|
||||
label = label.values.astype('float')
|
||||
# pd.Series can be passed to xgb as it is
|
||||
|
||||
@ -301,7 +298,6 @@ def _maybe_dt_data(data, feature_names, feature_types):
|
||||
# always return stypes for dt ingestion
|
||||
if feature_types is not None:
|
||||
raise ValueError('DataTable has own feature types, cannot pass them in')
|
||||
else:
|
||||
feature_types = np.vectorize(DT_TYPE_MAPPER2.get)(data_types_names)
|
||||
|
||||
return data, feature_names, feature_types
|
||||
@ -512,7 +508,7 @@ class DMatrix(object):
|
||||
ptrs[icol] = ctypes.c_void_p(ptr)
|
||||
else:
|
||||
# datatable<=0.8.0
|
||||
from datatable.internal import frame_column_data_r
|
||||
from datatable.internal import frame_column_data_r # pylint: disable=no-name-in-module,import-error
|
||||
for icol in range(data.ncols):
|
||||
ptrs[icol] = frame_column_data_r(data, icol)
|
||||
|
||||
@ -1039,7 +1035,6 @@ class Booster(object):
|
||||
self.handle, c_str(key), ctypes.byref(ret), ctypes.byref(success)))
|
||||
if success.value != 0:
|
||||
return py_str(ret.value)
|
||||
else:
|
||||
return None
|
||||
|
||||
def attributes(self):
|
||||
@ -1056,8 +1051,7 @@ class Booster(object):
|
||||
ctypes.byref(length),
|
||||
ctypes.byref(sarr)))
|
||||
attr_names = from_cstr_to_pystr(sarr, length)
|
||||
res = dict([(n, self.attr(n)) for n in attr_names])
|
||||
return res
|
||||
return {n: self.attr(n) for n in attr_names}
|
||||
|
||||
def set_attr(self, **kwargs):
|
||||
"""Set the attribute of the Booster.
|
||||
@ -1399,13 +1393,13 @@ class Booster(object):
|
||||
ret = self.get_dump(fmap, with_stats, dump_format)
|
||||
if dump_format == 'json':
|
||||
fout.write('[\n')
|
||||
for i in range(len(ret)):
|
||||
for i, _ in enumerate(ret):
|
||||
fout.write(ret[i])
|
||||
if i < len(ret) - 1:
|
||||
fout.write(",\n")
|
||||
fout.write('\n]')
|
||||
else:
|
||||
for i in range(len(ret)):
|
||||
for i, _ in enumerate(ret):
|
||||
fout.write('booster[{}]:\n'.format(i))
|
||||
fout.write(ret[i])
|
||||
if need_close:
|
||||
@ -1538,7 +1532,6 @@ class Booster(object):
|
||||
|
||||
return fmap
|
||||
|
||||
else:
|
||||
average_over_splits = True
|
||||
if importance_type == 'total_gain':
|
||||
importance_type = 'gain'
|
||||
@ -1721,9 +1714,9 @@ class Booster(object):
|
||||
xgdump = self.get_dump(fmap=fmap)
|
||||
values = []
|
||||
regexp = re.compile(r"\[{0}<([\d.Ee+-]+)\]".format(feature))
|
||||
for i in range(len(xgdump)):
|
||||
for i, _ in enumerate(xgdump):
|
||||
m = re.findall(regexp, xgdump[i])
|
||||
values.extend(map(float, m))
|
||||
values.extend([float(x) for x in m])
|
||||
|
||||
n_unique = len(np.unique(values))
|
||||
bins = max(min(n_unique, bins) if bins is not None else n_unique, 1)
|
||||
@ -1734,9 +1727,7 @@ class Booster(object):
|
||||
|
||||
if as_pandas and PANDAS_INSTALLED:
|
||||
return DataFrame(nph, columns=['SplitValue', 'Count'])
|
||||
elif as_pandas and not PANDAS_INSTALLED:
|
||||
if as_pandas and not PANDAS_INSTALLED:
|
||||
sys.stderr.write(
|
||||
"Returning histogram as ndarray (as_pandas == True, but pandas is not installed).")
|
||||
return nph
|
||||
else:
|
||||
return nph
|
||||
|
||||
@ -8,7 +8,6 @@ import sys
|
||||
|
||||
class XGBoostLibraryNotFound(Exception):
|
||||
"""Error thrown by when xgboost is not found"""
|
||||
pass
|
||||
|
||||
|
||||
def find_lib_path():
|
||||
|
||||
@ -55,7 +55,6 @@ def plot_importance(booster, ax=None, height=0.2,
|
||||
-------
|
||||
ax : matplotlib Axes
|
||||
"""
|
||||
# TODO: move this to compat.py
|
||||
try:
|
||||
import matplotlib.pyplot as plt
|
||||
except ImportError:
|
||||
@ -70,11 +69,12 @@ def plot_importance(booster, ax=None, height=0.2,
|
||||
else:
|
||||
raise ValueError('tree must be Booster, XGBModel or dict instance')
|
||||
|
||||
if len(importance) == 0:
|
||||
if not importance:
|
||||
raise ValueError('Booster.get_score() results in empty')
|
||||
|
||||
tuples = [(k, importance[k]) for k in importance]
|
||||
if max_num_features is not None:
|
||||
# pylint: disable=invalid-unary-operand-type
|
||||
tuples = sorted(tuples, key=lambda x: x[1])[-max_num_features:]
|
||||
else:
|
||||
tuples = sorted(tuples, key=lambda x: x[1])
|
||||
|
||||
@ -3,9 +3,9 @@
|
||||
"""Scikit-Learn Wrapper interface for XGBoost."""
|
||||
from __future__ import absolute_import
|
||||
|
||||
import numpy as np
|
||||
import warnings
|
||||
import json
|
||||
import numpy as np
|
||||
from .core import Booster, DMatrix, XGBoostError
|
||||
from .training import train
|
||||
|
||||
@ -107,15 +107,15 @@ class XGBModel(XGBModelBase):
|
||||
importance_type: string, default "gain"
|
||||
The feature importance type for the feature_importances_ property: either "gain",
|
||||
"weight", "cover", "total_gain" or "total_cover".
|
||||
\*\*kwargs : dict, optional
|
||||
\\*\\*kwargs : dict, optional
|
||||
Keyword arguments for XGBoost Booster object. Full documentation of parameters can
|
||||
be found here: https://github.com/dmlc/xgboost/blob/master/doc/parameter.rst.
|
||||
Attempting to set a parameter via the constructor args and \*\*kwargs dict simultaneously
|
||||
Attempting to set a parameter via the constructor args and \\*\\*kwargs dict simultaneously
|
||||
will result in a TypeError.
|
||||
|
||||
.. note:: \*\*kwargs unsupported by scikit-learn
|
||||
.. note:: \\*\\*kwargs unsupported by scikit-learn
|
||||
|
||||
\*\*kwargs is unsupported by scikit-learn. We do not guarantee that parameters
|
||||
\\*\\*kwargs is unsupported by scikit-learn. We do not guarantee that parameters
|
||||
passed via this argument will interact properly with scikit-learn.
|
||||
|
||||
Note
|
||||
@ -597,7 +597,7 @@ class XGBModel(XGBModelBase):
|
||||
|
||||
|
||||
class XGBClassifier(XGBModel, XGBClassifierBase):
|
||||
# pylint: disable=missing-docstring,too-many-arguments,invalid-name
|
||||
# pylint: disable=missing-docstring,too-many-arguments,invalid-name,too-many-instance-attributes
|
||||
__doc__ = "Implementation of the scikit-learn API for XGBoost classification.\n\n" \
|
||||
+ '\n'.join(XGBModel.__doc__.split('\n')[2:])
|
||||
|
||||
@ -834,7 +834,6 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
||||
validate_features=validate_features)
|
||||
if self.objective == "multi:softprob":
|
||||
return class_probs
|
||||
else:
|
||||
classone_probs = class_probs
|
||||
classzero_probs = 1.0 - classone_probs
|
||||
return np.vstack((classzero_probs, classone_probs)).transpose()
|
||||
@ -1008,15 +1007,15 @@ class XGBRanker(XGBModel):
|
||||
missing : float, optional
|
||||
Value in the data which needs to be present as a missing value. If
|
||||
None, defaults to np.nan.
|
||||
\*\*kwargs : dict, optional
|
||||
\\*\\*kwargs : dict, optional
|
||||
Keyword arguments for XGBoost Booster object. Full documentation of parameters can
|
||||
be found here: https://github.com/dmlc/xgboost/blob/master/doc/parameter.rst.
|
||||
Attempting to set a parameter via the constructor args and \*\*kwargs dict
|
||||
Attempting to set a parameter via the constructor args and \\*\\*kwargs dict
|
||||
simultaneously will result in a TypeError.
|
||||
|
||||
.. note:: \*\*kwargs unsupported by scikit-learn
|
||||
.. note:: \\*\\*kwargs unsupported by scikit-learn
|
||||
|
||||
\*\*kwargs is unsupported by scikit-learn. We do not guarantee that parameters
|
||||
\\*\\*kwargs is unsupported by scikit-learn. We do not guarantee that parameters
|
||||
passed via this argument will interact properly with scikit-learn.
|
||||
|
||||
Note
|
||||
@ -1073,7 +1072,7 @@ class XGBRanker(XGBModel):
|
||||
random_state=random_state, seed=seed, missing=missing, **kwargs)
|
||||
if callable(self.objective):
|
||||
raise ValueError("custom objective function not supported by XGBRanker")
|
||||
elif "rank:" not in self.objective:
|
||||
if "rank:" not in self.objective:
|
||||
raise ValueError("please use XGBRanker for ranking task")
|
||||
|
||||
def fit(self, X, y, group, sample_weight=None, eval_set=None, sample_weight_eval_set=None,
|
||||
@ -1158,9 +1157,9 @@ class XGBRanker(XGBModel):
|
||||
if eval_set is not None:
|
||||
if eval_group is None:
|
||||
raise ValueError("eval_group is required if eval_set is not None")
|
||||
elif len(eval_group) != len(eval_set):
|
||||
if len(eval_group) != len(eval_set):
|
||||
raise ValueError("length of eval_group should match that of eval_set")
|
||||
elif any(group is None for group in eval_group):
|
||||
if any(group is None for group in eval_group):
|
||||
raise ValueError("group is required for all eval datasets for ranking task")
|
||||
|
||||
def _dmat_init(group, **params):
|
||||
|
||||
@ -49,7 +49,7 @@ def _train_internal(params, dtrain,
|
||||
|
||||
# Distributed code: Load the checkpoint from rabit.
|
||||
version = bst.load_rabit_checkpoint()
|
||||
assert(rabit.get_world_size() != 1 or version == 0)
|
||||
assert rabit.get_world_size() != 1 or version == 0
|
||||
rank = rabit.get_rank()
|
||||
start_iteration = int(version / 2)
|
||||
nboost += start_iteration
|
||||
@ -75,12 +75,12 @@ def _train_internal(params, dtrain,
|
||||
bst.save_rabit_checkpoint()
|
||||
version += 1
|
||||
|
||||
assert(rabit.get_world_size() == 1 or version == rabit.version_number())
|
||||
assert rabit.get_world_size() == 1 or version == rabit.version_number()
|
||||
|
||||
nboost += 1
|
||||
evaluation_result_list = []
|
||||
# check evaluation result.
|
||||
if len(evals) != 0:
|
||||
if evals:
|
||||
bst_eval_set = bst.eval_set(evals, i, feval)
|
||||
if isinstance(bst_eval_set, STRING_TYPES):
|
||||
msg = bst_eval_set
|
||||
@ -402,7 +402,7 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
|
||||
else:
|
||||
params = dict((k, v) for k, v in params.items())
|
||||
|
||||
if len(metrics) == 0 and 'eval_metric' in params:
|
||||
if (not metrics) and 'eval_metric' in params:
|
||||
if isinstance(params['eval_metric'], list):
|
||||
metrics = params['eval_metric']
|
||||
else:
|
||||
@ -462,7 +462,7 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
|
||||
rank=0,
|
||||
evaluation_result_list=res))
|
||||
except EarlyStopException as e:
|
||||
for k in results.keys():
|
||||
for k in results:
|
||||
results[k] = results[k][:(e.best_iteration + 1)]
|
||||
break
|
||||
if as_pandas:
|
||||
|
||||
@ -1,6 +1,9 @@
|
||||
#!/bin/bash
|
||||
|
||||
if [ ${TASK} == "lint" ]; then
|
||||
source activate python3
|
||||
conda install numpy scipy
|
||||
python -m pip install cpplint pylint astroid
|
||||
make lint || exit -1
|
||||
echo "Check documentations..."
|
||||
|
||||
|
||||
@ -1,7 +1,19 @@
|
||||
#!/bin/bash
|
||||
|
||||
if [ ${TASK} == "lint" ]; then
|
||||
pip install --user cpplint 'pylint==1.4.4' 'astroid==1.3.6'
|
||||
if [ ${TRAVIS_OS_NAME} == "osx" ]; then
|
||||
wget -O conda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
|
||||
else
|
||||
wget -O conda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
|
||||
fi
|
||||
bash conda.sh -b -p $HOME/miniconda
|
||||
export PATH="$HOME/miniconda/bin:$PATH"
|
||||
hash -r
|
||||
conda config --set always_yes yes --set changeps1 no
|
||||
conda update -q conda
|
||||
# Useful for debugging any issues with conda
|
||||
conda info -a
|
||||
conda create -n python3 python=3.7
|
||||
fi
|
||||
|
||||
|
||||
@ -18,6 +30,6 @@ if [ ${TASK} == "python_test" ] || [ ${TASK} == "python_lightweight_test" ] || [
|
||||
conda update -q conda
|
||||
# Useful for debugging any issues with conda
|
||||
conda info -a
|
||||
conda create -n python3 python=3.5
|
||||
conda create -n python3 python=3.7
|
||||
conda create -n python2 python=2.7
|
||||
fi
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user