Enable flake8

2016-04-24 16:34:46 +09:00
parent b3c9e6a0db
commit 8fc2456c87
19 changed files with 282 additions and 199 deletions
--- a/python-package/setup.py
+++ b/python-package/setup.py
@@ -4,7 +4,7 @@ from __future__ import absolute_import
 import sys
 import os
 from setuptools import setup, find_packages
-#import subprocess
+# import subprocess
 sys.path.insert(0, '.')

 CURRENT_DIR = os.path.dirname(__file__)
@@ -18,12 +18,12 @@ exec(compile(open(libpath_py, "rb").read(), libpath_py, 'exec'), libpath, libpat

 LIB_PATH = libpath['find_lib_path']()
 print("Install libxgboost from: %s" % LIB_PATH)
-#Please use setup_pip.py for generating and deploying pip installation
-#detailed instruction in setup_pip.py
+# Please use setup_pip.py for generating and deploying pip installation
+# detailed instruction in setup_pip.py
 setup(name='xgboost',
      version=open(os.path.join(CURRENT_DIR, 'xgboost/VERSION')).read().strip(),
-      #version='0.4a23',
-      description = "XGBoost Python Package",
+      # version='0.4a23',
+      description="XGBoost Python Package",
      long_description=open(os.path.join(CURRENT_DIR, 'README.rst')).read(),
      install_requires=[
          'numpy',
@@ -33,8 +33,8 @@ setup(name='xgboost',
      maintainer_email='phunter.lau@gmail.com',
      zip_safe=False,
      packages=find_packages(),
-      #this will use MANIFEST.in during install where we specify additional files,
-      #this is the golden line
+      # this will use MANIFEST.in during install where we specify additional files,
+      # this is the golden line
      include_package_data=True,
      data_files=[('xgboost', LIB_PATH)],
      url='https://github.com/dmlc/xgboost')
--- a/python-package/setup_pip.py
+++ b/python-package/setup_pip.py
@@ -4,14 +4,14 @@ from __future__ import absolute_import
 import sys
 import os
 from setuptools import setup, find_packages
-#import subprocess
+# import subprocess
 sys.path.insert(0, '.')

-#this script is for packing and shipping pip installation
-#it builds xgboost code on the fly and packs for pip
-#please don't use this file for installing from github
+# this script is for packing and shipping pip installation
+# it builds xgboost code on the fly and packs for pip
+# please don't use this file for installing from github

-if os.name != 'nt': #if not windows, compile and install
+if os.name != 'nt':     # if not windows, compile and install
    os.system('sh ./xgboost/build-python.sh')
 else:
    print('Windows users please use github installation.')
@@ -28,12 +28,12 @@ exec(compile(open(libpath_py, "rb").read(), libpath_py, 'exec'), libpath, libpat

 LIB_PATH = libpath['find_lib_path']()

-#to deploy to pip, please use
-#make pythonpack
-#python setup.py register sdist upload
-#and be sure to test it firstly using "python setup.py register sdist upload -r pypitest"
+# to deploy to pip, please use
+# make pythonpack
+# python setup.py register sdist upload
+# and be sure to test it firstly using "python setup.py register sdist upload -r pypitest"
 setup(name='xgboost',
-      #version=open(os.path.join(CURRENT_DIR, 'xgboost/VERSION')).read().strip(),
+      # version=open(os.path.join(CURRENT_DIR, 'xgboost/VERSION')).read().strip(),
      version='0.4a30',
      description=open(os.path.join(CURRENT_DIR, 'README.rst')).read(),
      install_requires=[
@@ -44,15 +44,15 @@ setup(name='xgboost',
      maintainer_email='phunter.lau@gmail.com',
      zip_safe=False,
      packages=find_packages(),
-      #don't need this and don't use this, give everything to MANIFEST.in
-      #package_dir = {'':'xgboost'},
-      #package_data = {'': ['*.txt','*.md','*.sh'],
+      # don't need this and don't use this, give everything to MANIFEST.in
+      # package_dir = {'':'xgboost'},
+      # package_data = {'': ['*.txt','*.md','*.sh'],
      #               }
-      #this will use MANIFEST.in during install where we specify additional files,
-      #this is the golden line
+      # this will use MANIFEST.in during install where we specify additional files,
+      # this is the golden line
      include_package_data=True,
-      #!!! don't use data_files for creating pip installation,
-      #otherwise install_data process will copy it to
-      #root directory for some machines, and cause confusions on building
-      #data_files=[('xgboost', LIB_PATH)],
+      # !!! don't use data_files for creating pip installation,
+      # otherwise install_data process will copy it to
+      # root directory for some machines, and cause confusions on building
+      # data_files=[('xgboost', LIB_PATH)],
      url='https://github.com/dmlc/xgboost')
--- a/python-package/xgboost/init.py
+++ b/python-package/xgboost/init.py
@@ -10,7 +10,7 @@ import os

 from .core import DMatrix, Booster
 from .training import train, cv
-from . import rabit
+from . import rabit                   # noqa
 try:
    from .sklearn import XGBModel, XGBClassifier, XGBRegressor
    from .plotting import plot_importance, plot_tree, to_graphviz
--- a/python-package/xgboost/compat.py
+++ b/python-package/xgboost/compat.py
@@ -12,11 +12,21 @@ PY3 = (sys.version_info[0] == 3)
 if PY3:
    # pylint: disable=invalid-name, redefined-builtin
    STRING_TYPES = str,
-    py_str = lambda x: x.decode('utf-8')
+
+    def py_str(x):
+        return x.decode('utf-8')
 else:
    # pylint: disable=invalid-name
    STRING_TYPES = basestring,
-    py_str = lambda x: x
+
+    def py_str(x):
+        return x
+
+try:
+    import cPickle as pickle   # noqa
+except ImportError:
+    import pickle              # noqa
+

 # pandas
 try:
@@ -34,7 +44,7 @@ except ImportError:
 try:
    from sklearn.base import BaseEstimator
    from sklearn.base import RegressorMixin, ClassifierMixin
-    from sklearn.preprocessing import LabelEncoder
+    from sklearn.preprocessing import LabelEncoder                # noqa
    from sklearn.cross_validation import KFold, StratifiedKFold
    SKLEARN_INSTALLED = True

--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@@ -14,6 +14,7 @@ from .libpath import find_lib_path

 from .compat import STRING_TYPES, PY3, DataFrame, py_str

+
 class XGBoostError(Exception):
    """Error throwed by xgboost trainer."""
    pass
@@ -82,6 +83,7 @@ def _load_lib():
 # load the XGBoost library globally
 _LIB = _load_lib()

+
 def _check_call(ret):
    """Check the return value of C API call

@@ -129,7 +131,6 @@ def c_array(ctype, values):
    return (ctype * len(values))(*values)


-
 PANDAS_DTYPE_MAPPER = {'int8': 'int', 'int16': 'int', 'int32': 'int', 'int64': 'int',
                       'uint8': 'int', 'uint16': 'int', 'uint32': 'int', 'uint64': 'int',
                       'float16': 'float', 'float32': 'float', 'float64': 'float',
@@ -144,8 +145,12 @@ def _maybe_pandas_data(data, feature_names, feature_types):

    data_dtypes = data.dtypes
    if not all(dtype.name in PANDAS_DTYPE_MAPPER for dtype in data_dtypes):
-        bad_fields = [data.columns[i] for i, dtype in enumerate(data_dtypes) if dtype.name not in PANDAS_DTYPE_MAPPER ]  
-        raise ValueError('DataFrame.dtypes for data must be int, float or bool.\nDid not expect the data types in fie    lds '+', '.join(bad_fields))
+        bad_fields = [data.columns[i] for i, dtype in
+                      enumerate(data_dtypes) if dtype.name not in PANDAS_DTYPE_MAPPER]
+
+        msg = """DataFrame.dtypes for data must be int, float or bool.
+Did not expect the data types in fields """
+        raise ValueError(msg + ', '.join(bad_fields))

    if feature_names is None:
        feature_names = data.columns.format()
@@ -174,6 +179,7 @@ def _maybe_pandas_label(label):

    return label

+
 class DMatrix(object):
    """Data Matrix used in XGBoost.

@@ -1041,8 +1047,14 @@ class Booster(object):
            if self.feature_names != data.feature_names:
                dat_missing = set(self.feature_names) - set(data.feature_names)
                my_missing = set(data.feature_names) - set(self.feature_names)
+
                msg = 'feature_names mismatch: {0} {1}'
-                if dat_missing: msg +='\nexpected ' + ', '.join(str(s) for s in dat_missing) +' in input data'
-                if my_missing: msg +='\ntraining data did not have the following fields: ' + ', '.join(str(s) for s in my_missing)
+
+                if dat_missing:
+                    msg += '\nexpected ' + ', '.join(str(s) for s in dat_missing) + ' in input data'
+
+                if my_missing:
+                    msg += '\ntraining data did not have the following fields: ' + ', '.join(str(s) for s in my_missing)
+
                raise ValueError(msg.format(self.feature_names,
                                            data.feature_names))
--- a/python-package/xgboost/libpath.py
+++ b/python-package/xgboost/libpath.py
@@ -36,7 +36,8 @@ def find_lib_path():
    else:
        dll_path = [os.path.join(p, 'libxgboost.so') for p in dll_path]
    lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)]
-    #From github issues, most of installation errors come from machines w/o compilers
+
+    # From github issues, most of installation errors come from machines w/o compilers
    if len(lib_path) == 0 and not os.environ.get('XGBOOST_BUILD_DOC', False):
        raise XGBoostLibraryNotFound(
            'Cannot find XGBoost Libarary in the candicate path, ' +
--- a/python-package/xgboost/plotting.py
+++ b/python-package/xgboost/plotting.py
@@ -10,6 +10,7 @@ import numpy as np
 from .core import Booster
 from .sklearn import XGBModel

+
 def plot_importance(booster, ax=None, height=0.2,
                    xlim=None, ylim=None, title='Feature importance',
                    xlabel='F score', ylabel='Features',
@@ -105,6 +106,7 @@ _LEAFPAT = re.compile(r'(\d+):(leaf=.+)')
 _EDGEPAT = re.compile(r'yes=(\d+),no=(\d+),missing=(\d+)')
 _EDGEPAT2 = re.compile(r'yes=(\d+),no=(\d+)')

+
 def _parse_node(graph, text):
    """parse dumped node"""
    match = _NODEPAT.match(text)
--- a/python-package/xgboost/rabit.py
+++ b/python-package/xgboost/rabit.py
@@ -1,11 +1,12 @@
 """Distributed XGBoost Rabit related API."""
 from __future__ import absolute_import
 import sys
-import atexit
 import ctypes
 import numpy as np

 from .core import _LIB, c_str, STRING_TYPES
+from .compat import pickle
+

 def _init_rabit():
    """internal libary initializer."""
@@ -15,6 +16,7 @@ def _init_rabit():
        _LIB.RabitIsDistributed.restype = ctypes.c_int
        _LIB.RabitVersionNumber.restype = ctypes.c_int

+
 def init(args=None):
    """Initialize the rabit libary with arguments"""
    if args is None:
@@ -73,6 +75,7 @@ def tracker_print(msg):
        sys.stdout.write(msg)
        sys.stdout.flush()

+
 def get_processor_name():
    """Get the processor name.

@@ -127,14 +130,14 @@ def broadcast(data, root):

 # enumeration of dtypes
 DTYPE_ENUM__ = {
-    np.dtype('int8') : 0,
-    np.dtype('uint8') : 1,
-    np.dtype('int32') : 2,
-    np.dtype('uint32') : 3,
-    np.dtype('int64') : 4,
-    np.dtype('uint64') : 5,
-    np.dtype('float32') : 6,
-    np.dtype('float64') : 7
+    np.dtype('int8'): 0,
+    np.dtype('uint8'): 1,
+    np.dtype('int32'): 2,
+    np.dtype('uint32'): 3,
+    np.dtype('int64'): 4,
+    np.dtype('uint64'): 5,
+    np.dtype('float32'): 6,
+    np.dtype('float64'): 7
 }


@@ -175,6 +178,7 @@ def allreduce(data, op, prepare_fun=None):
                            op, None, None)
    else:
        func_ptr = ctypes.CFUNCTYPE(None, ctypes.c_void_p)
+
        def pfunc(args):
            """prepare function."""
            prepare_fun(data)
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -366,7 +366,6 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
        self.classes_ = np.unique(y)
        self.n_classes_ = len(self.classes_)

-
        xgb_options = self.get_xgb_params()

        if callable(self.objective):
--- a/python-package/xgboost/training.py
+++ b/python-package/xgboost/training.py
@@ -6,12 +6,12 @@ from __future__ import absolute_import

 import sys
 import re
-import os
 import numpy as np
-from .core import Booster, STRING_TYPES
-from .compat import (SKLEARN_INSTALLED, XGBStratifiedKFold, XGBKFold)
+from .core import Booster, STRING_TYPES, XGBoostError
+from .compat import (SKLEARN_INSTALLED, XGBStratifiedKFold)
 from . import rabit

+
 def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
          maximize=False, early_stopping_rounds=None, evals_result=None,
          verbose_eval=True, learning_rates=None, xgb_model=None):
@@ -97,7 +97,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
            verbose_eval = True if verbose_eval_every_line > 0 else False

    if rabit.get_rank() != 0:
-        verbose_eval = False;
+        verbose_eval = False

    if xgb_model is not None:
        if not isinstance(xgb_model, STRING_TYPES):
@@ -135,8 +135,9 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
        if isinstance(params, list):
            if len(params) != len(dict(params).items()):
                params = dict(params)
-                rabit.tracker_print("Multiple eval metrics have been passed: " \
-                                    "'{0}' will be used for early stopping.\n\n".format(params['eval_metric']))
+                msg = ("Multiple eval metrics have been passed: "
+                       "'{0}' will be used for early stopping.\n\n")
+                rabit.tracker_print(msg.format(params['eval_metric']))
            else:
                params = dict(params)

@@ -173,7 +174,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,

        # Distributed code: need to resume to this point.
        # Skip the first update if it is a recovery step.
-        if version % 2  == 0:
+        if version % 2 == 0:
            bst.update(dtrain, i, obj)
            bst.save_rabit_checkpoint()
            version += 1
@@ -203,7 +204,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
                    evals_idx = evals_name.index(key)
                    res_per_eval = len(res) // len(evals_name)
                    for r in range(res_per_eval):
-                        res_item = res[(evals_idx*res_per_eval) + r]
+                        res_item = res[(evals_idx * res_per_eval) + r]
                        res_key = res_item[0]
                        res_val = res_item[1]
                        if res_key in evals_result[key]:
@@ -224,7 +225,8 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
                elif i - best_iteration >= early_stopping_rounds:
                    best_msg = bst.attr('best_msg')
                    if verbose_eval:
-                        rabit.tracker_print("Stopping. Best iteration:\n{}\n\n".format(best_msg))
+                        msg = "Stopping. Best iteration:\n{}\n\n"
+                        rabit.tracker_print(msg.format(best_msg))
                    break
        # do checkpoint after evaluation, in case evaluation also updates booster.
        bst.save_rabit_checkpoint()
@@ -290,6 +292,7 @@ def mknfold(dall, nfold, param, seed, evals=(), fpreproc=None, stratified=False,
        ret.append(CVPack(dtrain, dtest, plst))
    return ret

+
 def aggcv(rlist, show_stdv=True, verbose_eval=None, as_pandas=True, trial=0):
    # pylint: disable=invalid-name
    """
@@ -405,8 +408,8 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
    -------
    evaluation history : list(string)
    """
-    if stratified == True and not SKLEARN_INSTALLED:
-            raise XGBoostError('sklearn needs to be installed in order to use stratified cv')
+    if stratified is True and not SKLEARN_INSTALLED:
+        raise XGBoostError('sklearn needs to be installed in order to use stratified cv')

    if isinstance(metrics, str):
        metrics = [metrics]
@@ -417,7 +420,7 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
        if 'eval_metric' in params:
            params['eval_metric'] = _metrics
    else:
-        params= dict((k, v) for k, v in params.items())
+        params = dict((k, v) for k, v in params.items())

    if len(metrics) == 0 and 'eval_metric' in params:
        if isinstance(params['eval_metric'], list):
@@ -428,12 +431,14 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
    params.pop("eval_metric", None)

    if early_stopping_rounds is not None:
+
        if len(metrics) > 1:
-            raise ValueError('Check your params. '\
-                                     'Early stopping works with single eval metric only.')
+            msg = ('Check your params. '
+                   'Early stopping works with single eval metric only.')
+            raise ValueError(msg)
        if verbose_eval:
-            sys.stderr.write("Will train until cv error hasn't decreased in {} rounds.\n".format(\
-                early_stopping_rounds))
+            msg = "Will train until cv error hasn't decreased in {} rounds.\n"
+            sys.stderr.write(msg.format(early_stopping_rounds))

        maximize_score = False
        if len(metrics) == 1:
@@ -466,10 +471,10 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
                best_score = score
                best_score_i = i
            elif i - best_score_i >= early_stopping_rounds:
-                results = results[:best_score_i+1]
+                results = results[:best_score_i + 1]
                if verbose_eval:
-                    sys.stderr.write("Stopping. Best iteration:\n[{}] cv-mean:{}\tcv-std:{}\n".
-                                     format(best_score_i, results[-1][0], results[-1][1]))
+                    msg = "Stopping. Best iteration:\n[{}] cv-mean:{}\tcv-std:{}\n"
+                    sys.stderr.write(msg.format(best_score_i, results[-1][0], results[-1][1]))
                break
    if as_pandas:
        try: