From 59b91cf205f294ead9242b1895cae4ebb4b466c7 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 3 Jul 2015 20:36:41 -0700 Subject: [PATCH] make python lint --- wrapper/setup.py | 5 +- wrapper/xgboost.py | 427 +++++++++++++++++++++++++++------------------ 2 files changed, 263 insertions(+), 169 deletions(-) diff --git a/wrapper/setup.py b/wrapper/setup.py index 52bf1cf82..5365d61b0 100644 --- a/wrapper/setup.py +++ b/wrapper/setup.py @@ -1,9 +1,12 @@ +# pylint: disable=invalid-name +"""Setup xgboost package.""" import os import platform from setuptools import setup class XGBoostLibraryNotFound(Exception): + """Exception to raise when xgboost library cannot be found.""" pass @@ -15,7 +18,7 @@ if os.name == 'nt': dll_path.append(os.path.join(curr_dir, '../windows/x64/Release/')) else: dll_path.append(os.path.join(curr_dir, '../windows/Release/')) - + if os.name == 'nt': dll_path = [os.path.join(p, 'xgboost_wrapper.dll') for p in dll_path] diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py index 0280d87b3..c21545b4c 100644 --- a/wrapper/xgboost.py +++ b/wrapper/xgboost.py @@ -6,7 +6,7 @@ Version: 0.40 Authors: Tianqi Chen, Bing Xu Early stopping by Zygmunt ZajÄ…c """ - +# pylint: disable=too-many-arguments, too-many-locals, too-many-lines from __future__ import absolute_import import os @@ -28,20 +28,25 @@ except ImportError: SKLEARN_INSTALLED = False class XGBoostLibraryNotFound(Exception): + """Error throwed by when xgboost is not found""" pass class XGBoostError(Exception): + """Error throwed by xgboost trainer.""" pass __all__ = ['DMatrix', 'CVPack', 'Booster', 'aggcv', 'cv', 'mknfold', 'train'] if sys.version_info[0] == 3: - string_types = str, + # pylint: disable=invalid-name + STRING_TYPES = str, else: - string_types = basestring, + # pylint: disable=invalid-name + STRING_TYPES = basestring, def load_xglib(): + """Load the xgboost library.""" curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) dll_path = [curr_path] if os.name == 'nt': @@ 
-55,7 +60,8 @@ def load_xglib(): dll_path = [os.path.join(p, 'libxgboostwrapper.so') for p in dll_path] lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)] if len(dll_path) == 0: - raise XGBoostLibraryNotFound('cannot find find the files in the candicate path ' + str(dll_path)) + raise XGBoostLibraryNotFound( + 'cannot find find the files in the candicate path ' + str(dll_path)) lib = ctypes.cdll.LoadLibrary(lib_path[0]) # DMatrix functions @@ -79,12 +85,11 @@ def load_xglib(): return lib # load the XGBoost library globally -xglib = load_xglib() +_LIB = load_xglib() def ctypes2numpy(cptr, length, dtype): - """ - Convert a ctypes pointer array to a numpy array. + """Convert a ctypes pointer array to a numpy array. """ if not isinstance(cptr, ctypes.POINTER(ctypes.c_float)): raise RuntimeError('expected float pointer') @@ -95,6 +100,7 @@ def ctypes2numpy(cptr, length, dtype): def ctypes2buffer(cptr, length): + """Convert ctypes pointer to buffer type.""" if not isinstance(cptr, ctypes.POINTER(ctypes.c_char)): raise RuntimeError('expected char pointer') res = bytearray(length) @@ -105,14 +111,17 @@ def ctypes2buffer(cptr, length): def c_str(string): + """Convert a python string to cstring.""" return ctypes.c_char_p(string.encode('utf-8')) def c_array(ctype, values): + """Convert a python string to c array.""" return (ctype * len(values))(*values) class DMatrix(object): + """Data Matrix used in XGBoost.""" def __init__(self, data, label=None, missing=0.0, weight=None, silent=False): """ Data matrix used in XGBoost. 
@@ -135,8 +144,8 @@ class DMatrix(object): if data is None: self.handle = None return - if isinstance(data, string_types): - self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromFile(c_str(data), int(silent))) + if isinstance(data, STRING_TYPES): + self.handle = ctypes.c_void_p(_LIB.XGDMatrixCreateFromFile(c_str(data), int(silent))) elif isinstance(data, scipy.sparse.csr_matrix): self._init_from_csr(data) elif isinstance(data, scipy.sparse.csc_matrix): @@ -160,7 +169,7 @@ class DMatrix(object): """ if len(csr.indices) != len(csr.data): raise ValueError('length mismatch: {} vs {}'.format(len(csr.indices), len(csr.data))) - self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromCSR( + self.handle = ctypes.c_void_p(_LIB.XGDMatrixCreateFromCSR( c_array(ctypes.c_ulong, csr.indptr), c_array(ctypes.c_uint, csr.indices), c_array(ctypes.c_float, csr.data), @@ -172,7 +181,7 @@ class DMatrix(object): """ if len(csc.indices) != len(csc.data): raise ValueError('length mismatch: {} vs {}'.format(len(csc.indices), len(csc.data))) - self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromCSC( + self.handle = ctypes.c_void_p(_LIB.XGDMatrixCreateFromCSC( c_array(ctypes.c_ulong, csc.indptr), c_array(ctypes.c_uint, csc.indices), c_array(ctypes.c_float, csc.data), @@ -183,34 +192,77 @@ class DMatrix(object): Initialize data from a 2-D numpy matrix. """ data = np.array(mat.reshape(mat.size), dtype=np.float32) - self.handle = ctypes.c_void_p(xglib.XGDMatrixCreateFromMat( + self.handle = ctypes.c_void_p(_LIB.XGDMatrixCreateFromMat( data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), mat.shape[0], mat.shape[1], ctypes.c_float(missing))) def __del__(self): - xglib.XGDMatrixFree(self.handle) + _LIB.XGDMatrixFree(self.handle) def get_float_info(self, field): + """Get float property from the DMatrix. 
+ + Parameters + ---------- + field: str + The field name of the information + + Returns + ------- + info : array + a numpy array of float information of the data + """ length = ctypes.c_ulong() - ret = xglib.XGDMatrixGetFloatInfo(self.handle, c_str(field), ctypes.byref(length)) + ret = _LIB.XGDMatrixGetFloatInfo(self.handle, c_str(field), ctypes.byref(length)) return ctypes2numpy(ret, length.value, np.float32) def get_uint_info(self, field): + """Get unsigned integer property from the DMatrix. + + Parameters + ---------- + field: str + The field name of the information + + Returns + ------- + info : array + a numpy array of float information of the data + """ length = ctypes.c_ulong() - ret = xglib.XGDMatrixGetUIntInfo(self.handle, c_str(field), ctypes.byref(length)) + ret = _LIB.XGDMatrixGetUIntInfo(self.handle, c_str(field), ctypes.byref(length)) return ctypes2numpy(ret, length.value, np.uint32) def set_float_info(self, field, data): - xglib.XGDMatrixSetFloatInfo(self.handle, c_str(field), - c_array(ctypes.c_float, data), len(data)) + """Set float type property into the DMatrix. + + Parameters + ---------- + field: str + The field name of the information + + data: numpy array + The array ofdata to be set + """ + _LIB.XGDMatrixSetFloatInfo(self.handle, c_str(field), + c_array(ctypes.c_float, data), len(data)) def set_uint_info(self, field, data): - xglib.XGDMatrixSetUIntInfo(self.handle, c_str(field), - c_array(ctypes.c_uint, data), len(data)) + """Set uint type property into the DMatrix. + + Parameters + ---------- + field: str + The field name of the information + + data: numpy array + The array ofdata to be set + """ + _LIB.XGDMatrixSetUIntInfo(self.handle, c_str(field), + c_array(ctypes.c_uint, data), len(data)) def save_binary(self, fname, silent=True): - """ - Save DMatrix to an XGBoost buffer. + """Save DMatrix to an XGBoost buffer. 
Parameters ---------- @@ -219,74 +271,74 @@ class DMatrix(object): silent : bool (optional; default: True) If set, the output is suppressed. """ - xglib.XGDMatrixSaveBinary(self.handle, c_str(fname), int(silent)) + _LIB.XGDMatrixSaveBinary(self.handle, c_str(fname), int(silent)) def set_label(self, label): - """set label of dmatrix - Args: - label: list - label for DMatrix - Returns: - None + """Set label of dmatrix + + Parameters + ---------- + label: array like + The label information to be set into DMatrix """ self.set_float_info('label', label) def set_weight(self, weight): - """ - Set weight of each instance. + """ Set weight of each instance. Parameters ---------- - weight : float - Weight for positive instance. + weight : array like + Weight for each data point """ self.set_float_info('weight', weight) def set_base_margin(self, margin): - """ - set base margin of booster to start from - this can be used to specify a prediction value of + """ Set base margin of booster to start from. + + This can be used to specify a prediction value of existing model to be base_margin However, remember margin is needed, instead of transformed prediction e.g. for logistic regression: need to put in value before logistic transformation see also example/demo.py + + Parameters + ---------- + margin: array like + Prediction margin of each datapoint """ self.set_float_info('base_margin', margin) def set_group(self, group): - """ - Set group size of DMatrix (used for ranking). + """Set group size of DMatrix (used for ranking). Parameters ---------- - group : int - Group size. + group : array like + Group size of each group """ - xglib.XGDMatrixSetGroup(self.handle, c_array(ctypes.c_uint, group), len(group)) + _LIB.XGDMatrixSetGroup(self.handle, c_array(ctypes.c_uint, group), len(group)) def get_label(self): - """ - Get the label of the DMatrix. + """Get the label of the DMatrix. 
Returns ------- - label : list + label : array """ return self.get_float_info('label') def get_weight(self): - """ - Get the weight of the DMatrix. + """Get the weight of the DMatrix. Returns ------- - weight : float + weight : array """ return self.get_float_info('weight') def get_base_margin(self): - """ - Get the base margin of the DMatrix. + """Get the base margin of the DMatrix. Returns ------- @@ -295,18 +347,16 @@ class DMatrix(object): return self.get_float_info('base_margin') def num_row(self): - """ - Get the number of rows in the DMatrix. + """Get the number of rows in the DMatrix. Returns ------- number of rows : int """ - return xglib.XGDMatrixNumRow(self.handle) + return _LIB.XGDMatrixNumRow(self.handle) def slice(self, rindex): - """ - Slice the DMatrix and return a new DMatrix that only contains `rindex`. + """Slice the DMatrix and return a new DMatrix that only contains `rindex`. Parameters ---------- @@ -319,13 +369,15 @@ class DMatrix(object): A new DMatrix containing only selected indices. """ res = DMatrix(None) - res.handle = ctypes.c_void_p(xglib.XGDMatrixSliceDMatrix( + res.handle = ctypes.c_void_p(_LIB.XGDMatrixSliceDMatrix( self.handle, c_array(ctypes.c_int, rindex), len(rindex))) return res class Booster(object): + """"A Booster of of XGBoost.""" def __init__(self, params=None, cache=(), model_file=None): + # pylint: disable=invalid-name """ Learner class. 
@@ -342,14 +394,14 @@ class Booster(object): if not isinstance(d, DMatrix): raise TypeError('invalid cache item: {}'.format(type(d).__name__)) dmats = c_array(ctypes.c_void_p, [d.handle for d in cache]) - self.handle = ctypes.c_void_p(xglib.XGBoosterCreate(dmats, len(cache))) + self.handle = ctypes.c_void_p(_LIB.XGBoosterCreate(dmats, len(cache))) self.set_param({'seed': 0}) self.set_param(params or {}) if model_file is not None: self.load_model(model_file) def __del__(self): - xglib.XGBoosterFree(self.handle) + _LIB.XGBoosterFree(self.handle) def __getstate__(self): # can't pickle ctypes pointers @@ -367,10 +419,10 @@ class Booster(object): if handle is not None: buf = handle dmats = c_array(ctypes.c_void_p, []) - handle = ctypes.c_void_p(xglib.XGBoosterCreate(dmats, 0)) + handle = ctypes.c_void_p(_LIB.XGBoosterCreate(dmats, 0)) length = ctypes.c_ulong(len(buf)) ptr = (ctypes.c_char * len(buf)).from_buffer(buf) - xglib.XGBoosterLoadModelFromBuffer(handle, ptr, length) + _LIB.XGBoosterLoadModelFromBuffer(handle, ptr, length) state['handle'] = handle self.__dict__.update(state) self.set_param({'seed': 0}) @@ -379,11 +431,10 @@ class Booster(object): return self.__deepcopy__() def __deepcopy__(self): - return Booster(model_file = self.save_raw()) + return Booster(model_file=self.save_raw()) def copy(self): - """ - Copy the booster object + """Copy the booster object. 
Returns -------- @@ -391,15 +442,16 @@ class Booster(object): """ return self.__copy__() - def set_param(self, params, pv=None): + def set_param(self, params, value=None): + """Set parameters into the DMatrix.""" if isinstance(params, collections.Mapping): params = params.items() - elif isinstance(params, string_types) and pv is not None: - params = [(params, pv)] - for k, v in params: - xglib.XGBoosterSetParam(self.handle, c_str(k), c_str(str(v))) + elif isinstance(params, STRING_TYPES) and value is not None: + params = [(params, value)] + for key, val in params: + _LIB.XGBoosterSetParam(self.handle, c_str(key), c_str(str(val))) - def update(self, dtrain, it, fobj=None): + def update(self, dtrain, iteration, fobj=None): """ Update (one iteration). @@ -407,7 +459,7 @@ class Booster(object): ---------- dtrain : DMatrix Training data. - it : int + iteration : int Current iteration number. fobj : function Customized objective function. @@ -415,7 +467,7 @@ class Booster(object): if not isinstance(dtrain, DMatrix): raise TypeError('invalid training matrix: {}'.format(type(dtrain).__name__)) if fobj is None: - xglib.XGBoosterUpdateOneIter(self.handle, it, dtrain.handle) + _LIB.XGBoosterUpdateOneIter(self.handle, iteration, dtrain.handle) else: pred = self.predict(dtrain) grad, hess = fobj(pred, dtrain) @@ -438,20 +490,20 @@ class Booster(object): raise ValueError('grad / hess length mismatch: {} / {}'.format(len(grad), len(hess))) if not isinstance(dtrain, DMatrix): raise TypeError('invalid training matrix: {}'.format(type(dtrain).__name__)) - xglib.XGBoosterBoostOneIter(self.handle, dtrain.handle, - c_array(ctypes.c_float, grad), - c_array(ctypes.c_float, hess), - len(grad)) + _LIB.XGBoosterBoostOneIter(self.handle, dtrain.handle, + c_array(ctypes.c_float, grad), + c_array(ctypes.c_float, hess), + len(grad)) - def eval_set(self, evals, it=0, feval=None): - """ - Evaluate by a metric. 
+ def eval_set(self, evals, iteration=0, feval=None): + # pylint: disable=invalid-name + """Evaluate a set of data. Parameters ---------- evals : list of tuples (DMatrix, string) List of items to be evaluated. - it : int + iteration : int Current iteration. feval : function Custom evaluation function. @@ -464,20 +516,35 @@ class Booster(object): for d in evals: if not isinstance(d[0], DMatrix): raise TypeError('expected DMatrix, got {}'.format(type(d[0]).__name__)) - if not isinstance(d[1], string_types): + if not isinstance(d[1], STRING_TYPES): raise TypeError('expected string, got {}'.format(type(d[1]).__name__)) dmats = c_array(ctypes.c_void_p, [d[0].handle for d in evals]) evnames = c_array(ctypes.c_char_p, [c_str(d[1]) for d in evals]) - return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals)) + return _LIB.XGBoosterEvalOneIter(self.handle, iteration, dmats, evnames, len(evals)) else: - res = '[%d]' % it - for dm, evname in evals: - name, val = feval(self.predict(dm), dm) + res = '[%d]' % iteration + for dmat, evname in evals: + name, val = feval(self.predict(dmat), dmat) res += '\t%s-%s:%f' % (evname, name, val) return res - def eval(self, mat, name='eval', it=0): - return self.eval_set([(mat, name)], it) + def eval(self, data, name='eval', iteration=0): + """Evaluate the model on mat. + + + Parameters + --------- + data : DMatrix + The dmatrix storing the input. + + name : str (default = 'eval') + The name of the dataset + + + iteration : int (default = 0) + The current iteration number + """ + return self.eval_set([(data, name)], iteration) def predict(self, data, output_margin=False, ntree_limit=0, pred_leaf=False): """ @@ -492,10 +559,13 @@ class Booster(object): ---------- data : DMatrix The dmatrix storing the input. + output_margin : bool Whether to output the raw untransformed margin value. + ntree_limit : int Limit number of trees in the prediction; defaults to 0 (use all trees). 
+ pred_leaf : bool When this option is on, the output will be a matrix of (nsample, ntrees) with each record indicating the predicted leaf index of each sample in each tree. @@ -512,8 +582,8 @@ class Booster(object): if pred_leaf: option_mask |= 0x02 length = ctypes.c_ulong() - preds = xglib.XGBoosterPredict(self.handle, data.handle, - option_mask, ntree_limit, ctypes.byref(length)) + preds = _LIB.XGBoosterPredict(self.handle, data.handle, + option_mask, ntree_limit, ctypes.byref(length)) preds = ctypes2numpy(preds, length.value, np.float32) if pred_leaf: preds = preds.astype(np.int32) @@ -531,8 +601,8 @@ class Booster(object): fname : string Output file name """ - if isinstance(fname, string_types): # assume file name - xglib.XGBoosterSaveModel(self.handle, c_str(fname)) + if isinstance(fname, STRING_TYPES): # assume file name + _LIB.XGBoosterSaveModel(self.handle, c_str(fname)) else: raise TypeError("fname must be a string") @@ -545,8 +615,8 @@ class Booster(object): a in memory buffer represetation of the model """ length = ctypes.c_ulong() - cptr = xglib.XGBoosterGetModelRaw(self.handle, - ctypes.byref(length)) + cptr = _LIB.XGBoosterGetModelRaw(self.handle, + ctypes.byref(length)) return ctypes2buffer(cptr, length.value) def load_model(self, fname): @@ -559,59 +629,63 @@ class Booster(object): Input file name or memory buffer(see also save_raw) """ if isinstance(fname, str): # assume file name - xglib.XGBoosterLoadModel(self.handle, c_str(fname)) + _LIB.XGBoosterLoadModel(self.handle, c_str(fname)) else: buf = fname length = ctypes.c_ulong(len(buf)) ptr = (ctypes.c_char * len(buf)).from_buffer(buf) - xglib.XGBoosterLoadModelFromBuffer(self.handle, ptr, length) + _LIB.XGBoosterLoadModelFromBuffer(self.handle, ptr, length) - def dump_model(self, fo, fmap='', with_stats=False): + def dump_model(self, fout, fmap='', with_stats=False): """ Dump model into a text file. Parameters ---------- - fo : string + foout : string Output file name. 
fmap : string, optional Name of the file containing feature map names. with_stats : bool (optional) Controls whether the split statistics are output. """ - if isinstance(fo, string_types): - fo = open(fo, 'w') + if isinstance(fout, STRING_TYPES): + fout = open(fout, 'w') need_close = True else: need_close = False ret = self.get_dump(fmap, with_stats) for i in range(len(ret)): - fo.write('booster[{}]:\n'.format(i)) - fo.write(ret[i]) + fout.write('booster[{}]:\n'.format(i)) + fout.write(ret[i]) if need_close: - fo.close() + fout.close() def get_dump(self, fmap='', with_stats=False): """ Returns the dump the model as a list of strings. """ length = ctypes.c_ulong() - sarr = xglib.XGBoosterDumpModel(self.handle, c_str(fmap), - int(with_stats), ctypes.byref(length)) + sarr = _LIB.XGBoosterDumpModel(self.handle, c_str(fmap), + int(with_stats), ctypes.byref(length)) res = [] for i in range(length.value): res.append(str(sarr[i].decode('ascii'))) return res def get_fscore(self, fmap=''): - """ - Get feature importance of each feature. + """Get feature importance of each feature. + + Parameters + ---------- + fmap: str (optional) + The name of feature map file """ trees = self.get_dump(fmap) fmap = {} for tree in trees: - for l in tree.split('\n'): - arr = l.split('[') + for line in tree.split('\n'): + arr = line.split('[') if len(arr) == 1: continue fid = arr[1].split(']')[0] @@ -624,9 +698,9 @@ class Booster(object): def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, - early_stopping_rounds=None,evals_result=None): - """ - Train a booster with given parameters. + early_stopping_rounds=None, evals_result=None): + # pylint: disable=too-many-statements,too-many-branches, attribute-defined-outside-init + """Train a booster with given parameters. 
Parameters ---------- @@ -663,7 +737,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, bst = Booster(params, [dtrain] + [d[0] for d in evals]) if evals_result is not None: - if type(evals_result) is not dict: + if not isinstance(evals_result, dict): raise TypeError('evals_result has to be a dictionary') else: evals_name = [d[1] for d in evals] @@ -675,37 +749,38 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, bst.update(dtrain, i, obj) if len(evals) != 0: bst_eval_set = bst.eval_set(evals, i, feval) - if isinstance(bst_eval_set, string_types): + if isinstance(bst_eval_set, STRING_TYPES): msg = bst_eval_set else: msg = bst_eval_set.decode() sys.stderr.write(msg + '\n') if evals_result is not None: - res = re.findall(":([0-9.]+).",msg) - for key,val in zip(evals_name,res): + res = re.findall(":([0-9.]+).", msg) + for key, val in zip(evals_name, res): evals_result[key].append(val) return bst else: # early stopping - if len(evals) < 1: raise ValueError('For early stopping you need at least one set in evals.') - sys.stderr.write("Will train until {} error hasn't decreased in {} rounds.\n".format(evals[-1][1], early_stopping_rounds)) + sys.stderr.write("Will train until {} error hasn't decreased in {} rounds.\n".format(\ + evals[-1][1], early_stopping_rounds)) # is params a list of tuples? are we using multiple eval metrics? - if type(params) == list: + if isinstance(params, list): if len(params) != len(dict(params).items()): - raise ValueError('Check your params. 
Early stopping works with single eval metric only.') + raise ValueError('Check your params.'\ + 'Early stopping works with single eval metric only.') params = dict(params) # either minimize loss or maximize AUC/MAP/NDCG maximize_score = False if 'eval_metric' in params: maximize_metrics = ('auc', 'map', 'ndcg') - if list(filter(lambda x: params['eval_metric'].startswith(x), maximize_metrics)): + if any(params['eval_metric'].startswith(x) for x in maximize_metrics): maximize_score = True if maximize_score: @@ -720,7 +795,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, bst.update(dtrain, i, obj) bst_eval_set = bst.eval_set(evals, i, feval) - if isinstance(bst_eval_set, string_types): + if isinstance(bst_eval_set, STRING_TYPES): msg = bst_eval_set else: msg = bst_eval_set.decode() @@ -728,8 +803,8 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, sys.stderr.write(msg + '\n') if evals_result is not None: - res = re.findall(":([0-9.]+).",msg) - for key,val in zip(evals_name,res): + res = re.findall(":([0-9.]+).", msg) + for key, val in zip(evals_name, res): evals_result[key].append(val) score = float(msg.rsplit(':', 1)[1]) @@ -748,17 +823,21 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None, return bst class CVPack(object): + """"Auxiliary datastruct to hold one fold of CV.""" def __init__(self, dtrain, dtest, param): + """"Initialize the CVPack""" self.dtrain = dtrain self.dtest = dtest self.watchlist = [(dtrain, 'train'), (dtest, 'test')] self.bst = Booster(param, [dtrain, dtest]) - def update(self, r, fobj): - self.bst.update(self.dtrain, r, fobj) + def update(self, iteration, fobj): + """"Update the boosters for one iteration""" + self.bst.update(self.dtrain, iteration, fobj) - def eval(self, r, feval): - return self.bst.eval_set(self.watchlist, r, feval) + def eval(self, iteration, feval): + """"Evaluate the CVPack for one iteration.""" + return 
self.bst.eval_set(self.watchlist, iteration, feval) def mknfold(dall, nfold, param, seed, evals=(), fpreproc=None): @@ -785,6 +864,7 @@ def mknfold(dall, nfold, param, seed, evals=(), fpreproc=None): def aggcv(rlist, show_stdv=True): + # pylint: disable=invalid-name """ Aggregate cross-validation results. """ @@ -794,7 +874,7 @@ def aggcv(rlist, show_stdv=True): arr = line.split() assert ret == arr[0] for it in arr[1:]: - if not isinstance(it, string_types): + if not isinstance(it, STRING_TYPES): it = it.decode() k, v = it.split(':') if k not in cvmap: @@ -802,7 +882,7 @@ def aggcv(rlist, show_stdv=True): cvmap[k].append(float(v)) for k, v in sorted(cvmap.items(), key=lambda x: x[0]): v = np.array(v) - if not isinstance(ret, string_types): + if not isinstance(ret, STRING_TYPES): ret = ret.decode() if show_stdv: ret += '\tcv-%s:%f+%f' % (k, np.mean(v), np.std(v)) @@ -813,8 +893,8 @@ def aggcv(rlist, show_stdv=True): def cv(params, dtrain, num_boost_round=10, nfold=3, metrics=(), obj=None, feval=None, fpreproc=None, show_stdv=True, seed=0): - """ - Cross-validation with given paramaters. + # pylint: disable = invalid-name + """Cross-validation with given paramaters. 
Parameters ---------- @@ -847,8 +927,8 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, metrics=(), results = [] cvfolds = mknfold(dtrain, nfold, params, seed, metrics, fpreproc) for i in range(num_boost_round): - for f in cvfolds: - f.update(i, obj) + for fold in cvfolds: + fold.update(i, obj) res = aggcv([f.eval(i, feval) for f in cvfolds], show_stdv) sys.stderr.write(res + '\n') results.append(res) @@ -857,16 +937,16 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, metrics=(), # used for compatiblity without sklearn XGBModelBase = object -XGBClassifier = object -XGBRegressor = object +XGBClassifierBase = object +XGBRegressorBase = object if SKLEARN_INSTALLED: XGBModelBase = BaseEstimator - XGBRegressor = RegressorMixin - XGBClassifier = ClassifierMixin + XGBRegressorBase = RegressorMixin + XGBClassifierBase = ClassifierMixin class XGBModel(XGBModelBase): - """ - Implementation of the Scikit-Learn API for XGBoost. + # pylint: disable=too-many-arguments, too-many-instance-attributes, invalid-name + """Implementation of the Scikit-Learn API for XGBoost. Parameters ---------- @@ -902,8 +982,10 @@ class XGBModel(XGBModelBase): Value in the data which needs to be present as a missing value. If None, defaults to np.nan. 
""" - def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective="reg:linear", - nthread=-1, gamma=0, min_child_weight=1, max_delta_step=0, subsample=1, colsample_bytree=1, + def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, + silent=True, objective="reg:linear", + nthread=-1, gamma=0, min_child_weight=1, max_delta_step=0, + subsample=1, colsample_bytree=1, base_score=0.5, seed=0, missing=None): if not SKLEARN_INSTALLED: raise XGBoostError('sklearn needs to be installed in order to use this module') @@ -923,7 +1005,6 @@ class XGBModel(XGBModelBase): self.base_score = base_score self.seed = seed self.missing = missing if missing is not None else np.nan - self._Booster = None def __setstate__(self, state): @@ -936,9 +1017,9 @@ class XGBModel(XGBModelBase): self.__dict__.update(state) def booster(self): - """ - get the underlying xgboost Booster of this model - will raise an exception when fit was not called + """Get the underlying xgboost Booster of this model. + + This will raise an exception when fit was not called Returns ------- @@ -949,12 +1030,14 @@ class XGBModel(XGBModelBase): return self._Booster def get_params(self, deep=False): + """Get parameter.s""" params = super(XGBModel, self).get_params(deep=deep) if params['missing'] is np.nan: params['missing'] = None # sklearn doesn't handle nan. 
see #4725 return params def get_xgb_params(self): + """Get xgboost type parameters.""" xgb_params = self.get_params() xgb_params['silent'] = 1 if self.silent else 0 @@ -963,30 +1046,39 @@ class XGBModel(XGBModelBase): xgb_params.pop('nthread', None) return xgb_params - def fit(self, X, y): - trainDmatrix = DMatrix(X, label=y, missing=self.missing) - self._Booster = train(self.get_xgb_params(), trainDmatrix, self.n_estimators) + def fit(self, data, y): + # pylint: disable=missing-docstring,invalid-name + train_dmatrix = DMatrix(data, label=y, missing=self.missing) + self._Booster = train(self.get_xgb_params(), train_dmatrix, self.n_estimators) return self - def predict(self, X): - testDmatrix = DMatrix(X, missing=self.missing) - return self.booster().predict(testDmatrix) + def predict(self, data): + # pylint: disable=missing-docstring,invalid-name + test_dmatrix = DMatrix(data, missing=self.missing) + return self.booster().predict(test_dmatrix) -class XGBClassifier(XGBModel, XGBClassifier): +class XGBClassifier(XGBModel, XGBClassifierBase): + # pylint: disable=missing-docstring,too-many-arguments,invalid-name __doc__ = """ Implementation of the scikit-learn API for XGBoost classification """ + "\n".join(XGBModel.__doc__.split('\n')[2:]) - def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective="binary:logistic", - nthread=-1, gamma=0, min_child_weight=1, max_delta_step=0, subsample=1, colsample_bytree=1, + def __init__(self, max_depth=3, learning_rate=0.1, + n_estimators=100, silent=True, + objective="binary:logistic", + nthread=-1, gamma=0, min_child_weight=1, + max_delta_step=0, subsample=1, colsample_bytree=1, base_score=0.5, seed=0, missing=None): - super(XGBClassifier, self).__init__(max_depth, learning_rate, n_estimators, silent, objective, - nthread, gamma, min_child_weight, max_delta_step, subsample, + super(XGBClassifier, self).__init__(max_depth, learning_rate, + n_estimators, silent, objective, + nthread, gamma, 
min_child_weight, + max_delta_step, subsample, colsample_bytree, base_score, seed, missing) def fit(self, X, y, sample_weight=None): + # pylint: disable = attribute-defined-outside-init,arguments-differ self.classes_ = list(np.unique(y)) self.n_classes_ = len(self.classes_) if self.n_classes_ > 2: @@ -1001,29 +1093,29 @@ class XGBClassifier(XGBModel, XGBClassifier): training_labels = self._le.transform(y) if sample_weight is not None: - trainDmatrix = DMatrix(X, label=training_labels, weight=sample_weight, - missing=self.missing) + train_dmatrix = DMatrix(X, label=training_labels, weight=sample_weight, + missing=self.missing) else: - trainDmatrix = DMatrix(X, label=training_labels, - missing=self.missing) + train_dmatrix = DMatrix(X, label=training_labels, + missing=self.missing) - self._Booster = train(xgb_options, trainDmatrix, self.n_estimators) + self._Booster = train(xgb_options, train_dmatrix, self.n_estimators) return self - def predict(self, X): - testDmatrix = DMatrix(X, missing=self.missing) - class_probs = self.booster().predict(testDmatrix) + def predict(self, data): + test_dmatrix = DMatrix(data, missing=self.missing) + class_probs = self.booster().predict(test_dmatrix) if len(class_probs.shape) > 1: column_indexes = np.argmax(class_probs, axis=1) else: - column_indexes = np.repeat(0, X.shape[0]) + column_indexes = np.repeat(0, data.shape[0]) column_indexes[class_probs > 0.5] = 1 return self._le.inverse_transform(column_indexes) - def predict_proba(self, X): - testDmatrix = DMatrix(X, missing=self.missing) - class_probs = self.booster().predict(testDmatrix) + def predict_proba(self, data): + test_dmatrix = DMatrix(data, missing=self.missing) + class_probs = self.booster().predict(test_dmatrix) if self.objective == "multi:softprob": return class_probs else: @@ -1031,9 +1123,8 @@ class XGBClassifier(XGBModel, XGBClassifier): classzero_probs = 1.0 - classone_probs return np.vstack((classzero_probs, classone_probs)).transpose() -class 
XGBRegressor(XGBModel, XGBRegressor): +class XGBRegressor(XGBModel, XGBRegressorBase): + # pylint: disable=missing-docstring __doc__ = """ Implementation of the scikit-learn API for XGBoost regression """ + "\n".join(XGBModel.__doc__.split('\n')[2:]) - - pass