Enable flake8
This commit is contained in:
@@ -4,7 +4,7 @@ from __future__ import absolute_import
|
||||
import sys
|
||||
import os
|
||||
from setuptools import setup, find_packages
|
||||
#import subprocess
|
||||
# import subprocess
|
||||
sys.path.insert(0, '.')
|
||||
|
||||
CURRENT_DIR = os.path.dirname(__file__)
|
||||
@@ -18,12 +18,12 @@ exec(compile(open(libpath_py, "rb").read(), libpath_py, 'exec'), libpath, libpat
|
||||
|
||||
LIB_PATH = libpath['find_lib_path']()
|
||||
print("Install libxgboost from: %s" % LIB_PATH)
|
||||
#Please use setup_pip.py for generating and deploying pip installation
|
||||
#detailed instruction in setup_pip.py
|
||||
# Please use setup_pip.py for generating and deploying pip installation
|
||||
# detailed instruction in setup_pip.py
|
||||
setup(name='xgboost',
|
||||
version=open(os.path.join(CURRENT_DIR, 'xgboost/VERSION')).read().strip(),
|
||||
#version='0.4a23',
|
||||
description = "XGBoost Python Package",
|
||||
# version='0.4a23',
|
||||
description="XGBoost Python Package",
|
||||
long_description=open(os.path.join(CURRENT_DIR, 'README.rst')).read(),
|
||||
install_requires=[
|
||||
'numpy',
|
||||
@@ -33,8 +33,8 @@ setup(name='xgboost',
|
||||
maintainer_email='phunter.lau@gmail.com',
|
||||
zip_safe=False,
|
||||
packages=find_packages(),
|
||||
#this will use MANIFEST.in during install where we specify additional files,
|
||||
#this is the golden line
|
||||
# this will use MANIFEST.in during install where we specify additional files,
|
||||
# this is the golden line
|
||||
include_package_data=True,
|
||||
data_files=[('xgboost', LIB_PATH)],
|
||||
url='https://github.com/dmlc/xgboost')
|
||||
|
||||
@@ -4,14 +4,14 @@ from __future__ import absolute_import
|
||||
import sys
|
||||
import os
|
||||
from setuptools import setup, find_packages
|
||||
#import subprocess
|
||||
# import subprocess
|
||||
sys.path.insert(0, '.')
|
||||
|
||||
#this script is for packing and shipping pip installation
|
||||
#it builds xgboost code on the fly and packs for pip
|
||||
#please don't use this file for installing from github
|
||||
# this script is for packing and shipping pip installation
|
||||
# it builds xgboost code on the fly and packs for pip
|
||||
# please don't use this file for installing from github
|
||||
|
||||
if os.name != 'nt': #if not windows, compile and install
|
||||
if os.name != 'nt': # if not windows, compile and install
|
||||
os.system('sh ./xgboost/build-python.sh')
|
||||
else:
|
||||
print('Windows users please use github installation.')
|
||||
@@ -28,12 +28,12 @@ exec(compile(open(libpath_py, "rb").read(), libpath_py, 'exec'), libpath, libpat
|
||||
|
||||
LIB_PATH = libpath['find_lib_path']()
|
||||
|
||||
#to deploy to pip, please use
|
||||
#make pythonpack
|
||||
#python setup.py register sdist upload
|
||||
#and be sure to test it firstly using "python setup.py register sdist upload -r pypitest"
|
||||
# to deploy to pip, please use
|
||||
# make pythonpack
|
||||
# python setup.py register sdist upload
|
||||
# and be sure to test it firstly using "python setup.py register sdist upload -r pypitest"
|
||||
setup(name='xgboost',
|
||||
#version=open(os.path.join(CURRENT_DIR, 'xgboost/VERSION')).read().strip(),
|
||||
# version=open(os.path.join(CURRENT_DIR, 'xgboost/VERSION')).read().strip(),
|
||||
version='0.4a30',
|
||||
description=open(os.path.join(CURRENT_DIR, 'README.rst')).read(),
|
||||
install_requires=[
|
||||
@@ -44,15 +44,15 @@ setup(name='xgboost',
|
||||
maintainer_email='phunter.lau@gmail.com',
|
||||
zip_safe=False,
|
||||
packages=find_packages(),
|
||||
#don't need this and don't use this, give everything to MANIFEST.in
|
||||
#package_dir = {'':'xgboost'},
|
||||
#package_data = {'': ['*.txt','*.md','*.sh'],
|
||||
# don't need this and don't use this, give everything to MANIFEST.in
|
||||
# package_dir = {'':'xgboost'},
|
||||
# package_data = {'': ['*.txt','*.md','*.sh'],
|
||||
# }
|
||||
#this will use MANIFEST.in during install where we specify additional files,
|
||||
#this is the golden line
|
||||
# this will use MANIFEST.in during install where we specify additional files,
|
||||
# this is the golden line
|
||||
include_package_data=True,
|
||||
#!!! don't use data_files for creating pip installation,
|
||||
#otherwise install_data process will copy it to
|
||||
#root directory for some machines, and cause confusions on building
|
||||
#data_files=[('xgboost', LIB_PATH)],
|
||||
# !!! don't use data_files for creating pip installation,
|
||||
# otherwise install_data process will copy it to
|
||||
# root directory for some machines, and cause confusions on building
|
||||
# data_files=[('xgboost', LIB_PATH)],
|
||||
url='https://github.com/dmlc/xgboost')
|
||||
|
||||
@@ -10,7 +10,7 @@ import os
|
||||
|
||||
from .core import DMatrix, Booster
|
||||
from .training import train, cv
|
||||
from . import rabit
|
||||
from . import rabit # noqa
|
||||
try:
|
||||
from .sklearn import XGBModel, XGBClassifier, XGBRegressor
|
||||
from .plotting import plot_importance, plot_tree, to_graphviz
|
||||
|
||||
@@ -12,11 +12,21 @@ PY3 = (sys.version_info[0] == 3)
|
||||
if PY3:
|
||||
# pylint: disable=invalid-name, redefined-builtin
|
||||
STRING_TYPES = str,
|
||||
py_str = lambda x: x.decode('utf-8')
|
||||
|
||||
def py_str(x):
|
||||
return x.decode('utf-8')
|
||||
else:
|
||||
# pylint: disable=invalid-name
|
||||
STRING_TYPES = basestring,
|
||||
py_str = lambda x: x
|
||||
|
||||
def py_str(x):
|
||||
return x
|
||||
|
||||
try:
|
||||
import cPickle as pickle # noqa
|
||||
except ImportError:
|
||||
import pickle # noqa
|
||||
|
||||
|
||||
# pandas
|
||||
try:
|
||||
@@ -34,7 +44,7 @@ except ImportError:
|
||||
try:
|
||||
from sklearn.base import BaseEstimator
|
||||
from sklearn.base import RegressorMixin, ClassifierMixin
|
||||
from sklearn.preprocessing import LabelEncoder
|
||||
from sklearn.preprocessing import LabelEncoder # noqa
|
||||
from sklearn.cross_validation import KFold, StratifiedKFold
|
||||
SKLEARN_INSTALLED = True
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ from .libpath import find_lib_path
|
||||
|
||||
from .compat import STRING_TYPES, PY3, DataFrame, py_str
|
||||
|
||||
|
||||
class XGBoostError(Exception):
|
||||
"""Error thrown by xgboost trainer."""
|
||||
pass
|
||||
@@ -82,6 +83,7 @@ def _load_lib():
|
||||
# load the XGBoost library globally
|
||||
_LIB = _load_lib()
|
||||
|
||||
|
||||
def _check_call(ret):
|
||||
"""Check the return value of C API call
|
||||
|
||||
@@ -129,7 +131,6 @@ def c_array(ctype, values):
|
||||
return (ctype * len(values))(*values)
|
||||
|
||||
|
||||
|
||||
PANDAS_DTYPE_MAPPER = {'int8': 'int', 'int16': 'int', 'int32': 'int', 'int64': 'int',
|
||||
'uint8': 'int', 'uint16': 'int', 'uint32': 'int', 'uint64': 'int',
|
||||
'float16': 'float', 'float32': 'float', 'float64': 'float',
|
||||
@@ -144,8 +145,12 @@ def _maybe_pandas_data(data, feature_names, feature_types):
|
||||
|
||||
data_dtypes = data.dtypes
|
||||
if not all(dtype.name in PANDAS_DTYPE_MAPPER for dtype in data_dtypes):
|
||||
bad_fields = [data.columns[i] for i, dtype in enumerate(data_dtypes) if dtype.name not in PANDAS_DTYPE_MAPPER ]
|
||||
raise ValueError('DataFrame.dtypes for data must be int, float or bool.\nDid not expect the data types in fields '+', '.join(bad_fields))
|
||||
bad_fields = [data.columns[i] for i, dtype in
|
||||
enumerate(data_dtypes) if dtype.name not in PANDAS_DTYPE_MAPPER]
|
||||
|
||||
msg = """DataFrame.dtypes for data must be int, float or bool.
|
||||
Did not expect the data types in fields """
|
||||
raise ValueError(msg + ', '.join(bad_fields))
|
||||
|
||||
if feature_names is None:
|
||||
feature_names = data.columns.format()
|
||||
@@ -174,6 +179,7 @@ def _maybe_pandas_label(label):
|
||||
|
||||
return label
|
||||
|
||||
|
||||
class DMatrix(object):
|
||||
"""Data Matrix used in XGBoost.
|
||||
|
||||
@@ -1041,8 +1047,14 @@ class Booster(object):
|
||||
if self.feature_names != data.feature_names:
|
||||
dat_missing = set(self.feature_names) - set(data.feature_names)
|
||||
my_missing = set(data.feature_names) - set(self.feature_names)
|
||||
|
||||
msg = 'feature_names mismatch: {0} {1}'
|
||||
if dat_missing: msg +='\nexpected ' + ', '.join(str(s) for s in dat_missing) +' in input data'
|
||||
if my_missing: msg +='\ntraining data did not have the following fields: ' + ', '.join(str(s) for s in my_missing)
|
||||
|
||||
if dat_missing:
|
||||
msg += '\nexpected ' + ', '.join(str(s) for s in dat_missing) + ' in input data'
|
||||
|
||||
if my_missing:
|
||||
msg += '\ntraining data did not have the following fields: ' + ', '.join(str(s) for s in my_missing)
|
||||
|
||||
raise ValueError(msg.format(self.feature_names,
|
||||
data.feature_names))
|
||||
|
||||
@@ -36,7 +36,8 @@ def find_lib_path():
|
||||
else:
|
||||
dll_path = [os.path.join(p, 'libxgboost.so') for p in dll_path]
|
||||
lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)]
|
||||
#From github issues, most of installation errors come from machines w/o compilers
|
||||
|
||||
# From github issues, most of installation errors come from machines w/o compilers
|
||||
if len(lib_path) == 0 and not os.environ.get('XGBOOST_BUILD_DOC', False):
|
||||
raise XGBoostLibraryNotFound(
|
||||
'Cannot find XGBoost Libarary in the candicate path, ' +
|
||||
|
||||
@@ -10,6 +10,7 @@ import numpy as np
|
||||
from .core import Booster
|
||||
from .sklearn import XGBModel
|
||||
|
||||
|
||||
def plot_importance(booster, ax=None, height=0.2,
|
||||
xlim=None, ylim=None, title='Feature importance',
|
||||
xlabel='F score', ylabel='Features',
|
||||
@@ -105,6 +106,7 @@ _LEAFPAT = re.compile(r'(\d+):(leaf=.+)')
|
||||
_EDGEPAT = re.compile(r'yes=(\d+),no=(\d+),missing=(\d+)')
|
||||
_EDGEPAT2 = re.compile(r'yes=(\d+),no=(\d+)')
|
||||
|
||||
|
||||
def _parse_node(graph, text):
|
||||
"""parse dumped node"""
|
||||
match = _NODEPAT.match(text)
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
"""Distributed XGBoost Rabit related API."""
|
||||
from __future__ import absolute_import
|
||||
import sys
|
||||
import atexit
|
||||
import ctypes
|
||||
import numpy as np
|
||||
|
||||
from .core import _LIB, c_str, STRING_TYPES
|
||||
from .compat import pickle
|
||||
|
||||
|
||||
def _init_rabit():
|
||||
"""internal library initializer."""
|
||||
@@ -15,6 +16,7 @@ def _init_rabit():
|
||||
_LIB.RabitIsDistributed.restype = ctypes.c_int
|
||||
_LIB.RabitVersionNumber.restype = ctypes.c_int
|
||||
|
||||
|
||||
def init(args=None):
|
||||
"""Initialize the rabit library with arguments"""
|
||||
if args is None:
|
||||
@@ -73,6 +75,7 @@ def tracker_print(msg):
|
||||
sys.stdout.write(msg)
|
||||
sys.stdout.flush()
|
||||
|
||||
|
||||
def get_processor_name():
|
||||
"""Get the processor name.
|
||||
|
||||
@@ -127,14 +130,14 @@ def broadcast(data, root):
|
||||
|
||||
# enumeration of dtypes
|
||||
DTYPE_ENUM__ = {
|
||||
np.dtype('int8') : 0,
|
||||
np.dtype('uint8') : 1,
|
||||
np.dtype('int32') : 2,
|
||||
np.dtype('uint32') : 3,
|
||||
np.dtype('int64') : 4,
|
||||
np.dtype('uint64') : 5,
|
||||
np.dtype('float32') : 6,
|
||||
np.dtype('float64') : 7
|
||||
np.dtype('int8'): 0,
|
||||
np.dtype('uint8'): 1,
|
||||
np.dtype('int32'): 2,
|
||||
np.dtype('uint32'): 3,
|
||||
np.dtype('int64'): 4,
|
||||
np.dtype('uint64'): 5,
|
||||
np.dtype('float32'): 6,
|
||||
np.dtype('float64'): 7
|
||||
}
|
||||
|
||||
|
||||
@@ -175,6 +178,7 @@ def allreduce(data, op, prepare_fun=None):
|
||||
op, None, None)
|
||||
else:
|
||||
func_ptr = ctypes.CFUNCTYPE(None, ctypes.c_void_p)
|
||||
|
||||
def pfunc(args):
|
||||
"""prepare function."""
|
||||
prepare_fun(data)
|
||||
|
||||
@@ -366,7 +366,6 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
||||
self.classes_ = np.unique(y)
|
||||
self.n_classes_ = len(self.classes_)
|
||||
|
||||
|
||||
xgb_options = self.get_xgb_params()
|
||||
|
||||
if callable(self.objective):
|
||||
|
||||
@@ -6,12 +6,12 @@ from __future__ import absolute_import
|
||||
|
||||
import sys
|
||||
import re
|
||||
import os
|
||||
import numpy as np
|
||||
from .core import Booster, STRING_TYPES
|
||||
from .compat import (SKLEARN_INSTALLED, XGBStratifiedKFold, XGBKFold)
|
||||
from .core import Booster, STRING_TYPES, XGBoostError
|
||||
from .compat import (SKLEARN_INSTALLED, XGBStratifiedKFold)
|
||||
from . import rabit
|
||||
|
||||
|
||||
def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
|
||||
maximize=False, early_stopping_rounds=None, evals_result=None,
|
||||
verbose_eval=True, learning_rates=None, xgb_model=None):
|
||||
@@ -97,7 +97,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
|
||||
verbose_eval = True if verbose_eval_every_line > 0 else False
|
||||
|
||||
if rabit.get_rank() != 0:
|
||||
verbose_eval = False;
|
||||
verbose_eval = False
|
||||
|
||||
if xgb_model is not None:
|
||||
if not isinstance(xgb_model, STRING_TYPES):
|
||||
@@ -135,8 +135,9 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
|
||||
if isinstance(params, list):
|
||||
if len(params) != len(dict(params).items()):
|
||||
params = dict(params)
|
||||
rabit.tracker_print("Multiple eval metrics have been passed: " \
|
||||
"'{0}' will be used for early stopping.\n\n".format(params['eval_metric']))
|
||||
msg = ("Multiple eval metrics have been passed: "
|
||||
"'{0}' will be used for early stopping.\n\n")
|
||||
rabit.tracker_print(msg.format(params['eval_metric']))
|
||||
else:
|
||||
params = dict(params)
|
||||
|
||||
@@ -173,7 +174,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
|
||||
|
||||
# Distributed code: need to resume to this point.
|
||||
# Skip the first update if it is a recovery step.
|
||||
if version % 2 == 0:
|
||||
if version % 2 == 0:
|
||||
bst.update(dtrain, i, obj)
|
||||
bst.save_rabit_checkpoint()
|
||||
version += 1
|
||||
@@ -203,7 +204,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
|
||||
evals_idx = evals_name.index(key)
|
||||
res_per_eval = len(res) // len(evals_name)
|
||||
for r in range(res_per_eval):
|
||||
res_item = res[(evals_idx*res_per_eval) + r]
|
||||
res_item = res[(evals_idx * res_per_eval) + r]
|
||||
res_key = res_item[0]
|
||||
res_val = res_item[1]
|
||||
if res_key in evals_result[key]:
|
||||
@@ -224,7 +225,8 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
|
||||
elif i - best_iteration >= early_stopping_rounds:
|
||||
best_msg = bst.attr('best_msg')
|
||||
if verbose_eval:
|
||||
rabit.tracker_print("Stopping. Best iteration:\n{}\n\n".format(best_msg))
|
||||
msg = "Stopping. Best iteration:\n{}\n\n"
|
||||
rabit.tracker_print(msg.format(best_msg))
|
||||
break
|
||||
# do checkpoint after evaluation, in case evaluation also updates booster.
|
||||
bst.save_rabit_checkpoint()
|
||||
@@ -290,6 +292,7 @@ def mknfold(dall, nfold, param, seed, evals=(), fpreproc=None, stratified=False,
|
||||
ret.append(CVPack(dtrain, dtest, plst))
|
||||
return ret
|
||||
|
||||
|
||||
def aggcv(rlist, show_stdv=True, verbose_eval=None, as_pandas=True, trial=0):
|
||||
# pylint: disable=invalid-name
|
||||
"""
|
||||
@@ -405,8 +408,8 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
|
||||
-------
|
||||
evaluation history : list(string)
|
||||
"""
|
||||
if stratified == True and not SKLEARN_INSTALLED:
|
||||
raise XGBoostError('sklearn needs to be installed in order to use stratified cv')
|
||||
if stratified is True and not SKLEARN_INSTALLED:
|
||||
raise XGBoostError('sklearn needs to be installed in order to use stratified cv')
|
||||
|
||||
if isinstance(metrics, str):
|
||||
metrics = [metrics]
|
||||
@@ -417,7 +420,7 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
|
||||
if 'eval_metric' in params:
|
||||
params['eval_metric'] = _metrics
|
||||
else:
|
||||
params= dict((k, v) for k, v in params.items())
|
||||
params = dict((k, v) for k, v in params.items())
|
||||
|
||||
if len(metrics) == 0 and 'eval_metric' in params:
|
||||
if isinstance(params['eval_metric'], list):
|
||||
@@ -428,12 +431,14 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
|
||||
params.pop("eval_metric", None)
|
||||
|
||||
if early_stopping_rounds is not None:
|
||||
|
||||
if len(metrics) > 1:
|
||||
raise ValueError('Check your params. '\
|
||||
'Early stopping works with single eval metric only.')
|
||||
msg = ('Check your params. '
|
||||
'Early stopping works with single eval metric only.')
|
||||
raise ValueError(msg)
|
||||
if verbose_eval:
|
||||
sys.stderr.write("Will train until cv error hasn't decreased in {} rounds.\n".format(\
|
||||
early_stopping_rounds))
|
||||
msg = "Will train until cv error hasn't decreased in {} rounds.\n"
|
||||
sys.stderr.write(msg.format(early_stopping_rounds))
|
||||
|
||||
maximize_score = False
|
||||
if len(metrics) == 1:
|
||||
@@ -466,10 +471,10 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
|
||||
best_score = score
|
||||
best_score_i = i
|
||||
elif i - best_score_i >= early_stopping_rounds:
|
||||
results = results[:best_score_i+1]
|
||||
results = results[:best_score_i + 1]
|
||||
if verbose_eval:
|
||||
sys.stderr.write("Stopping. Best iteration:\n[{}] cv-mean:{}\tcv-std:{}\n".
|
||||
format(best_score_i, results[-1][0], results[-1][1]))
|
||||
msg = "Stopping. Best iteration:\n[{}] cv-mean:{}\tcv-std:{}\n"
|
||||
sys.stderr.write(msg.format(best_score_i, results[-1][0], results[-1][1]))
|
||||
break
|
||||
if as_pandas:
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user