@@ -5,3 +5,10 @@ recursive-include xgboost/windows *
|
||||
recursive-include xgboost/subtree *
|
||||
recursive-include xgboost/src *
|
||||
recursive-include xgboost/multi-node *
|
||||
#exclude pre-compiled .o files to avoid confusion
|
||||
#including the pre-compiled .so is needed as a placeholder
|
||||
#since it will be copied over after compiling on the fly
|
||||
global-exclude xgboost/wrapper/*.so.gz
|
||||
global-exclude xgboost/*.o
|
||||
global-exclude *.pyo
|
||||
global-exclude *.pyc
|
||||
|
||||
48
python-package/build_trouble_shooting.md
Normal file
48
python-package/build_trouble_shooting.md
Normal file
@@ -0,0 +1,48 @@
|
||||
XGBoost Python Package Troubleshooting
|
||||
======================
|
||||
Windows platform
|
||||
------------
|
||||
The current best solution for installing xgboost on a Windows machine is building from github. Please go to [windows](/windows/), build with the Visual Studio project file, and install. Additional detailed instructions can be found in this [installation tutorial](https://www.kaggle.com/c/otto-group-product-classification-challenge/forums/t/13043/run-xgboost-from-windows-and-python) from the Kaggle Otto Forum.
|
||||
|
||||
`pip install xgboost` is currently **neither** tested **nor** supported on the Windows platform.
|
||||
|
||||
Linux platform (also Mac OS X in general)
|
||||
------------
|
||||
**Trouble 0**: I see error messages like this when installing from github using `python setup.py install`.
|
||||
|
||||
XGBoostLibraryNotFound: Cannot find XGBoost Libarary in the candicate path, did you install compilers and run build.sh in root path?
|
||||
List of candidates:
|
||||
/home/dmlc/anaconda/lib/python2.7/site-packages/xgboost-0.4-py2.7.egg/xgboost/libxgboostwrapper.so
|
||||
/home/dmlc/anaconda/lib/python2.7/site-packages/xgboost-0.4-py2.7.egg/xgboost/../../wrapper/libxgboostwrapper.so
|
||||
/home/dmlc/anaconda/lib/python2.7/site-packages/xgboost-0.4-py2.7.egg/xgboost/./wrapper/libxgboostwrapper.so
|
||||
|
||||
**Solution 0**: Please check if you have:
|
||||
|
||||
* installed C++ compilers, for example `g++` and `gcc` (Linux) or `clang LLVM` (Mac OS X). Recommended compilers are `g++-5` or newer (Linux and Mac), or the `clang` that comes with Xcode on Mac OS X. For installing compilers, please refer to your system's package management commands, e.g. `apt-get`, `yum` or `brew` (Mac).
|
||||
* compilers in your `$PATH`. Try typing `gcc` and see if you have it in your path.
|
||||
|
||||
**Trouble 1**: I see the same error message as in **Trouble 0** when installing with `pip install xgboost`.
|
||||
|
||||
**Solution 1**: the problem is the same as in **Trouble 0**, please see **Solution 0**.
|
||||
|
||||
**Trouble 2**: I see this error message when `pip install xgboost`. It says I have `libxgboostwrapper.so` but it is not valid.
|
||||
|
||||
OSError: /home/dmlc/anaconda/lib/python2.7/site-packages/xgboost/./wrapper/libxgboostwrapper.so: invalid ELF header
|
||||
|
||||
**Solution 2**: The solution is the same as in 0 and 1: install the `g++` compiler. The reason for this rare error is that the `pip` package ships with a `libxgboostwrapper.so` that was pre-compiled on Mac, as a placeholder that allows `setup.py` to find the right lib path. If compiling does not happen on your system, the package may refer to this placeholder lib and fail. When compiling on the fly succeeds, the placeholder `libxgboostwrapper.so` is automatically removed and replaced by a correctly generated one for your system.
|
||||
|
||||
**Trouble 3**: My system's `pip` says it can't find a valid `xgboost` installation release on `PyPI`.
|
||||
**Solution 3**: Some Linux systems come with an old `pip` version. Please update to the latest `pip` by following the official installation document at <http://pip.readthedocs.org/en/stable/installing/>
|
||||
|
||||
**Trouble 4**: I tried `python setup.py install` but it says the `setuptools` import fails.
|
||||
**Solution 4**: Please make sure you have [setuptools](https://pypi.python.org/pypi/setuptools) before installing the python package.
|
||||
|
||||
Mac OS X (specific)
|
||||
------------
|
||||
Most of the troubles and solutions are the same as those on the Linux platform. Mac has the following specific problems.
|
||||
|
||||
**Trouble 0**: I successfully installed `xgboost` from github or with `pip install xgboost`, but it runs very slowly with only a single thread. What is going on?
|
||||
**Solution 0**: The `clang LLVM` compiler that comes with Xcode on Mac OS X doesn't support OpenMP multi-threading. An alternative is to install `homebrew` <http://brew.sh/> and run `brew install g++-5`, which provides multi-threaded OpenMP support.
|
||||
|
||||
**Trouble 1**: Can I install `clang-omp` for supporting OpenMP without using `gcc`?
|
||||
**Solution 1**: This is not supported and may cause linking errors.
|
||||
@@ -2,21 +2,10 @@
|
||||
"""Setup xgboost package."""
|
||||
from __future__ import absolute_import
|
||||
import sys
|
||||
from setuptools import setup, find_packages
|
||||
import subprocess
|
||||
sys.path.insert(0, '.')
|
||||
|
||||
import os
|
||||
#build on the fly if install in pip
|
||||
#otherwise, use build.sh in the parent directory
|
||||
|
||||
if 'pip' in __file__:
|
||||
if not os.name == 'nt': #if not windows
|
||||
build_sh = subprocess.Popen(['sh', 'xgboost/build-python.sh'])
|
||||
build_sh.wait()
|
||||
output = build_sh.communicate()
|
||||
print(output)
|
||||
|
||||
from setuptools import setup, find_packages
|
||||
#import subprocess
|
||||
sys.path.insert(0, '.')
|
||||
|
||||
CURRENT_DIR = os.path.dirname(__file__)
|
||||
|
||||
@@ -28,15 +17,12 @@ libpath = {'__file__': libpath_py}
|
||||
exec(compile(open(libpath_py, "rb").read(), libpath_py, 'exec'), libpath, libpath)
|
||||
|
||||
LIB_PATH = libpath['find_lib_path']()
|
||||
#print LIB_PATH
|
||||
|
||||
#to deploy to pip, please use
|
||||
#make pythonpack
|
||||
#python setup.py register sdist upload
|
||||
#and be sure to test it firstly using "python setup.py register sdist upload -r pypitest"
|
||||
#Please use setup_pip.py for generating and deploying pip installation
|
||||
#detailed instruction in setup_pip.py
|
||||
setup(name='xgboost',
|
||||
version=open(os.path.join(CURRENT_DIR, 'xgboost/VERSION')).read().strip(),
|
||||
#version='0.4a13',
|
||||
#version='0.4a23',
|
||||
description=open(os.path.join(CURRENT_DIR, 'README.md')).read(),
|
||||
install_requires=[
|
||||
'numpy',
|
||||
@@ -46,10 +32,6 @@ setup(name='xgboost',
|
||||
maintainer_email='phunter.lau@gmail.com',
|
||||
zip_safe=False,
|
||||
packages=find_packages(),
|
||||
#don't need this and don't use this, give everything to MANIFEST.in
|
||||
#package_dir = {'':'xgboost'},
|
||||
#package_data = {'': ['*.txt','*.md','*.sh'],
|
||||
# }
|
||||
#this will use MANIFEST.in during install where we specify additional files,
|
||||
#this is the golden line
|
||||
include_package_data=True,
|
||||
|
||||
58
python-package/setup_pip.py
Normal file
58
python-package/setup_pip.py
Normal file
@@ -0,0 +1,58 @@
|
||||
# pylint: disable=invalid-name, exec-used
|
||||
"""Setup xgboost package."""
|
||||
from __future__ import absolute_import
|
||||
import sys
|
||||
import os
|
||||
from setuptools import setup, find_packages
|
||||
#import subprocess
|
||||
sys.path.insert(0, '.')
|
||||
|
||||
#this script is for packing and shipping pip installation
|
||||
#it builds xgboost code on the fly and packs for pip
|
||||
#please don't use this file for installing from github
|
||||
|
||||
if not os.name == 'nt': #if not windows, compile and install
|
||||
os.system('sh ./xgboost/build-python.sh')
|
||||
else:
|
||||
print('Windows users please use github installation.')
|
||||
sys.exit()
|
||||
|
||||
CURRENT_DIR = os.path.dirname(__file__)
|
||||
|
||||
# We can not import `xgboost.libpath` in setup.py directly since xgboost/__init__.py
|
||||
# import `xgboost.core` and finally will import `numpy` and `scipy` which are setup
|
||||
# `install_requires`. That's why we're using `exec` here.
|
||||
libpath_py = os.path.join(CURRENT_DIR, 'xgboost/libpath.py')
|
||||
libpath = {'__file__': libpath_py}
|
||||
exec(compile(open(libpath_py, "rb").read(), libpath_py, 'exec'), libpath, libpath)
|
||||
|
||||
LIB_PATH = libpath['find_lib_path']()
|
||||
|
||||
#to deploy to pip, please use
|
||||
#make pythonpack
|
||||
#python setup.py register sdist upload
|
||||
#and be sure to test it firstly using "python setup.py register sdist upload -r pypitest"
|
||||
setup(name='xgboost',
|
||||
#version=open(os.path.join(CURRENT_DIR, 'xgboost/VERSION')).read().strip(),
|
||||
version='0.4a24',
|
||||
description=open(os.path.join(CURRENT_DIR, 'README.md')).read(),
|
||||
install_requires=[
|
||||
'numpy',
|
||||
'scipy',
|
||||
],
|
||||
maintainer='Hongliang Liu',
|
||||
maintainer_email='phunter.lau@gmail.com',
|
||||
zip_safe=False,
|
||||
packages=find_packages(),
|
||||
#don't need this and don't use this, give everything to MANIFEST.in
|
||||
#package_dir = {'':'xgboost'},
|
||||
#package_data = {'': ['*.txt','*.md','*.sh'],
|
||||
# }
|
||||
#this will use MANIFEST.in during install where we specify additional files,
|
||||
#this is the golden line
|
||||
include_package_data=True,
|
||||
#!!! don't use data_files for creating pip installation,
|
||||
#otherwise install_data process will copy it to
|
||||
#root directory for some machines, and cause confusions on building
|
||||
#data_files=[('xgboost', LIB_PATH)],
|
||||
url='https://github.com/dmlc/xgboost')
|
||||
@@ -11,6 +11,8 @@
|
||||
|
||||
|
||||
pushd xgboost
|
||||
#remove the pre-compiled .so and trigger the system's on-the-fly compiling
|
||||
make clean
|
||||
if make python; then
|
||||
echo "Successfully build multi-thread xgboost"
|
||||
else
|
||||
|
||||
@@ -249,7 +249,7 @@ class DMatrix(object):
|
||||
csr = scipy.sparse.csr_matrix(data)
|
||||
self._init_from_csr(csr)
|
||||
except:
|
||||
raise TypeError('can not intialize DMatrix from {}'.format(type(data).__name__))
|
||||
raise TypeError('can not initialize DMatrix from {}'.format(type(data).__name__))
|
||||
if label is not None:
|
||||
self.set_label(label)
|
||||
if weight is not None:
|
||||
|
||||
@@ -36,9 +36,10 @@ def find_lib_path():
|
||||
else:
|
||||
dll_path = [os.path.join(p, 'libxgboostwrapper.so') for p in dll_path]
|
||||
lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)]
|
||||
#From github issues, most of installation errors come from machines w/o compilers
|
||||
if len(lib_path) == 0 and not os.environ.get('XGBOOST_BUILD_DOC', False):
|
||||
raise XGBoostLibraryNotFound(
|
||||
'Cannot find XGBoost Libarary in the candicate path, ' +
|
||||
'did you run build.sh in root path?\n'
|
||||
'did you install compilers and run build.sh in root path?\n'
|
||||
'List of candidates:\n' + ('\n'.join(dll_path)))
|
||||
return lib_path
|
||||
|
||||
@@ -7,11 +7,12 @@ from __future__ import absolute_import
|
||||
import re
|
||||
import numpy as np
|
||||
from .core import Booster
|
||||
from .sklearn import XGBModel
|
||||
|
||||
from io import BytesIO
|
||||
|
||||
def plot_importance(booster, ax=None, height=0.2,
|
||||
xlim=None, title='Feature importance',
|
||||
xlim=None, ylim=None, title='Feature importance',
|
||||
xlabel='F score', ylabel='Features',
|
||||
grid=True, **kwargs):
|
||||
|
||||
@@ -19,14 +20,16 @@ def plot_importance(booster, ax=None, height=0.2,
|
||||
|
||||
Parameters
|
||||
----------
|
||||
booster : Booster or dict
|
||||
Booster instance, or dict taken by Booster.get_fscore()
|
||||
booster : Booster, XGBModel or dict
|
||||
Booster or XGBModel instance, or dict taken by Booster.get_fscore()
|
||||
ax : matplotlib Axes, default None
|
||||
Target axes instance. If None, new figure and axes will be created.
|
||||
height : float, default 0.2
|
||||
Bar height, passed to ax.barh()
|
||||
xlim : tuple, default None
|
||||
Tuple passed to axes.xlim()
|
||||
ylim : tuple, default None
|
||||
Tuple passed to axes.ylim()
|
||||
title : str, default "Feature importance"
|
||||
Axes title. To disable, pass None.
|
||||
xlabel : str, default "F score"
|
||||
@@ -46,12 +49,14 @@ def plot_importance(booster, ax=None, height=0.2,
|
||||
except ImportError:
|
||||
raise ImportError('You must install matplotlib to plot importance')
|
||||
|
||||
if isinstance(booster, Booster):
|
||||
if isinstance(booster, XGBModel):
|
||||
importance = booster.booster().get_fscore()
|
||||
elif isinstance(booster, Booster):
|
||||
importance = booster.get_fscore()
|
||||
elif isinstance(booster, dict):
|
||||
importance = booster
|
||||
else:
|
||||
raise ValueError('tree must be Booster or dict instance')
|
||||
raise ValueError('tree must be Booster, XGBModel or dict instance')
|
||||
|
||||
if len(importance) == 0:
|
||||
raise ValueError('Booster.get_fscore() results in empty')
|
||||
@@ -73,12 +78,19 @@ def plot_importance(booster, ax=None, height=0.2,
|
||||
ax.set_yticklabels(labels)
|
||||
|
||||
if xlim is not None:
|
||||
if not isinstance(xlim, tuple) or len(xlim, 2):
|
||||
if not isinstance(xlim, tuple) or len(xlim) != 2:
|
||||
raise ValueError('xlim must be a tuple of 2 elements')
|
||||
else:
|
||||
xlim = (0, max(values) * 1.1)
|
||||
ax.set_xlim(xlim)
|
||||
|
||||
if ylim is not None:
|
||||
if not isinstance(ylim, tuple) or len(ylim) != 2:
|
||||
raise ValueError('ylim must be a tuple of 2 elements')
|
||||
else:
|
||||
ylim = (-1, len(importance))
|
||||
ax.set_ylim(ylim)
|
||||
|
||||
if title is not None:
|
||||
ax.set_title(title)
|
||||
if xlabel is not None:
|
||||
@@ -142,8 +154,8 @@ def to_graphviz(booster, num_trees=0, rankdir='UT',
|
||||
|
||||
Parameters
|
||||
----------
|
||||
booster : Booster
|
||||
Booster instance
|
||||
booster : Booster, XGBModel
|
||||
Booster or XGBModel instance
|
||||
num_trees : int, default 0
|
||||
Specify the ordinal number of target tree
|
||||
rankdir : str, default "UT"
|
||||
@@ -165,8 +177,11 @@ def to_graphviz(booster, num_trees=0, rankdir='UT',
|
||||
except ImportError:
|
||||
raise ImportError('You must install graphviz to plot tree')
|
||||
|
||||
if not isinstance(booster, Booster):
|
||||
raise ValueError('booster must be Booster instance')
|
||||
if not isinstance(booster, (Booster, XGBModel)):
|
||||
raise ValueError('booster must be Booster or XGBModel instance')
|
||||
|
||||
if isinstance(booster, XGBModel):
|
||||
booster = booster.booster()
|
||||
|
||||
tree = booster.get_dump()[num_trees]
|
||||
tree = tree.split()
|
||||
@@ -193,8 +208,8 @@ def plot_tree(booster, num_trees=0, rankdir='UT', ax=None, **kwargs):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
booster : Booster
|
||||
Booster instance
|
||||
booster : Booster, XGBModel
|
||||
Booster or XGBModel instance
|
||||
num_trees : int, default 0
|
||||
Specify the ordinal number of target tree
|
||||
rankdir : str, default "UT"
|
||||
@@ -216,7 +231,6 @@ def plot_tree(booster, num_trees=0, rankdir='UT', ax=None, **kwargs):
|
||||
except ImportError:
|
||||
raise ImportError('You must install matplotlib to plot tree')
|
||||
|
||||
|
||||
if ax is None:
|
||||
_, ax = plt.subplots(1, 1)
|
||||
|
||||
|
||||
@@ -54,6 +54,14 @@ class XGBModel(XGBModelBase):
|
||||
Subsample ratio of the training instance.
|
||||
colsample_bytree : float
|
||||
Subsample ratio of columns when constructing each tree.
|
||||
colsample_bylevel : float
|
||||
Subsample ratio of columns for each split, in each level.
|
||||
reg_alpha : float (xgb's alpha)
|
||||
L1 regularization term on weights
|
||||
reg_lambda : float (xgb's lambda)
|
||||
L2 regularization term on weights
|
||||
scale_pos_weight : float
|
||||
Balancing of positive and negative weights.
|
||||
|
||||
base_score:
|
||||
The initial prediction score of all instances, global bias.
|
||||
@@ -66,7 +74,8 @@ class XGBModel(XGBModelBase):
|
||||
def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100,
|
||||
silent=True, objective="reg:linear",
|
||||
nthread=-1, gamma=0, min_child_weight=1, max_delta_step=0,
|
||||
subsample=1, colsample_bytree=1,
|
||||
subsample=1, colsample_bytree=1, colsample_bylevel=1,
|
||||
reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
|
||||
base_score=0.5, seed=0, missing=None):
|
||||
if not SKLEARN_INSTALLED:
|
||||
raise XGBoostError('sklearn needs to be installed in order to use this module')
|
||||
@@ -82,6 +91,10 @@ class XGBModel(XGBModelBase):
|
||||
self.max_delta_step = max_delta_step
|
||||
self.subsample = subsample
|
||||
self.colsample_bytree = colsample_bytree
|
||||
self.colsample_bylevel = colsample_bylevel
|
||||
self.reg_alpha = reg_alpha
|
||||
self.reg_lambda = reg_lambda
|
||||
self.scale_pos_weight = scale_pos_weight
|
||||
|
||||
self.base_score = base_score
|
||||
self.seed = seed
|
||||
@@ -190,7 +203,7 @@ class XGBModel(XGBModelBase):
|
||||
|
||||
if evals_result:
|
||||
for val in evals_result.items():
|
||||
evals_result_key = val[1].keys()[0]
|
||||
evals_result_key = list(val[1].keys())[0]
|
||||
evals_result[val[0]][evals_result_key] = val[1][evals_result_key]
|
||||
self.evals_result_ = evals_result
|
||||
|
||||
@@ -199,10 +212,12 @@ class XGBModel(XGBModelBase):
|
||||
self.best_iteration = self._Booster.best_iteration
|
||||
return self
|
||||
|
||||
def predict(self, data):
|
||||
def predict(self, data, output_margin=False, ntree_limit=0):
|
||||
# pylint: disable=missing-docstring,invalid-name
|
||||
test_dmatrix = DMatrix(data, missing=self.missing)
|
||||
return self.booster().predict(test_dmatrix)
|
||||
return self.booster().predict(test_dmatrix,
|
||||
output_margin=output_margin,
|
||||
ntree_limit=ntree_limit)
|
||||
|
||||
def evals_result(self):
|
||||
"""Return the evaluation results.
|
||||
@@ -251,14 +266,16 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
||||
n_estimators=100, silent=True,
|
||||
objective="binary:logistic",
|
||||
nthread=-1, gamma=0, min_child_weight=1,
|
||||
max_delta_step=0, subsample=1, colsample_bytree=1,
|
||||
max_delta_step=0, subsample=1, colsample_bytree=1, colsample_bylevel=1,
|
||||
reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
|
||||
base_score=0.5, seed=0, missing=None):
|
||||
super(XGBClassifier, self).__init__(max_depth, learning_rate,
|
||||
n_estimators, silent, objective,
|
||||
nthread, gamma, min_child_weight,
|
||||
max_delta_step, subsample,
|
||||
colsample_bytree,
|
||||
base_score, seed, missing)
|
||||
colsample_bytree, colsample_bylevel,
|
||||
reg_alpha, reg_lambda,
|
||||
scale_pos_weight, base_score, seed, missing)
|
||||
|
||||
def fit(self, X, y, sample_weight=None, eval_set=None, eval_metric=None,
|
||||
early_stopping_rounds=None, verbose=True):
|
||||
@@ -341,7 +358,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
||||
|
||||
if evals_result:
|
||||
for val in evals_result.items():
|
||||
evals_result_key = val[1].keys()[0]
|
||||
evals_result_key = list(val[1].keys())[0]
|
||||
evals_result[val[0]][evals_result_key] = val[1][evals_result_key]
|
||||
self.evals_result_ = evals_result
|
||||
|
||||
@@ -351,9 +368,11 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
||||
|
||||
return self
|
||||
|
||||
def predict(self, data):
|
||||
def predict(self, data, output_margin=False, ntree_limit=0):
|
||||
test_dmatrix = DMatrix(data, missing=self.missing)
|
||||
class_probs = self.booster().predict(test_dmatrix)
|
||||
class_probs = self.booster().predict(test_dmatrix,
|
||||
output_margin=output_margin,
|
||||
ntree_limit=ntree_limit)
|
||||
if len(class_probs.shape) > 1:
|
||||
column_indexes = np.argmax(class_probs, axis=1)
|
||||
else:
|
||||
@@ -361,9 +380,11 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
||||
column_indexes[class_probs > 0.5] = 1
|
||||
return self._le.inverse_transform(column_indexes)
|
||||
|
||||
def predict_proba(self, data):
|
||||
def predict_proba(self, data, output_margin=False, ntree_limit=0):
|
||||
test_dmatrix = DMatrix(data, missing=self.missing)
|
||||
class_probs = self.booster().predict(test_dmatrix)
|
||||
class_probs = self.booster().predict(test_dmatrix,
|
||||
output_margin=output_margin,
|
||||
ntree_limit=ntree_limit)
|
||||
if self.objective == "multi:softprob":
|
||||
return class_probs
|
||||
else:
|
||||
|
||||
@@ -10,7 +10,8 @@ import numpy as np
|
||||
from .core import Booster, STRING_TYPES
|
||||
|
||||
def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
|
||||
early_stopping_rounds=None, evals_result=None, verbose_eval=True):
|
||||
maximize=False, early_stopping_rounds=None, evals_result=None,
|
||||
verbose_eval=True, learning_rates=None, xgb_model=None):
|
||||
# pylint: disable=too-many-statements,too-many-branches, attribute-defined-outside-init
|
||||
"""Train a booster with given parameters.
|
||||
|
||||
@@ -29,6 +30,8 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
|
||||
Customized objective function.
|
||||
feval : function
|
||||
Customized evaluation function.
|
||||
maximize : bool
|
||||
Whether to maximize feval.
|
||||
early_stopping_rounds: int
|
||||
Activates early stopping. Validation error needs to decrease at least
|
||||
every <early_stopping_rounds> round(s) to continue training.
|
||||
@@ -46,13 +49,27 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
|
||||
verbose_eval : bool
|
||||
If `verbose_eval` then the evaluation metric on the validation set, if
|
||||
given, is printed at each boosting stage.
|
||||
learning_rates: list or function
|
||||
Learning rate for each boosting round (yields learning rate decay).
|
||||
- list l: eta = l[boosting round]
|
||||
- function f: eta = f(boosting round, num_boost_round)
|
||||
xgb_model : file name of stored xgb model or 'Booster' instance
|
||||
Xgb model to be loaded before training (allows training continuation).
|
||||
|
||||
Returns
|
||||
-------
|
||||
booster : a trained booster model
|
||||
"""
|
||||
evals = list(evals)
|
||||
bst = Booster(params, [dtrain] + [d[0] for d in evals])
|
||||
ntrees = 0
|
||||
if xgb_model is not None:
|
||||
if not isinstance(xgb_model, STRING_TYPES):
|
||||
xgb_model = xgb_model.save_raw()
|
||||
bst = Booster(params, [dtrain] + [d[0] for d in evals], model_file=xgb_model)
|
||||
ntrees = len(bst.get_dump())
|
||||
else:
|
||||
bst = Booster(params, [dtrain] + [d[0] for d in evals])
|
||||
|
||||
|
||||
if evals_result is not None:
|
||||
if not isinstance(evals_result, dict):
|
||||
@@ -65,6 +82,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
|
||||
if not early_stopping_rounds:
|
||||
for i in range(num_boost_round):
|
||||
bst.update(dtrain, i, obj)
|
||||
ntrees += 1
|
||||
if len(evals) != 0:
|
||||
bst_eval_set = bst.eval_set(evals, i, feval)
|
||||
if isinstance(bst_eval_set, STRING_TYPES):
|
||||
@@ -78,7 +96,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
|
||||
res = re.findall("([0-9a-zA-Z@]+[-]*):-?([0-9.]+).", msg)
|
||||
for key in evals_name:
|
||||
evals_idx = evals_name.index(key)
|
||||
res_per_eval = len(res) / len(evals_name)
|
||||
res_per_eval = len(res) // len(evals_name)
|
||||
for r in range(res_per_eval):
|
||||
res_item = res[(evals_idx*res_per_eval) + r]
|
||||
res_key = res_item[0]
|
||||
@@ -87,6 +105,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
|
||||
evals_result[key][res_key].append(res_val)
|
||||
else:
|
||||
evals_result[key][res_key] = [res_val]
|
||||
bst.best_iteration = (ntrees - 1)
|
||||
return bst
|
||||
|
||||
else:
|
||||
@@ -94,7 +113,8 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
|
||||
if len(evals) < 1:
|
||||
raise ValueError('For early stopping you need at least one set in evals.')
|
||||
|
||||
sys.stderr.write("Will train until {} error hasn't decreased in {} rounds.\n".format(\
|
||||
if verbose_eval:
|
||||
sys.stderr.write("Will train until {} error hasn't decreased in {} rounds.\n".format(\
|
||||
evals[-1][1], early_stopping_rounds))
|
||||
|
||||
# is params a list of tuples? are we using multiple eval metrics?
|
||||
@@ -110,6 +130,8 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
|
||||
maximize_metrics = ('auc', 'map', 'ndcg')
|
||||
if any(params['eval_metric'].startswith(x) for x in maximize_metrics):
|
||||
maximize_score = True
|
||||
if feval is not None:
|
||||
maximize_score = maximize
|
||||
|
||||
if maximize_score:
|
||||
best_score = 0.0
|
||||
@@ -117,10 +139,19 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
|
||||
best_score = float('inf')
|
||||
|
||||
best_msg = ''
|
||||
best_score_i = 0
|
||||
best_score_i = ntrees
|
||||
|
||||
if isinstance(learning_rates, list) and len(learning_rates) != num_boost_round:
|
||||
raise ValueError("Length of list 'learning_rates' has to equal 'num_boost_round'.")
|
||||
|
||||
for i in range(num_boost_round):
|
||||
if learning_rates is not None:
|
||||
if isinstance(learning_rates, list):
|
||||
bst.set_param({'eta': learning_rates[i]})
|
||||
else:
|
||||
bst.set_param({'eta': learning_rates(i, num_boost_round)})
|
||||
bst.update(dtrain, i, obj)
|
||||
ntrees += 1
|
||||
bst_eval_set = bst.eval_set(evals, i, feval)
|
||||
|
||||
if isinstance(bst_eval_set, STRING_TYPES):
|
||||
@@ -135,7 +166,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
|
||||
res = re.findall("([0-9a-zA-Z@]+[-]*):-?([0-9.]+).", msg)
|
||||
for key in evals_name:
|
||||
evals_idx = evals_name.index(key)
|
||||
res_per_eval = len(res) / len(evals_name)
|
||||
res_per_eval = len(res) // len(evals_name)
|
||||
for r in range(res_per_eval):
|
||||
res_item = res[(evals_idx*res_per_eval) + r]
|
||||
res_key = res_item[0]
|
||||
@@ -149,7 +180,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
|
||||
if (maximize_score and score > best_score) or \
|
||||
(not maximize_score and score < best_score):
|
||||
best_score = score
|
||||
best_score_i = i
|
||||
best_score_i = (ntrees - 1)
|
||||
best_msg = msg
|
||||
elif i - best_score_i >= early_stopping_rounds:
|
||||
sys.stderr.write("Stopping. Best iteration:\n{}\n\n".format(best_msg))
|
||||
|
||||
Reference in New Issue
Block a user