Merge pull request #712 from Far0n/py_cv

Python cv bug fixes (eval metrics)
This commit is contained in:
Yuan (Terry) Tang 2015-12-29 07:30:26 -06:00
commit d747649892
2 changed files with 84 additions and 23 deletions

View File

@ -361,7 +361,7 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, metrics=(),
Number of boosting iterations. Number of boosting iterations.
nfold : int nfold : int
Number of folds in CV. Number of folds in CV.
metrics : list of strings metrics : string or list of strings
Evaluation metrics to be watched in CV. Evaluation metrics to be watched in CV.
obj : function obj : function
Custom objective function. Custom objective function.
@ -394,6 +394,25 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, metrics=(),
------- -------
evaluation history : list(string) evaluation history : list(string)
""" """
if isinstance(metrics, str):
metrics = [metrics]
if isinstance(params, list):
_metrics = [x[1] for x in params if x[0] == 'eval_metric']
params = dict(params)
if 'eval_metric' in params:
params['eval_metric'] = _metrics
else:
params= dict((k, v) for k, v in params.items())
if len(metrics) == 0 and 'eval_metric' in params:
if isinstance(params['eval_metric'], list):
metrics = params['eval_metric']
else:
metrics = [params['eval_metric']]
params.pop("eval_metric", None)
if early_stopping_rounds is not None: if early_stopping_rounds is not None:
if len(metrics) > 1: if len(metrics) > 1:
raise ValueError('Check your params. '\ raise ValueError('Check your params. '\
@ -434,7 +453,7 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, metrics=(),
best_score_i = i best_score_i = i
elif i - best_score_i >= early_stopping_rounds: elif i - best_score_i >= early_stopping_rounds:
results = results[:best_score_i+1] results = results[:best_score_i+1]
sys.stderr.write("Stopping. Best iteration: {} (mean: {}, std: {})\n". sys.stderr.write("Stopping. Best iteration:\n[{}] cv-mean:{}\tcv-std:{}\n".
format(best_score_i, results[-1][0], results[-1][1])) format(best_score_i, results[-1][0], results[-1][1]))
break break
if as_pandas: if as_pandas:

View File

@ -4,13 +4,14 @@ import xgboost as xgb
import unittest import unittest
import matplotlib import matplotlib
matplotlib.use('Agg') matplotlib.use('Agg')
dpath = 'demo/data/' dpath = 'demo/data/'
rng = np.random.RandomState(1994) rng = np.random.RandomState(1994)
class TestBasic(unittest.TestCase):
class TestBasic(unittest.TestCase):
def test_basic(self): def test_basic(self):
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test') dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
@ -62,6 +63,7 @@ class TestBasic(unittest.TestCase):
def incorrect_type_set(): def incorrect_type_set():
dm.feature_types = list('abcde') dm.feature_types = list('abcde')
self.assertRaises(ValueError, incorrect_type_set) self.assertRaises(ValueError, incorrect_type_set)
# reset # reset
@ -180,7 +182,6 @@ class TestBasic(unittest.TestCase):
assert dm.num_row() == 3 assert dm.num_row() == 3
assert dm.num_col() == 2 assert dm.num_col() == 2
def test_load_file_invalid(self): def test_load_file_invalid(self):
self.assertRaises(ValueError, xgb.Booster, self.assertRaises(ValueError, xgb.Booster,
@ -241,6 +242,47 @@ class TestBasic(unittest.TestCase):
assert isinstance(cv, np.ndarray) assert isinstance(cv, np.ndarray)
assert cv.shape == (10, 4) assert cv.shape == (10, 4)
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', 'eval_metric': 'auc'}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True)
assert 'eval_metric' in params
assert 'auc' in cv.columns[0]
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', 'eval_metric': ['auc']}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True)
assert 'eval_metric' in params
assert 'auc' in cv.columns[0]
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', 'eval_metric': ['auc']}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, early_stopping_rounds=1)
assert 'eval_metric' in params
assert 'auc' in cv.columns[0]
assert cv.shape[0] < 10
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics='auc')
assert 'auc' in cv.columns[0]
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=['auc'])
assert 'auc' in cv.columns[0]
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', 'eval_metric': ['auc']}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics='error')
assert 'eval_metric' in params
assert 'auc' not in cv.columns[0]
assert 'error' in cv.columns[0]
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=['error'])
assert 'eval_metric' in params
assert 'auc' not in cv.columns[0]
assert 'error' in cv.columns[0]
params = list(params.items())
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True, metrics=['error'])
assert isinstance(params, list)
assert 'auc' not in cv.columns[0]
assert 'error' in cv.columns[0]
def test_plotting(self): def test_plotting(self):
bst2 = xgb.Booster(model_file='xgb.model') bst2 = xgb.Booster(model_file='xgb.model')
# plotting # plotting