Merge branch 'master' of ssh://github.com/tqchen/xgboost

This commit is contained in:
tqchen 2014-09-03 13:14:00 -07:00
commit 41ea0bf97a

View File

@@ -438,13 +438,13 @@ def mknfold(dall, nfold, param, seed, evals=[], fpreproc = None):
mk nfold list of cvpack from randidx mk nfold list of cvpack from randidx
""" """
np.random.seed(seed) np.random.seed(seed)
randidx = np.random.permutation(dall.num_rows()) randidx = np.random.permutation(dall.num_row())
kstep = len(randidx) / nfold kstep = len(randidx) / nfold
idset = [randidx[ (i*kstep) : min(len(randidx),(i+1)*kstep) ] for i in range(nfold)] idset = [randidx[ (i*kstep) : min(len(randidx),(i+1)*kstep) ] for i in range(nfold)]
ret = [] ret = []
for k in range(nfold): for k in range(nfold):
dtrain = dall.slice(np.concatenate([idset[i] for i in range(nfold) if k != i])) dtrain = dall.slice(np.concatenate([idset[i] for i in range(nfold) if k != i]))
dtest = all.slice(idxset[k]) dtest = dall.slice(idset[k])
# run preprocessing on the data set if needed # run preprocessing on the data set if needed
if fpreproc is not None: if fpreproc is not None:
dtrain, dtest, tparam = fpreproc(dtrain, dtest, param.copy()) dtrain, dtest, tparam = fpreproc(dtrain, dtest, param.copy())
@@ -490,7 +490,7 @@ def cv(params, dtrain, num_boost_round = 10, nfold=3, eval_metric = [], \
fpreproc: preprocessing function that takes dtrain, dtest, fpreproc: preprocessing function that takes dtrain, dtest,
param and return transformed version of dtrain, dtest, param param and return transformed version of dtrain, dtest, param
""" """
cvfolds = mknfold(dtrain, nfold, params, 0, eval_metrics, fpreproc) cvfolds = mknfold(dtrain, nfold, params, 0, eval_metric, fpreproc)
for i in range(num_boost_round): for i in range(num_boost_round):
for f in cvfolds: for f in cvfolds:
f.update(i, obj) f.update(i, obj)