make some changes to cv
This commit is contained in:
parent
2182ebcba1
commit
998ca3bdc9
@ -433,31 +433,22 @@ class CVPack:
|
|||||||
def eval(self, r, feval):
    """
    Evaluate this pack by delegating to the booster's eval_set.

    Passes the pack's watchlist, r and feval straight through to
    self.bst.eval_set and returns whatever that call returns.
    r is presumably the boosting round index -- confirm against the caller.
    """
    booster = self.bst
    watchlist = self.watchlist
    return booster.eval_set(watchlist, r, feval)
|
||||||
|
|
||||||
def mknfold(dall, nfold, param, seed, evals=[], fpreproc=None):
    """
    Make the list of nfold CVPack objects from a seeded random row permutation.

    Args:
        dall: the full data set; must provide num_row() and slice(indices).
        nfold: number of folds to create.
        param: dict of booster parameters.
        seed: seed for numpy's global random generator used to shuffle rows.
        evals: evaluation metric names; each is appended to the parameter
            list as an ('eval_metric', name) pair. The default list is
            never mutated here.
        fpreproc: optional callable taking (dtrain, dtest, param) and
            returning the transformed (dtrain, dtest, param). It receives a
            copy of param, so the caller's dict is left untouched.

    Returns:
        A list with one CVPack per fold; fold k is tested on the k-th index
        chunk and trained on the concatenation of all the other chunks.
    """
    np.random.seed(seed)
    # num_row() is the accessor the pre-change code used on dall.
    randidx = np.random.permutation(dall.num_row())

    # Floor division keeps the slice bounds integral on Python 2 and 3.
    # Rows past nfold * kstep do not land in any fold.
    kstep = len(randidx) // nfold
    idxset = [randidx[i * kstep:(i + 1) * kstep] for i in range(nfold)]

    ret = []
    for k in range(nfold):
        dtrain = dall.slice(
            np.concatenate([idxset[i] for i in range(nfold) if i != k]))
        dtest = dall.slice(idxset[k])
        # Run preprocessing on the data set if needed; otherwise use the
        # caller's parameters as they are.
        if fpreproc is not None:
            dtrain, dtest, tparam = fpreproc(dtrain, dtest, param.copy())
        else:
            tparam = param
        # list(...) so the concatenation also works when items() is a view.
        plst = list(tparam.items()) + [('eval_metric', itm) for itm in evals]
        ret.append(CVPack(dtrain, dtest, plst))
    return ret
|
||||||
|
|
||||||
@ -466,25 +457,22 @@ def aggcv(rlist):
|
|||||||
aggregate cross validation results
|
aggregate cross validation results
|
||||||
"""
|
"""
|
||||||
cvmap = {}
|
cvmap = {}
|
||||||
arr = rlist[0].split()
|
ret = rlist[0].split()[0]
|
||||||
ret = arr[0]
|
for line in rlist:
|
||||||
for it in arr[1:]:
|
|
||||||
k, v = it.split(':')
|
|
||||||
cvmap[k] = [float(v)]
|
|
||||||
for line in rlist[1:]:
|
|
||||||
arr = line.split()
|
arr = line.split()
|
||||||
assert ret == arr[0]
|
assert ret == arr[0]
|
||||||
for it in arr[1:]:
|
for it in arr[1:]:
|
||||||
k, v = it.split(':')
|
k, v = it.split(':')
|
||||||
|
if k not in cvmap:
|
||||||
|
cvmap[k] = []
|
||||||
cvmap[k].append(float(v))
|
cvmap[k].append(float(v))
|
||||||
|
|
||||||
for k, v in sorted(cvmap.items(), key = lambda x:x[0]):
|
for k, v in sorted(cvmap.items(), key = lambda x:x[0]):
|
||||||
v = np.array(v)
|
v = np.array(v)
|
||||||
ret += '\t%s:%f+%f' % (k, np.mean(v), np.std(v))
|
ret += '\t%s:%f+%f' % (k, np.mean(v), np.std(v))
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def cv(params, dtrain, num_boost_round = 10, nfold=3, eval_metrics = [], \
|
def cv(params, dtrain, num_boost_round = 10, nfold=3, eval_metric = [], \
|
||||||
obj=None, feval=None):
|
obj = None, feval = None, fpreproc = None):
|
||||||
""" cross validation with given paramaters
|
""" cross validation with given paramaters
|
||||||
Args:
|
Args:
|
||||||
params: dict
|
params: dict
|
||||||
@ -495,12 +483,14 @@ def cv(params, dtrain, num_boost_round = 10, nfold=3, eval_metrics = [], \
|
|||||||
num of round to be boosted
|
num of round to be boosted
|
||||||
nfold: int
|
nfold: int
|
||||||
folds to do cv
|
folds to do cv
|
||||||
evals: list
|
evals: list or
|
||||||
list of items to be evaluated
|
list of items to be evaluated
|
||||||
obj:
|
obj:
|
||||||
feval:
|
feval:
|
||||||
|
fpreproc: preprocessing function that takes dtrain, dtest,
|
||||||
|
param and return transformed version of dtrain, dtest, param
|
||||||
"""
|
"""
|
||||||
cvfolds = mknfold(dtrain, nfold, params, 0, eval_metrics)
|
cvfolds = mknfold(dtrain, nfold, params, 0, eval_metrics, fpreproc)
|
||||||
for i in range(num_boost_round):
|
for i in range(num_boost_round):
|
||||||
for f in cvfolds:
|
for f in cvfolds:
|
||||||
f.update(i, obj)
|
f.update(i, obj)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user