From e90b25a38102cd36e30e0f02d2ff0a1ed26422e4 Mon Sep 17 00:00:00 2001
From: tqchen
Date: Mon, 8 Sep 2014 16:20:41 -0700
Subject: [PATCH 1/2] add object bound checking

---
 src/learner/objective-inl.hpp | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/src/learner/objective-inl.hpp b/src/learner/objective-inl.hpp
index 96aacf12d..69b7ae4fd 100644
--- a/src/learner/objective-inl.hpp
+++ b/src/learner/objective-inl.hpp
@@ -41,6 +41,25 @@ struct LossType {
       default: utils::Error("unknown loss_type"); return 0.0f;
     }
   }
+  /*!
+   * \brief check if label range is valid
+   */
+  inline bool CheckLabel(float x) const {
+    if (loss_type != kLinearSquare) {
+      return x >= 0.0f && x <= 1.0f;
+    }
+    return true;
+  }
+  /*!
+   * \brief error message displayed when check label fail
+   */
+  inline const char * CheckLabelErrorMsg(void) const {
+    if (loss_type != kLinearSquare) {
+      return "label must be in [0,1] for logistic regression";
+    } else {
+      return "";
+    }
+  }
   /*!
    * \brief calculate first order gradient of loss, given transformed prediction
    * \param predt transformed prediction
@@ -115,6 +134,8 @@ class RegLossObj : public IObjFunction{
                  "labels are not correctly provided");
     std::vector<bst_gpair> &gpair = *out_gpair;
     gpair.resize(preds.size());
+    // check if label in range
+    bool label_correct = true;
     // start calculating gradient
     const unsigned nstep = static_cast<unsigned>(info.labels.size());
     const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
@@ -124,9 +145,11 @@ class RegLossObj : public IObjFunction{
       float p = loss.PredTransform(preds[i]);
       float w = info.GetWeight(j);
       if (info.labels[j] == 1.0f) w *= scale_pos_weight;
+      if (!loss.CheckLabel(info.labels[j])) label_correct = false;
       gpair[i] = bst_gpair(loss.FirstOrderGradient(p, info.labels[j]) * w,
                            loss.SecondOrderGradient(p, info.labels[j]) * w);
     }
+    utils::Check(label_correct, loss.CheckLabelErrorMsg());
   }
   virtual const char* DefaultEvalMetric(void) const {
     return loss.DefaultEvalMetric();

From a3806398b9471384e89f9c7b58081aa458ef3e68 Mon Sep 17 00:00:00 2001
From: tqchen
Date: Mon, 8 Sep 2014 21:34:42 -0700
Subject: [PATCH 2/2] delete old cvpack

---
 wrapper/xgboost.py | 65 ----------------------------------------------
 1 file changed, 65 deletions(-)

diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py
index 04dcfa781..134926c8a 100644
--- a/wrapper/xgboost.py
+++ b/wrapper/xgboost.py
@@ -227,71 +227,6 @@ class DMatrix:
             self.handle, (ctypes.c_int*len(rindex))(*rindex), len(rindex)))
         return res
 
-class CVPack:
-    def __init__(self, dtrain, dtest, param):
-        self.dtrain = dtrain
-        self.dtest = dtest
-        self.watchlist = watchlist = [ (dtrain,'train'), (dtest, 'test') ]
-        self.bst = Booster(param, [dtrain,dtest])
-    def update(self,r):
-        self.bst.update(self.dtrain, r)
-    def eval(self,r):
-        return self.bst.eval_set(self.watchlist, r)
-
-def mknfold(dall, nfold, param, seed, weightscale=None):
-    """
-    mk nfold list of cvpack from randidx
-    """
-    randidx = range(dall.num_row())
-    random.seed(seed)
-    random.shuffle(randidx)
-
-    idxset = []
-    kstep = len(randidx) / nfold
-    for i in range(nfold):
-        idxset.append(randidx[ (i*kstep) : min(len(randidx),(i+1)*kstep) ])
-
-    ret = []
-    for k in range(nfold):
-        trainlst = []
-        for j in range(nfold):
-            if j == k:
-                testlst = idxset[j]
-            else:
-                trainlst += idxset[j]
-        dtrain = dall.slice(trainlst)
-        dtest = dall.slice(testlst)
-        # rescale weight of dtrain and dtest
-        if weightscale != None:
-            dtrain.set_weight( dtrain.get_weight() * weightscale * dall.num_row() / dtrain.num_row() )
-            dtest.set_weight( dtest.get_weight() * weightscale * dall.num_row() / dtest.num_row() )
-
-        ret.append(CVPack(dtrain, dtest, param))
-    return ret
-
-def aggcv(rlist):
-    """
-    aggregate cross validation results
-    """
-    cvmap = {}
-    arr = rlist[0].split()
-    ret = arr[0]
-    for it in arr[1:]:
-        k, v = it.split(':')
-        cvmap[k] = [float(v)]
-    for line in rlist[1:]:
-        arr = line.split()
-        assert ret == arr[0]
-        for it in arr[1:]:
-            k, v = it.split(':')
-            cvmap[k].append(float(v))
-
-    for k, v in sorted(cvmap.items(), key = lambda x:x[0]):
-        v = np.array(v)
-        ret += '\t%s:%f+%f' % (k, np.mean(v), np.std(v))
-    return ret
-
-
 class Booster:
     """learner class """
     def __init__(self, params={}, cache=[], model_file = None):
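
Note on the first patch: the new CheckLabel/CheckLabelErrorMsg pair enforces
that labels lie in [0,1] for every loss except plain squared error
(kLinearSquare). A minimal Python sketch of the same rule, for illustration
only; the names here are not part of the patch, which operates on the C++
loss_type enum:

    # Sketch of the label-range rule added in PATCH 1/2, mirrored in Python.
    K_LINEAR_SQUARE = "linear_square"   # illustrative stand-in for kLinearSquare

    def check_label(loss_type, label):
        """A label must lie in [0,1] unless the loss is plain squared error."""
        if loss_type != K_LINEAR_SQUARE:
            return 0.0 <= label <= 1.0
        return True

    def check_labels(loss_type, labels):
        # Mirror the patched gradient loop: record failures while scanning,
        # then fail once at the end, like the single utils::Check call.
        if not all(check_label(loss_type, y) for y in labels):
            raise ValueError("label must be in [0,1] for logistic regression")

    check_labels("logistic", [0.0, 0.5, 1.0])   # passes
    # check_labels("logistic", [2.0])           # would raise ValueError

Setting a label_correct flag inside the loop and calling utils::Check once
after it, rather than erroring immediately, keeps the failure path out of the
gradient loop, which (judging by the bst_omp_uint counter) is presumably
OpenMP-parallel.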
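Note on the second patch: the removed CVPack, mknfold, and aggcv helpers built
shuffled n-fold train/test index lists (then sliced a DMatrix with them) and
averaged the per-fold eval strings. A standalone sketch of the same splitting
and aggregation logic, with the old Python 2 idioms (range as a list,
true-division "/" for kstep) translated to Python 3; the function names are
reused only for clarity and this is not the wrapper's API:

    import random
    import numpy as np

    def make_nfold_indices(num_row, nfold, seed):
        """Shuffle row indices and pair them into nfold (train, test) splits,
        as the removed mknfold did before calling DMatrix.slice."""
        randidx = list(range(num_row))   # list(), so shuffle works on Python 3
        random.seed(seed)
        random.shuffle(randidx)
        kstep = len(randidx) // nfold    # floor division; "/" was Python 2 only
        idxset = [randidx[i * kstep: min(len(randidx), (i + 1) * kstep)]
                  for i in range(nfold)]
        folds = []
        for k in range(nfold):
            test = idxset[k]
            train = [i for j in range(nfold) if j != k for i in idxset[j]]
            folds.append((train, test))
        return folds

    def aggcv(rlist):
        """Aggregate per-fold eval strings such as '[0] test-error:0.25'
        into '[0] test-error:mean+std' per metric, as the removed helper did."""
        cvmap = {}
        ret = rlist[0].split()[0]
        for line in rlist:
            arr = line.split()
            assert ret == arr[0]
            for it in arr[1:]:
                k, v = it.split(':')
                cvmap.setdefault(k, []).append(float(v))
        for k, v in sorted(cvmap.items()):
            v = np.array(v)
            ret += '\t%s:%f+%f' % (k, np.mean(v), np.std(v))
        return ret

    print(make_nfold_indices(num_row=10, nfold=3, seed=0))
    print(aggcv(['[0] test-error:0.1', '[0] test-error:0.3']))

The optional weightscale rescaling from the removed mknfold is omitted here,
and, like the old code, the sketch silently drops the num_row % nfold trailing
rows after shuffling.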