From 129fee64f32150547d331430bb4f208018ab468d Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 16 Nov 2014 11:38:21 -0800 Subject: [PATCH] fix regression --- demo/kaggle-higgs/higgs-cv.py | 2 +- src/tree/updater_histmaker-inl.hpp | 22 +++++++++++++++------- src/utils/quantile.h | 7 ++++++- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/demo/kaggle-higgs/higgs-cv.py b/demo/kaggle-higgs/higgs-cv.py index 3e36fa66b..1d660aa8f 100755 --- a/demo/kaggle-higgs/higgs-cv.py +++ b/demo/kaggle-higgs/higgs-cv.py @@ -10,7 +10,7 @@ label = train[:,32] data = train[:,1:31] weight = train[:,31] dtrain = xgb.DMatrix( data, label=label, missing = -999.0, weight=weight ) -param = {'max_depth':6, 'eta':0.1, 'silent':1, 'objective':'binary:logitraw', 'nthread':4} +param = {'max_depth':6, 'eta':0.1, 'silent':1, 'objective':'binary:logitraw', 'nthread':4, 'updater':'grow_histmaker,prune'} num_round = 120 print ('running cross validation, with preprocessing function') diff --git a/src/tree/updater_histmaker-inl.hpp b/src/tree/updater_histmaker-inl.hpp index 68ceca371..8cade6313 100644 --- a/src/tree/updater_histmaker-inl.hpp +++ b/src/tree/updater_histmaker-inl.hpp @@ -53,7 +53,7 @@ class HistMaker: public IUpdater { const std::vector &gpair, const BoosterInfo &info, const bst_uint ridx) { - unsigned i = std::lower_bound(cut, cut + size, fv) - cut; + unsigned i = std::upper_bound(cut, cut + size, fv) - cut; utils::Assert(i < size, "maximum value must be in cut"); data[i].Add(gpair, info, ridx); } @@ -155,7 +155,7 @@ class HistMaker: public IUpdater { RegTree *p_tree) { this->InitData(gpair, *p_fmat, info.root_index, *p_tree); this->UpdateNode2WorkIndex(*p_tree); - for (int depth = 0; depth < param.max_depth; ++depth) { + for (int depth = 0; depth < param.max_depth; ++depth) { this->FindSplit(depth, gpair, p_fmat, info, p_tree); this->UpdateQueueExpand(*p_tree); this->UpdateNode2WorkIndex(*p_tree); @@ -278,6 +278,7 @@ class HistMaker: public IUpdater { SplitEntry *best, TStats *left_sum) { if (hist.size == 0) return; + double root_gain = node_sum.CalcGain(param); TStats s(param), c(param); for (bst_uint i = 0; i < hist.size; ++i) { @@ -383,7 +384,7 @@ class QuantileHistMaker: public HistMaker { sketchs.resize(this->qexpand.size() * tree.param.num_feature); for (size_t i = 0; i < sketchs.size(); ++i) { sketchs[i].Init(info.num_row, this->param.sketch_eps); - } + } // start accumulating statistics utils::IIterator *iter = p_fmat->RowIterator(); iter->BeforeFirst(); @@ -453,14 +454,21 @@ class QuantileHistMaker: public HistMaker { this->wspace.rptr.clear(); this->wspace.rptr.push_back(0); for (size_t wid = 0; wid < this->qexpand.size(); ++wid) { - for (size_t fid = 0; fid < tree.param.num_feature; ++fid) { + for (int fid = 0; fid < tree.param.num_feature; ++fid) { const WXQSketch::Summary a = summary_array[wid * tree.param.num_feature + fid]; - for (size_t i = 0; i < a.size; ++i) { - bst_float cpt = a.data[i].value + rt_eps; - if (i == 0 || cpt > this->wspace.cut.back()) { + for (size_t i = 1; i < a.size; ++i) { + bst_float cpt = a.data[i].value - rt_eps; + if (i == 1 || cpt > this->wspace.cut.back()) { this->wspace.cut.push_back(cpt); } } + // push a value that is greater than anything + if (a.size != 0) { + bst_float cpt = a.data[a.size - 1].value; + // this must be bigger than last value in a scale + bst_float last = cpt + fabs(cpt); + this->wspace.cut.push_back(last); + } this->wspace.rptr.push_back(this->wspace.cut.size()); } // reserve last value for global statistics diff --git a/src/utils/quantile.h b/src/utils/quantile.h index a3b8c18dd..53117f28b 100644 --- a/src/utils/quantile.h +++ b/src/utils/quantile.h @@ -10,6 +10,7 @@ #include #include #include +#include "./io.h" #include "./utils.h" namespace xgboost { @@ -481,7 +482,11 @@ class QuantileSketchTemplate { /*! \brief same as summary, but use STL to backup the space */ struct SummaryContainer : public Summary { std::vector space; - SummaryContainer(void) : Summary(NULL, 0) { + explicit SummaryContainer(void) : Summary(NULL, 0) { + } + explicit SummaryContainer(const SummaryContainer &src) : Summary(NULL, src.size) { + this->space = src.space; + this->data = BeginPtr(this->space); } /*! \brief reserve space for summary */ inline void Reserve(size_t size) {