fix regression

This commit is contained in:
tqchen 2014-11-16 11:38:21 -08:00
parent 02c2278f96
commit 129fee64f3
3 changed files with 22 additions and 9 deletions

View File

@ -10,7 +10,7 @@ label = train[:,32]
data = train[:,1:31]
weight = train[:,31]
dtrain = xgb.DMatrix( data, label=label, missing = -999.0, weight=weight )
param = {'max_depth':6, 'eta':0.1, 'silent':1, 'objective':'binary:logitraw', 'nthread':4}
param = {'max_depth':6, 'eta':0.1, 'silent':1, 'objective':'binary:logitraw', 'nthread':4, 'updater':'grow_histmaker,prune'}
num_round = 120
print ('running cross validation, with preprocessing function')

View File

@ -53,7 +53,7 @@ class HistMaker: public IUpdater {
const std::vector<bst_gpair> &gpair,
const BoosterInfo &info,
const bst_uint ridx) {
unsigned i = std::lower_bound(cut, cut + size, fv) - cut;
unsigned i = std::upper_bound(cut, cut + size, fv) - cut;
utils::Assert(i < size, "maximum value must be in cut");
data[i].Add(gpair, info, ridx);
}
@ -155,7 +155,7 @@ class HistMaker: public IUpdater {
RegTree *p_tree) {
this->InitData(gpair, *p_fmat, info.root_index, *p_tree);
this->UpdateNode2WorkIndex(*p_tree);
for (int depth = 0; depth < param.max_depth; ++depth) {
for (int depth = 0; depth < param.max_depth; ++depth) {
this->FindSplit(depth, gpair, p_fmat, info, p_tree);
this->UpdateQueueExpand(*p_tree);
this->UpdateNode2WorkIndex(*p_tree);
@ -278,6 +278,7 @@ class HistMaker: public IUpdater {
SplitEntry *best,
TStats *left_sum) {
if (hist.size == 0) return;
double root_gain = node_sum.CalcGain(param);
TStats s(param), c(param);
for (bst_uint i = 0; i < hist.size; ++i) {
@ -383,7 +384,7 @@ class QuantileHistMaker: public HistMaker<TStats> {
sketchs.resize(this->qexpand.size() * tree.param.num_feature);
for (size_t i = 0; i < sketchs.size(); ++i) {
sketchs[i].Init(info.num_row, this->param.sketch_eps);
}
}
// start accumulating statistics
utils::IIterator<RowBatch> *iter = p_fmat->RowIterator();
iter->BeforeFirst();
@ -453,14 +454,21 @@ class QuantileHistMaker: public HistMaker<TStats> {
this->wspace.rptr.clear();
this->wspace.rptr.push_back(0);
for (size_t wid = 0; wid < this->qexpand.size(); ++wid) {
for (size_t fid = 0; fid < tree.param.num_feature; ++fid) {
for (int fid = 0; fid < tree.param.num_feature; ++fid) {
const WXQSketch::Summary a = summary_array[wid * tree.param.num_feature + fid];
for (size_t i = 0; i < a.size; ++i) {
bst_float cpt = a.data[i].value + rt_eps;
if (i == 0 || cpt > this->wspace.cut.back()) {
for (size_t i = 1; i < a.size; ++i) {
bst_float cpt = a.data[i].value - rt_eps;
if (i == 1 || cpt > this->wspace.cut.back()) {
this->wspace.cut.push_back(cpt);
}
}
// push a sentinel cut that is strictly greater than every value in this summary,
// so that a lookup over the cuts can always find a slot for the maximum value
if (a.size != 0) {
bst_float cpt = a.data[a.size - 1].value;
// cpt + fabs(cpt) alone is only >= cpt: it equals cpt when cpt == 0,
// which would leave the largest value without a cut above it.
// Adding rt_eps keeps the sentinel strictly greater in that case too.
// NOTE(review): assumes rt_eps is a small positive constant - confirm at its definition
bst_float last = cpt + fabs(cpt) + rt_eps;
this->wspace.cut.push_back(last);
}
this->wspace.rptr.push_back(this->wspace.cut.size());
}
// reserve last value for global statistics

View File

@ -10,6 +10,7 @@
#include <cstring>
#include <algorithm>
#include <iostream>
#include "./io.h"
#include "./utils.h"
namespace xgboost {
@ -481,7 +482,11 @@ class QuantileSketchTemplate {
/*! \brief same as Summary, but uses an STL vector as the backing storage for the entries */
struct SummaryContainer : public Summary {
std::vector<Entry> space;
SummaryContainer(void) : Summary(NULL, 0) {
explicit SummaryContainer(void) : Summary(NULL, 0) {
}
/*!
 * \brief copy constructor: copies the entries of src into this container's
 *  own space and points data at that copy
 *
 *  Deliberately not marked explicit: an explicit copy constructor forbids
 *  copy-initialization, so the container could not be passed or returned
 *  by value, nor written as `SummaryContainer a = b;`.
 * \param src the container to copy from
 */
SummaryContainer(const SummaryContainer &src)
: Summary(NULL, src.size), space(src.space) {
// data must refer to our own copy of the entries, never to src.space
this->data = BeginPtr(this->space);
}
/*! \brief reserve space for summary */
inline void Reserve(size_t size) {