diff --git a/Makefile b/Makefile index 40d86fd37..0715ec379 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ export LDFLAGS= -pthread -lm ifeq ($(no_omp),1) export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -DDISABLE_OPENMP -funroll-loops else - export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp -funroll-loops + export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp -funroll-loops endif # expose these flags to R CMD SHLIB @@ -18,11 +18,11 @@ BIN = xgboost OBJ = SLIB = wrapper/libxgboostwrapper.so RLIB = wrapper/libxgboostR.so -.PHONY: clean all R +.PHONY: clean all R python all: $(BIN) wrapper/libxgboostwrapper.so R: wrapper/libxgboostR.so - +python: wrapper/libxgboostwrapper.so xgboost: src/xgboost_main.cpp src/io/io.cpp src/data.h src/tree/*.h src/tree/*.hpp src/gbm/*.h src/gbm/*.hpp src/utils/*.h src/learner/*.h src/learner/*.hpp # now the wrapper takes in two files. io and wrapper part wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp src/io/io.cpp src/*.h src/*/*.hpp src/*/*.h diff --git a/src/learner/evaluation-inl.hpp b/src/learner/evaluation-inl.hpp index 4df9cec4c..2d316162a 100644 --- a/src/learner/evaluation-inl.hpp +++ b/src/learner/evaluation-inl.hpp @@ -110,7 +110,7 @@ struct EvalAMS : public IEvaluator { } virtual float Eval(const std::vector<float> &preds, const MetaInfo &info) const { - const unsigned ndata = static_cast<unsigned>(preds.size()); + const unsigned ndata = static_cast<unsigned>(info.labels.size()); utils::Check(info.weights.size() == ndata, "we need weight to evaluate ams"); std::vector< std::pair<float, unsigned> > rec(ndata); @@ -207,10 +207,13 @@ struct EvalPrecisionRatio : public IEvaluator{ struct EvalAuc : public IEvaluator { virtual float Eval(const std::vector<float> &preds, const MetaInfo &info) const { - utils::Check(preds.size() == info.labels.size(), "label size predict size not match"); - std::vector<unsigned> tgptr(2, 0); tgptr[1] = preds.size(); + + utils::Check(info.labels.size() != 0, "label set cannot be empty"); + utils::Check(preds.size() 
% info.labels.size() == 0, + "label size predict size not match"); + std::vector<unsigned> tgptr(2, 0); tgptr[1] = info.labels.size(); const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr; - utils::Check(gptr.back() == preds.size(), + utils::Check(gptr.back() == info.labels.size(), "EvalAuc: group structure must match number of prediction"); const unsigned ngroup = static_cast<unsigned>(gptr.size() - 1); // sum statictis diff --git a/src/learner/objective-inl.hpp b/src/learner/objective-inl.hpp index 41af8b605..7122f6398 100644 --- a/src/learner/objective-inl.hpp +++ b/src/learner/objective-inl.hpp @@ -123,7 +123,7 @@ class RegLossObj : public IObjFunction{ float p = loss.PredTransform(preds[i]); float w = info.GetWeight(j); if (info.labels[j] == 1.0f) w *= scale_pos_weight; - gpair[j] = bst_gpair(loss.FirstOrderGradient(p, info.labels[j]) * w, + gpair[i] = bst_gpair(loss.FirstOrderGradient(p, info.labels[j]) * w, loss.SecondOrderGradient(p, info.labels[j]) * w); } } diff --git a/src/tree/model.h b/src/tree/model.h index af99a5145..f91e453f8 100644 --- a/src/tree/model.h +++ b/src/tree/model.h @@ -270,6 +270,7 @@ class TreeModel { param.num_nodes = param.num_roots; nodes.resize(param.num_nodes); stats.resize(param.num_nodes); + leaf_vector.resize(param.num_nodes * param.size_leaf_vector, 0.0f); for (int i = 0; i < param.num_nodes; i ++) { nodes[i].set_leaf(0.0f); nodes[i].set_parent(-1); diff --git a/src/tree/param.h b/src/tree/param.h index 6639c39bd..52c273749 100644 --- a/src/tree/param.h +++ b/src/tree/param.h @@ -100,9 +100,9 @@ struct TrainParam{ double w = CalcWeight(sum_grad, sum_hess); double ret = test_grad * w + 0.5 * (test_hess + reg_lambda) * Sqr(w); if (reg_alpha == 0.0f) { - return 2.0 * ret; + return - 2.0 * ret; } else { - return 2.0 * (ret + reg_alpha * std::abs(w)); + return - 2.0 * (ret + reg_alpha * std::abs(w)); } } // calculate weight given the statistics @@ -206,15 +206,15 @@ struct GradStats { }; /*! 
\brief vectorized cv statistics */ -template<int vsize> +template<unsigned vsize> struct CVGradStats : public GradStats { // additional statistics GradStats train[vsize], valid[vsize]; // constructor - explicit CVGradStats(const TrainParam &param) - : GradStats(param) { + explicit CVGradStats(const TrainParam &param) { utils::Check(param.size_leaf_vector == vsize, "CVGradStats: vsize must match size_leaf_vector"); + this->Clear(); } /*! \brief check if necessary information is ready */ inline static void CheckInfo(const BoosterInfo &info) { @@ -224,7 +224,7 @@ struct CVGradStats : public GradStats { /*! \brief clear the statistics */ inline void Clear(void) { GradStats::Clear(); - for (int i = 0; i < vsize; ++i) { + for (unsigned i = 0; i < vsize; ++i) { train[i].Clear(); valid[i].Clear(); } } @@ -233,7 +233,7 @@ bst_uint ridx) { GradStats::Add(gpair[ridx].grad, gpair[ridx].hess); const size_t step = info.fold_index.size(); - for (int i = 0; i < vsize; ++i) { + for (unsigned i = 0; i < vsize; ++i) { const bst_gpair &b = gpair[(i + 1) * step + ridx]; if (info.fold_index[ridx] == i) { valid[i].Add(b.grad, b.hess); @@ -245,18 +245,18 @@ /*! \brief calculate gain of the solution */ inline double CalcGain(const TrainParam &param) const { double ret = 0.0; - for (int i = 0; i < vsize; ++i) { + for (unsigned i = 0; i < vsize; ++i) { ret += param.CalcGain(train[i].sum_grad, train[i].sum_hess, vsize * valid[i].sum_grad, vsize * valid[i].sum_hess); } - return ret; + return ret / vsize; } /*! 
\brief add statistics to the data */ inline void Add(const CVGradStats &b) { GradStats::Add(b); - for (int i = 0; i < vsize; ++i) { + for (unsigned i = 0; i < vsize; ++i) { train[i].Add(b.train[i]); valid[i].Add(b.valid[i]); } diff --git a/src/tree/updater_colmaker-inl.hpp b/src/tree/updater_colmaker-inl.hpp index 33c688abc..e1e90c5c5 100644 --- a/src/tree/updater_colmaker-inl.hpp +++ b/src/tree/updater_colmaker-inl.hpp @@ -82,7 +82,6 @@ class ColMaker: public IUpdater { RegTree *p_tree) { this->InitData(gpair, fmat, info.root_index, *p_tree); this->InitNewNode(qexpand, gpair, fmat, info, *p_tree); - for (int depth = 0; depth < param.max_depth; ++depth) { this->FindSplit(depth, this->qexpand, gpair, fmat, info, p_tree); this->ResetPosition(this->qexpand, fmat, *p_tree);