a fixed version

tqchen 2014-08-24 21:17:13 -07:00
parent 6daa1c365d
commit ce97f2fdf8
6 changed files with 22 additions and 19 deletions

View File

@@ -7,7 +7,7 @@ export LDFLAGS= -pthread -lm
 ifeq ($(no_omp),1)
 export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -DDISABLE_OPENMP -funroll-loops
 else
 export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp -funroll-loops
 endif
 # expose these flags to R CMD SHLIB
@@ -18,11 +18,11 @@ BIN = xgboost
 OBJ =
 SLIB = wrapper/libxgboostwrapper.so
 RLIB = wrapper/libxgboostR.so
-.PHONY: clean all R
+.PHONY: clean all R python
 all: $(BIN) wrapper/libxgboostwrapper.so
 R: wrapper/libxgboostR.so
+python: wrapper/libxgboostwrapper.so
 xgboost: src/xgboost_main.cpp src/io/io.cpp src/data.h src/tree/*.h src/tree/*.hpp src/gbm/*.h src/gbm/*.hpp src/utils/*.h src/learner/*.h src/learner/*.hpp
 # now the wrapper takes in two files. io and wrapper part
 wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp src/io/io.cpp src/*.h src/*/*.hpp src/*/*.h
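Note: with the new target in place, running make python builds only wrapper/libxgboostwrapper.so, which is all the Python wrapper needs, without also building the R library; listing python in .PHONY keeps a file of that name from shadowing the target.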

View File

@@ -110,7 +110,7 @@ struct EvalAMS : public IEvaluator {
   }
   virtual float Eval(const std::vector<float> &preds,
                      const MetaInfo &info) const {
-    const unsigned ndata = static_cast<unsigned>(preds.size());
+    const unsigned ndata = static_cast<unsigned>(info.labels.size());
     utils::Check(info.weights.size() == ndata, "we need weight to evaluate ams");
     std::vector< std::pair<float, unsigned> > rec(ndata);
@@ -207,10 +207,13 @@ struct EvalPrecisionRatio : public IEvaluator{
 struct EvalAuc : public IEvaluator {
   virtual float Eval(const std::vector<float> &preds,
                      const MetaInfo &info) const {
-    utils::Check(preds.size() == info.labels.size(), "label size predict size not match");
-    std::vector<unsigned> tgptr(2, 0); tgptr[1] = preds.size();
+    utils::Check(info.labels.size() != 0, "label set cannot be empty");
+    utils::Check(preds.size() % info.labels.size() == 0,
+                 "label size predict size not match");
+    std::vector<unsigned> tgptr(2, 0); tgptr[1] = info.labels.size();
     const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
-    utils::Check(gptr.back() == preds.size(),
+    utils::Check(gptr.back() == info.labels.size(),
                  "EvalAuc: group structure must match number of prediction");
     const unsigned ngroup = static_cast<unsigned>(gptr.size() - 1);
     // sum statictis
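Why the relaxed check: both evaluators previously required preds.size() == info.labels.size(), but when the booster emits several outputs per instance (for example with size_leaf_vector > 1), preds holds a whole number of blocks of labels.size() scores. A minimal standalone sketch of that invariant; the names here are illustrative, not the xgboost API:

#include <cassert>
#include <vector>

// With k outputs per instance, preds holds k blocks of n scores, so
// preds.size() is a multiple of labels.size() rather than equal to it.
int main() {
  const size_t n = 4, k = 3;                 // 4 instances, 3 outputs each
  std::vector<float> labels(n), preds(n * k);
  assert(preds.size() % labels.size() == 0); // the relaxed check
  assert(preds.size() / labels.size() == k); // number of prediction blocks
  return 0;
}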

View File

@@ -123,7 +123,7 @@ class RegLossObj : public IObjFunction{
       float p = loss.PredTransform(preds[i]);
       float w = info.GetWeight(j);
       if (info.labels[j] == 1.0f) w *= scale_pos_weight;
-      gpair[j] = bst_gpair(loss.FirstOrderGradient(p, info.labels[j]) * w,
+      gpair[i] = bst_gpair(loss.FirstOrderGradient(p, info.labels[j]) * w,
                            loss.SecondOrderGradient(p, info.labels[j]) * w);
     }
   }
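The one-character fix matters: the visible context suggests i walks all predictions while j picks the instance for label and weight lookup, so the output gradient pair belongs at prediction index i; storing at gpair[j] would repeatedly overwrite the first block. A hedged sketch of that indexing, with hypothetical names and a squared-error loss standing in for the real one:

#include <cstddef>
#include <vector>

struct bst_gpair { float grad, hess; };  // simplified stand-in

// preds holds several blocks of ndata scores; labels are per instance (j),
// but the output gradient pair is per prediction (i).
void ComputeGradient(const std::vector<float> &preds,
                     const std::vector<float> &labels,
                     std::vector<bst_gpair> *out) {
  const size_t ndata = labels.size();
  out->resize(preds.size());
  for (size_t i = 0; i < preds.size(); ++i) {
    const size_t j = i % ndata;            // instance index for label lookup
    const float g = preds[i] - labels[j];  // squared-error gradient, for example
    (*out)[i] = bst_gpair{g, 1.0f};        // store at i, not j: writing to [j]
  }                                        // would keep clobbering block 0
}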

View File

@@ -270,6 +270,7 @@ class TreeModel {
     param.num_nodes = param.num_roots;
     nodes.resize(param.num_nodes);
     stats.resize(param.num_nodes);
+    leaf_vector.resize(param.num_nodes * param.size_leaf_vector, 0.0f);
     for (int i = 0; i < param.num_nodes; i ++) {
       nodes[i].set_leaf(0.0f);
       nodes[i].set_parent(-1);
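The added resize keeps the per-node leaf vectors in step with the node arrays: leaf_vector is laid out flat as num_nodes blocks of size_leaf_vector floats, zero-filled on initialization. A hypothetical accessor illustrating that layout (not the actual TreeModel API):

#include <cstddef>
#include <vector>

// Node nid's extra leaf values occupy the half-open slice
// [nid * size_leaf_vector, (nid + 1) * size_leaf_vector).
// Assumes size_leaf_vector > 0 and nid is a valid node index.
const float *LeafVec(const std::vector<float> &leaf_vector,
                     size_t nid, size_t size_leaf_vector) {
  return &leaf_vector[nid * size_leaf_vector];
}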

View File

@@ -100,9 +100,9 @@ struct TrainParam{
     double w = CalcWeight(sum_grad, sum_hess);
     double ret = test_grad * w + 0.5 * (test_hess + reg_lambda) * Sqr(w);
     if (reg_alpha == 0.0f) {
-      return 2.0 * ret;
+      return - 2.0 * ret;
     } else {
-      return 2.0 * (ret + reg_alpha * std::abs(w));
+      return - 2.0 * (ret + reg_alpha * std::abs(w));
     }
   }
   // calculate weight given the statistics
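The sign flip is consistent with the usual closed-form weight. Assuming CalcWeight returns w* = -G/(H + lambda) in the reg_alpha == 0 case (its body is not shown in this diff), ret is always non-positive, so the positive gain is the negation of 2*ret:

w^* = -\frac{G}{H+\lambda}, \qquad
ret = G\,w^* + \tfrac{1}{2}(H+\lambda)(w^*)^2 = -\frac{G^2}{2(H+\lambda)}, \qquad
-2\,ret = \frac{G^2}{H+\lambda}.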
@@ -206,15 +206,15 @@ struct GradStats {
 };
 /*! \brief vectorized cv statistics */
-template<int vsize>
+template<unsigned vsize>
 struct CVGradStats : public GradStats {
   // additional statistics
   GradStats train[vsize], valid[vsize];
   // constructor
-  explicit CVGradStats(const TrainParam &param)
-      : GradStats(param) {
+  explicit CVGradStats(const TrainParam &param) {
     utils::Check(param.size_leaf_vector == vsize,
                  "CVGradStats: vsize must match size_leaf_vector");
+    this->Clear();
   }
   /*! \brief check if necessary information is ready */
   inline static void CheckInfo(const BoosterInfo &info) {
@@ -224,7 +224,7 @@ struct CVGradStats : public GradStats {
   /*! \brief clear the statistics */
   inline void Clear(void) {
     GradStats::Clear();
-    for (int i = 0; i < vsize; ++i) {
+    for (unsigned i = 0; i < vsize; ++i) {
       train[i].Clear(); valid[i].Clear();
     }
   }
@@ -233,7 +233,7 @@ struct CVGradStats : public GradStats {
                          bst_uint ridx) {
     GradStats::Add(gpair[ridx].grad, gpair[ridx].hess);
     const size_t step = info.fold_index.size();
-    for (int i = 0; i < vsize; ++i) {
+    for (unsigned i = 0; i < vsize; ++i) {
       const bst_gpair &b = gpair[(i + 1) * step + ridx];
       if (info.fold_index[ridx] == i) {
         valid[i].Add(b.grad, b.hess);
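The indexing gpair[(i + 1) * step + ridx] suggests how the gradient buffer is laid out for cross-validated training; this is a reading of the hunk, not a documented API:

// Implied layout, with step = info.fold_index.size() = number of rows:
//   gpair[0 * step + ridx]        overall gradient for row ridx
//   gpair[(i + 1) * step + ridx]  gradient of row ridx under CV copy i
// A row's own fold (info.fold_index[ridx] == i) is held out into valid[i];
// the same entry accumulates into train[i] for every other fold.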
@@ -245,18 +245,18 @@ struct CVGradStats : public GradStats {
   /*! \brief calculate gain of the solution */
   inline double CalcGain(const TrainParam &param) const {
     double ret = 0.0;
-    for (int i = 0; i < vsize; ++i) {
+    for (unsigned i = 0; i < vsize; ++i) {
       ret += param.CalcGain(train[i].sum_grad,
                             train[i].sum_hess,
                             vsize * valid[i].sum_grad,
                             vsize * valid[i].sum_hess);
     }
-    return ret;
+    return ret / vsize;
   }
   /*! \brief add statistics to the data */
   inline void Add(const CVGradStats &b) {
     GradStats::Add(b);
-    for (int i = 0; i < vsize; ++i) {
+    for (unsigned i = 0; i < vsize; ++i) {
       train[i].Add(b.train[i]);
       valid[i].Add(b.valid[i]);
     }
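Returning ret / vsize averages the per-fold gain estimates rather than summing them, keeping the result on the scale of a single CalcGain call as vsize varies. The vsize * valid[i] factors appear to rescale each held-out fold, which sees roughly 1/vsize of the rows, toward full-data scale before the gain is evaluated.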

View File

@@ -82,7 +82,6 @@ class ColMaker: public IUpdater<FMatrix> {
                      RegTree *p_tree) {
     this->InitData(gpair, fmat, info.root_index, *p_tree);
     this->InitNewNode(qexpand, gpair, fmat, info, *p_tree);
-
     for (int depth = 0; depth < param.max_depth; ++depth) {
       this->FindSplit(depth, this->qexpand, gpair, fmat, info, p_tree);
       this->ResetPosition(this->qexpand, fmat, *p_tree);