From f6c763a2a7a9fcae6798246004f345bb808e475f Mon Sep 17 00:00:00 2001
From: "tqchen@graphlab.com" <tqchen@graphlab.com>
Date: Mon, 18 Aug 2014 10:53:15 -0700
Subject: [PATCH] fix base score, and print message

---
 Makefile                       |  2 +-
 src/gbm/gbtree-inl.hpp         |  6 +++++-
 src/learner/learner-inl.hpp    |  9 ++++++++-
 src/learner/objective-inl.hpp  |  9 ++++++---
 src/learner/objective.h        |  4 ++--
 src/tree/updater_prune-inl.hpp | 21 +++++++++++++++------
 src/utils/omp.h                |  2 ++
 7 files changed, 39 insertions(+), 14 deletions(-)

diff --git a/Makefile b/Makefile
index 6fae8eb0a..ca58f0eb3 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ export CXX = g++
 export LDFLAGS= -pthread -lm
 
 ifeq ($(no_omp),1)
-	export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas
+	export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -DDISABLE_OPENMP
 else
 	export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp
 endif
diff --git a/src/gbm/gbtree-inl.hpp b/src/gbm/gbtree-inl.hpp
index 216240b74..b0bd0f99a 100644
--- a/src/gbm/gbtree-inl.hpp
+++ b/src/gbm/gbtree-inl.hpp
@@ -233,7 +233,7 @@ class GBTree : public IGradBooster {
       pred_counter[bid] = static_cast<unsigned>(trees.size());
       pred_buffer[bid] = psum;
     }
-    return psum;
+    return psum + mparam.base_score;
   }
   // initialize thread local space for prediction
   inline void InitThreadTemp(int nthread) {
@@ -296,6 +296,8 @@ class GBTree : public IGradBooster {
   };
   /*! \brief model parameters */
   struct ModelParam {
+    /*! \brief base prediction score of everything */
+    float base_score;
     /*! \brief number of trees */
     int num_trees;
     /*! \brief number of root: default 0, means single tree */
@@ -314,6 +316,7 @@ class GBTree : public IGradBooster {
     int reserved[32];
     /*! \brief constructor */
     ModelParam(void) {
+      base_score = 0.0f;
       num_trees = 0;
       num_roots = num_feature = 0;
       num_pbuffer = 0;
@@ -326,6 +329,7 @@ class GBTree : public IGradBooster {
      * \param val value of the parameter
      */
     inline void SetParam(const char *name, const char *val) {
+      if (!strcmp("base_score", name)) base_score = static_cast<float>(atof(val));
       if (!strcmp("num_pbuffer", name)) num_pbuffer = atol(val);
       if (!strcmp("num_output_group", name)) num_output_group = atol(val);
       if (!strcmp("bst:num_roots", name)) num_roots = atoi(val);
diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp
index fe6f1aa43..09167d8bf 100644
--- a/src/learner/learner-inl.hpp
+++ b/src/learner/learner-inl.hpp
@@ -85,15 +85,22 @@ class BoostLearner {
       if (!strcmp(name, "booster")) name_gbm_ = val;
       mparam.SetParam(name, val);
     }
+    if (gbm_ != NULL) gbm_->SetParam(name, val);
+    if (obj_ != NULL) obj_->SetParam(name, val);
     cfg_.push_back(std::make_pair(std::string(name), std::string(val)));
   }
   /*!
    * \brief initialize the model
    */
   inline void InitModel(void) {
+    // initialize model
     this->InitObjGBM();
-    // adapt the base score
+    // reset the base score
     mparam.base_score = obj_->ProbToMargin(mparam.base_score);
+    char tmp[32];
+    snprintf(tmp, sizeof(tmp), "%g", mparam.base_score);
+    this->SetParam("base_score", tmp);
+    // initialize GBM model
     gbm_->InitModel();
   }
   /*!
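
The learner change above converts the user-facing base_score (a probability for classification objectives) into a margin once, at InitModel time, then pushes it into the GBM model so that prediction can add it back onto the tree sum. A minimal standalone sketch of that round trip, assuming a logistic objective; the helper names here are illustrative, not xgboost's:

#include <cmath>
#include <cstdio>

// logit: inverse of the sigmoid 1/(1+exp(-x)); this is what ProbToMargin
// amounts to for a logistic loss (hypothetical standalone version)
float ProbToMarginLogistic(float base_score) {
  return -std::log(1.0f / base_score - 1.0f);
}

float Sigmoid(float x) { return 1.0f / (1.0f + std::exp(-x)); }

int main() {
  float base_score = 0.5f;                          // user-facing default
  float margin = ProbToMarginLogistic(base_score);  // 0.0 for p = 0.5
  float tree_sum = 0.3f;                            // pretend sum of tree leaves
  // after this patch, prediction returns tree_sum + stored base margin
  std::printf("prediction = %f\n", Sigmoid(tree_sum + margin));
  return 0;
}

For base_score = 0.5 the stored margin is 0, so the default configuration behaves the same before and after the fix; only non-default base scores change.
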
diff --git a/src/learner/objective-inl.hpp b/src/learner/objective-inl.hpp
index 163d0d283..e45250950 100644
--- a/src/learner/objective-inl.hpp
+++ b/src/learner/objective-inl.hpp
@@ -124,7 +124,7 @@ class RegLossObj : public IObjFunction{
                                loss.SecondOrderGradient(p, info.labels[j]) * w);
     }
   }
-  virtual const char* DefaultEvalMetric(void) {
+  virtual const char* DefaultEvalMetric(void) const {
     return loss.DefaultEvalMetric();
   }
   virtual void PredTransform(std::vector<float> *io_preds) {
@@ -135,6 +135,9 @@ class RegLossObj : public IObjFunction{
       preds[j] = loss.PredTransform(preds[j]);
     }
   }
+  virtual float ProbToMargin(float base_score) const {
+    return loss.ProbToMargin(base_score);
+  }
 
  protected:
   float scale_pos_weight;
@@ -192,7 +195,7 @@ class SoftmaxMultiClassObj : public IObjFunction {
   virtual void EvalTransform(std::vector<float> *io_preds) {
     this->Transform(io_preds, 0);
   }
-  virtual const char* DefaultEvalMetric(void) {
+  virtual const char* DefaultEvalMetric(void) const {
     return "merror";
   }
 
@@ -320,7 +323,7 @@ class LambdaRankObj : public IObjFunction {
       }
     }
   }
-  virtual const char* DefaultEvalMetric(void) {
+  virtual const char* DefaultEvalMetric(void) const {
     return "map";
   }
 
diff --git a/src/learner/objective.h b/src/learner/objective.h
index ff870c034..513219093 100644
--- a/src/learner/objective.h
+++ b/src/learner/objective.h
@@ -32,7 +32,7 @@ class IObjFunction{
                            int iter,
                            std::vector<bst_gpair> *out_gpair) = 0;
   /*! \return the default evaluation metric for the objective */
-  virtual const char* DefaultEvalMetric(void) = 0;
+  virtual const char* DefaultEvalMetric(void) const = 0;
   // the following functions are optional, most of the time the default implementation is good enough
   /*!
    * \brief transform prediction values, this is only called when Prediction is called
@@ -53,7 +53,7 @@ class IObjFunction{
    * used by gradient boosting
    * \return transformed value
   */
-  virtual float ProbToMargin(float base_score) {
+  virtual float ProbToMargin(float base_score) const {
    return base_score;
  }
 };
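
Note that the objective edits above add const to DefaultEvalMetric and ProbToMargin in the base interface and in every implementation within the same commit. That lockstep matters: this is pre-C++11 code with no override keyword, so a derived method whose const qualification differs from the base virtual silently declares a new function instead of overriding it. A compilable sketch of the pitfall (Base, Good, and Oops are illustrative names, not from the patch):

#include <cstdio>

struct Base {
  virtual const char* Name(void) const { return "base"; }
  virtual ~Base() {}
};

struct Good : public Base {
  virtual const char* Name(void) const { return "good"; }  // overrides Base::Name
};

struct Oops : public Base {
  // missing const: this is a brand-new overload that hides, not overrides,
  // and C++98 gives no diagnostic by default
  virtual const char* Name(void) { return "oops"; }
};

int main() {
  Good g; Oops o;
  Base *pg = &g, *po = &o;
  std::printf("%s %s\n", pg->Name(), po->Name());  // prints "good base"
  return 0;
}

Changing the base signature and all three objectives in one commit, as this patch does, avoids exactly that silent mismatch.
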
diff --git a/src/tree/updater_prune-inl.hpp b/src/tree/updater_prune-inl.hpp
index bfb71b727..b5205080b 100644
--- a/src/tree/updater_prune-inl.hpp
+++ b/src/tree/updater_prune-inl.hpp
@@ -19,6 +19,7 @@ class TreePruner: public IUpdater {
   // set training parameter
   virtual void SetParam(const char *name, const char *val) {
     param.SetParam(name, val);
+    if (!strcmp(name, "silent")) silent = atoi(val);
   }
   // update the tree, do pruning
   virtual void Update(const std::vector<bst_gpair> &gpair,
@@ -32,33 +33,41 @@ class TreePruner: public IUpdater {
 
  private:
   // try to prune off current leaf
-  inline void TryPruneLeaf(RegTree &tree, int nid, int depth) {
-    if (tree[nid].is_root()) return;
+  inline int TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) {
+    if (tree[nid].is_root()) return npruned;
     int pid = tree[nid].parent();
     RegTree::NodeStat &s = tree.stat(pid);
     ++s.leaf_child_cnt;
     if (s.leaf_child_cnt >= 2 && param.need_prune(s.loss_chg, depth - 1)) {
       // need to be pruned
       tree.ChangeToLeaf(pid, param.learning_rate * s.base_weight);
       // tail recursion
-      this->TryPruneLeaf(tree, pid, depth - 1);
-    }
+      return this->TryPruneLeaf(tree, pid, depth - 1, npruned + 2);
+    } else {
+      return npruned;
+    }
   }
   /*! \brief do pruning of a tree */
   inline void DoPrune(RegTree &tree) {
+    int npruned = 0;
     // initialize auxiliary statistics
     for (int nid = 0; nid < tree.param.num_nodes; ++nid) {
       tree.stat(nid).leaf_child_cnt = 0;
     }
     for (int nid = 0; nid < tree.param.num_nodes; ++nid) {
       if (tree[nid].is_leaf()) {
-        this->TryPruneLeaf(tree, nid, tree.GetDepth(nid));
+        npruned = this->TryPruneLeaf(tree, nid, tree.GetDepth(nid), npruned);
       }
     }
+    if (silent == 0) {
+      printf("tree pruning end, %d roots, %d extra nodes, %d pruned nodes, max_depth=%d\n",
+             tree.param.num_roots, tree.num_extra_nodes(), npruned, tree.MaxDepth());
+    }
   }
 
  private:
+  // silent option: do not print pruning statistics when nonzero
+  int silent;
   // training parameter
   TrainParam param;
 };
diff --git a/src/utils/omp.h b/src/utils/omp.h
index 46127f631..0423448e2 100644
--- a/src/utils/omp.h
+++ b/src/utils/omp.h
@@ -8,7 +8,9 @@
 #if defined(_OPENMP)
 #include <omp.h>
 #else
+#ifndef DISABLE_OPENMP
 #warning "OpenMP is not available, compile to single thread code"
+#endif
 inline int omp_get_thread_num() { return 0; }
 inline int omp_get_num_threads() { return 1; }
 inline void omp_set_num_threads(int nthread) {}
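
The omp.h stub plus the new -DDISABLE_OPENMP flag (set by the no_omp=1 branch in the Makefile) let the same OpenMP-annotated code build serially, and now without the #warning when the serial build is intentional. A self-contained sketch of the pattern; the guard and stubs mirror the header above, while the main() is illustrative:

// when OpenMP is absent the omp_* calls fall back to no-op inline stubs,
// and DISABLE_OPENMP suppresses the warning for deliberate serial builds
#if defined(_OPENMP)
#include <omp.h>
#else
#ifndef DISABLE_OPENMP
#warning "OpenMP is not available, compile to single thread code"
#endif
inline int omp_get_thread_num() { return 0; }
inline int omp_get_num_threads() { return 1; }
#endif

#include <cstdio>

int main() {
  // without -fopenmp the pragma is ignored (hence -Wno-unknown-pragmas
  // in the Makefile) and the loop simply runs on one thread
  #pragma omp parallel for
  for (int i = 0; i < 4; ++i) {
    std::printf("i=%d thread=%d\n", i, omp_get_thread_num());
  }
  return 0;
}

Built with -fopenmp the loop runs in parallel; built without it the stubs take over, and adding -DDISABLE_OPENMP keeps the serial build warning-free.
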