add ntree limit

2014-09-01 15:10:19 -07:00
parent 4c451de90b
commit 4592e500cb
10 changed files with 53 additions and 23 deletions
--- a/src/gbm/gblinear-inl.hpp
+++ b/src/gbm/gblinear-inl.hpp
@@ -105,7 +105,10 @@ class GBLinear : public IGradBooster {
  virtual void Predict(IFMatrix *p_fmat,
                       int64_t buffer_offset,
                       const BoosterInfo &info,
-                       std::vector<float> *out_preds) {
+                       std::vector<float> *out_preds,
+                       unsigned ntree_limit = 0) {
+    utils::Check(ntree_limit == 0,
+                 "GBLinear::Predict ntrees is only valid for gbtree predictor");
    std::vector<float> &preds = *out_preds;
    preds.resize(0);
    // start collecting the prediction
--- a/src/gbm/gbm.h
+++ b/src/gbm/gbm.h
@@ -57,11 +57,14 @@ class IGradBooster {
   *  the size of buffer is set by convention using IGradBooster.SetParam("num_pbuffer","size")
   * \param info extra side information that may be needed for prediction
   * \param out_preds output vector to hold the predictions
+   * \param ntree_limit limit on the number of trees used in prediction; 0 means no limit.
+   *    This parameter is only valid for gbtree, not for gblinear
   */
  virtual void Predict(IFMatrix *p_fmat,
                       int64_t buffer_offset,
                       const BoosterInfo &info,
-                       std::vector<float> *out_preds) = 0;
+                       std::vector<float> *out_preds,
+                       unsigned ntree_limit = 0) = 0;
  /*!
   * \brief dump the model in text format
   * \param fmap feature map that may help give interpretations of feature
--- a/src/gbm/gbtree-inl.hpp
+++ b/src/gbm/gbtree-inl.hpp
@@ -105,7 +105,8 @@ class GBTree : public IGradBooster {
  virtual void Predict(IFMatrix *p_fmat,
                       int64_t buffer_offset,
                       const BoosterInfo &info,
-                       std::vector<float> *out_preds) {
+                       std::vector<float> *out_preds,
+                       unsigned ntree_limit = 0) {
    int nthread;
    #pragma omp parallel
    {
@@ -137,7 +138,8 @@ class GBTree : public IGradBooster {
          this->Pred(batch[i],
                     buffer_offset < 0 ? -1 : buffer_offset + ridx,
                     gid, info.GetRoot(ridx), &feats,
-                     &preds[ridx * mparam.num_output_group + gid], stride);
+                     &preds[ridx * mparam.num_output_group + gid], stride, 
+                     ntree_limit);
        }
      }
    }
@@ -212,14 +214,16 @@ class GBTree : public IGradBooster {
                   int bst_group,
                   unsigned root_index,
                   tree::RegTree::FVec *p_feats,
-                   float *out_pred, size_t stride) {
+                   float *out_pred, size_t stride, unsigned ntree_limit) {
    size_t itop = 0;
    float  psum = 0.0f;
    // sum of leaf vector 
    std::vector<float> vec_psum(mparam.size_leaf_vector, 0.0f);
    const int64_t bid = mparam.BufferOffset(buffer_index, bst_group);
+    // remaining tree budget: unlimited (max unsigned) when ntree_limit == 0
+    unsigned treeleft = ntree_limit == 0 ? std::numeric_limits<unsigned>::max() : ntree_limit;
    // load buffered results if any
-    if (bid >= 0) {
+    if (bid >= 0 && ntree_limit == 0) {
      itop = pred_counter[bid];
      psum = pred_buffer[bid];
      for (int i = 0; i < mparam.size_leaf_vector; ++i) {
@@ -235,12 +239,13 @@ class GBTree : public IGradBooster {
          for (int j = 0; j < mparam.size_leaf_vector; ++j) {
            vec_psum[j] += trees[i]->leafvec(tid)[j];
          }
+          if(--treeleft == 0) break;
        }
      }
      p_feats->Drop(inst);
    }
    // updated the buffered results
-    if (bid >= 0) {
+    if (bid >= 0 && ntree_limit == 0) {
      pred_counter[bid] = static_cast<unsigned>(trees.size());
      pred_buffer[bid] = psum;
      for (int i = 0; i < mparam.size_leaf_vector; ++i) {
--- a/src/learner/learner-inl.hpp
+++ b/src/learner/learner-inl.hpp
@@ -212,11 +212,14 @@ class BoostLearner {
   * \param data input data
   * \param output_margin whether to only predict margin value instead of transformed prediction
   * \param out_preds output vector that stores the prediction
+   * \param ntree_limit limit on the number of trees used by the boosted tree
+   *   predictor; 0 means all trees are used
   */
  inline void Predict(const DMatrix &data,
                      bool output_margin,
-                      std::vector<float> *out_preds) const {
-    this->PredictRaw(data, out_preds);
+                      std::vector<float> *out_preds,
+                      unsigned ntree_limit = 0) const {
+    this->PredictRaw(data, out_preds, ntree_limit);
    if (!output_margin) {
      obj_->PredTransform(out_preds);
    }
@@ -246,11 +249,14 @@ class BoostLearner {
   * \brief get un-transformed prediction
   * \param data training data matrix
   * \param out_preds output vector that stores the prediction
+   * \param ntree_limit limit on the number of trees used by the boosted tree
+   *   predictor; 0 means all trees are used
   */
  inline void PredictRaw(const DMatrix &data,
-                         std::vector<float> *out_preds) const {
+                         std::vector<float> *out_preds,
+                         unsigned ntree_limit = 0) const {
    gbm_->Predict(data.fmat(), this->FindBufferOffset(data),
-                  data.info.info, out_preds);
+                  data.info.info, out_preds, ntree_limit);
    // add base margin
    std::vector<float> &preds = *out_preds;
    const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());