add single instance prediction

2015-01-19 08:07:22 -08:00 · 2015-01-19 08:07:22 -08:00 · 1211ea40c9
commit 1211ea40c9
parent 748389f052
5 changed files with 69 additions and 2 deletions
--- a/3
+++ b/3
@ -17,7 +17,7 @@ else
 endif

 # specify tensor path
-BIN = xgboost 
+BIN = xgboost xgbpred
 MOCKBIN = xgboost.mock
 OBJ = updater.o gbm.o io.o main.o 
 MPIBIN = xgboost.mpi
@ -37,6 +37,7 @@ main.o: src/xgboost_main.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner
 xgboost.mpi:  updater.o gbm.o io.o main.o subtree/rabit/lib/librabit_mpi.a
 xgboost.mock: updater.o gbm.o io.o main.o subtree/rabit/lib/librabit_mock.a
 xgboost:  updater.o gbm.o io.o main.o subtree/rabit/lib/librabit.a
+xgbpred:  updater.o gbm.o io.o src/xgbpred.cpp subtree/rabit/lib/librabit.a
 wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h  updater.o gbm.o io.o subtree/rabit/lib/librabit.a

 # dependency on rabit
--- a/src/gbm/gblinear-inl.hpp
+++ b/src/gbm/gblinear-inl.hpp
@ -136,6 +136,15 @@ class GBLinear : public IGradBooster {
      }
    }
  }
+  /*!
+   * \brief predict the scores of a single instance
+   * \param inst the instance to predict
+   * \param out_preds output vector; resized to num_output_group entries and
+   *        its storage is passed to Pred
+   * \param ntree_limit not used by this booster
+   * \param root_index not used by this booster
+   */
+  virtual void Predict(const SparseBatch::Inst &inst,
+                       std::vector<float> *out_preds,
+                       unsigned ntree_limit,
+                       unsigned root_index) {
+    const int ngroup = model.param.num_output_group;
+    // size the output before handing its raw pointer to Pred; without this an
+    // empty vector would be written through
+    // (assumes Pred writes one score per output group - TODO confirm)
+    out_preds->resize(ngroup);
+    // NOTE(review): gid is not passed to Pred, so every iteration makes the
+    // same call; presumably Pred fills all groups itself and a single call
+    // would do - confirm against Pred before simplifying
+    for (int gid = 0; gid < ngroup; ++gid) {
+      this->Pred(inst, BeginPtr(*out_preds));
+    }
+  }
  virtual void PredictLeaf(IFMatrix *p_fmat,
                           const BoosterInfo &info,
                           std::vector<float> *out_preds,
--- a/src/gbm/gbm.h
+++ b/src/gbm/gbm.h
@ -83,7 +83,23 @@ class IGradBooster {
                       int64_t buffer_offset,
                       const BoosterInfo &info,
                       std::vector<float> *out_preds,
-                       unsigned ntree_limit = 0) = 0;
+                       unsigned ntree_limit = 0) = 0;  
+  /*!
+   * \brief online prediction function, predict score for one instance at a time
+   *  NOTE: use the batch prediction interface if possible, batch prediction is usually
+   *        more efficient than online prediction
+   *        This function is NOT threadsafe, make sure you only call from one thread
+   *
+   *  The default arguments below are declared on this interface only, so they
+   *  apply to calls made through an IGradBooster.
+   *
+   * \param inst the instance you want to predict
+   * \param out_preds output vector to hold the predictions
+   * \param ntree_limit limit the number of trees used in prediction, defaults to 0
+   *        (NOTE(review): presumably 0 means "no limit" - confirm)
+   * \param root_index the root index, defaults to 0
+   * \sa Predict
+   */
+  virtual void Predict(const SparseBatch::Inst &inst,
+                       std::vector<float> *out_preds,
+                       unsigned ntree_limit = 0,
+                       unsigned root_index = 0)  = 0;
  /*!
   * \brief predict the leaf index of each tree, the output will be nsample * ntree vector
   *        this is only valid in gbtree predictor
--- a/src/gbm/gbtree-inl.hpp
+++ b/src/gbm/gbtree-inl.hpp
@ -160,6 +160,22 @@ class GBTree : public IGradBooster {
      }
    }
  }  
+  /*!
+   * \brief predict the scores of a single instance
+   * \param inst the instance to predict
+   * \param out_preds output vector, resized to
+   *        num_output_group * (size_leaf_vector + 1) entries
+   * \param ntree_limit forwarded unchanged to Pred
+   * \param root_index forwarded unchanged to Pred
+   */
+  virtual void Predict(const SparseBatch::Inst &inst,
+                       std::vector<float> *out_preds,
+                       unsigned ntree_limit,
+                       unsigned root_index) {
+    // create the scratch feature vector on first use; it is shared state,
+    // which is why this entry point must not be called concurrently
+    if (thread_temp.empty()) {
+      thread_temp.resize(1, tree::RegTree::FVec());
+      thread_temp[0].Init(mparam.num_feature);
+    }
+    std::vector<float> &preds = *out_preds;
+    preds.resize(mparam.num_output_group * (mparam.size_leaf_vector + 1));
+    tree::RegTree::FVec &scratch = thread_temp[0];
+    // one Pred call per output group, each given the address of its own slot
+    for (int gid = 0; gid < mparam.num_output_group; ++gid) {
+      float *slot = &preds[gid];
+      this->Pred(inst, -1, gid, root_index, &scratch,
+                 slot, mparam.num_output_group,
+                 ntree_limit);
+    }
+  }
  virtual void PredictLeaf(IFMatrix *p_fmat,
                           const BoosterInfo &info,
                           std::vector<float> *out_preds,
--- a/src/learner/learner-inl.hpp
+++ b/src/learner/learner-inl.hpp
@ -334,6 +334,31 @@ class BoostLearner : public rabit::ISerializable {
      }
    }
  }
+  /*!
+   * \brief online prediction function, scores a single instance
+   *  NOTE: prefer the batch prediction interface when possible, batch prediction
+   *        is usually more efficient than online prediction
+   *        This function is NOT threadsafe, call it from one thread only
+   *
+   * \param inst the instance you want to predict
+   * \param output_margin when true, return the margin value and skip the
+   *        objective's PredTransform
+   * \param out_preds output vector to hold the predictions
+   * \param ntree_limit limit the number of trees used in prediction
+   * \sa Predict
+   */
+  inline void Predict(const SparseBatch::Inst &inst,
+                      bool output_margin,
+                      std::vector<float> *out_preds,
+                      unsigned ntree_limit = 0) const {
+    gbm_->Predict(inst, out_preds, ntree_limit);
+    std::vector<float> &preds = *out_preds;
+    // base_score is added only when the booster produced exactly one value
+    if (preds.size() == 1) {
+      preds[0] += mparam.base_score;
+    }
+    // margin requested: hand back the untransformed scores
+    if (output_margin) return;
+    obj_->PredTransform(out_preds);
+  }
  /*! \brief dump model out */
  inline std::vector<std::string> DumpModel(const utils::FeatMap& fmap, int option) {
    return gbm_->DumpModel(fmap, option);