diff --git a/Makefile b/Makefile
index 6f5e6223f..f3e834e07 100644
--- a/Makefile
+++ b/Makefile
@@ -17,7 +17,7 @@ else
 endif
 
 # specify tensor path
-BIN = xgboost 
+BIN = xgboost xgbpred
 MOCKBIN = xgboost.mock
 OBJ = updater.o gbm.o io.o main.o 
 MPIBIN = xgboost.mpi
@@ -37,6 +37,7 @@ main.o: src/xgboost_main.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner
 xgboost.mpi:  updater.o gbm.o io.o main.o subtree/rabit/lib/librabit_mpi.a
 xgboost.mock: updater.o gbm.o io.o main.o subtree/rabit/lib/librabit_mock.a
 xgboost:  updater.o gbm.o io.o main.o subtree/rabit/lib/librabit.a
+xgbpred:  updater.o gbm.o io.o src/xgbpred.cpp subtree/rabit/lib/librabit.a
 wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h  updater.o gbm.o io.o subtree/rabit/lib/librabit.a
 
 # dependency on rabit
diff --git a/src/gbm/gblinear-inl.hpp b/src/gbm/gblinear-inl.hpp
index 8cbe8becf..de9ee6173 100644
--- a/src/gbm/gblinear-inl.hpp
+++ b/src/gbm/gblinear-inl.hpp
@@ -136,6 +136,15 @@ class GBLinear : public IGradBooster {
       }
     }
   }
+  virtual void Predict(const SparseBatch::Inst &inst,
+                       std::vector<float> *out_preds,
+                       unsigned ntree_limit,
+                       unsigned root_index) {
+    // online prediction for one instance; ntree_limit and root_index are unused here
+    // size the output before handing Pred a raw pointer (assumes one score per group)
+    out_preds->resize(model.param.num_output_group);
+    this->Pred(inst, BeginPtr(*out_preds));  // no group index is passed, so call once
+  }
   virtual void PredictLeaf(IFMatrix *p_fmat,
                            const BoosterInfo &info,
                            std::vector<float> *out_preds,
diff --git a/src/gbm/gbm.h b/src/gbm/gbm.h
index 57b8c0573..f07d277ac 100644
--- a/src/gbm/gbm.h
+++ b/src/gbm/gbm.h
@@ -83,7 +83,23 @@ class IGradBooster {
                        int64_t buffer_offset,
                        const BoosterInfo &info,
                        std::vector<float> *out_preds,
-                       unsigned ntree_limit = 0) = 0;
+                       unsigned ntree_limit = 0) = 0;  
+  /*!
+   * \brief online prediction function, predict score for one instance at a time
+   *  NOTE: use the batch prediction interface if possible, batch prediction is usually
+   *        more efficient than online prediction
+   *        This function is NOT threadsafe, make sure you only call from one thread
+   *        The default arguments below only apply to calls made through IGradBooster
+   * \param inst the instance you want to predict
+   * \param out_preds output vector to hold the predictions for this instance
+   * \param ntree_limit limit the number of trees used in prediction
+   * \param root_index the root index
+   * \sa Predict
+   */
+  virtual void Predict(const SparseBatch::Inst &inst,
+                       std::vector<float> *out_preds,
+                       unsigned ntree_limit = 0,
+                       unsigned root_index = 0)  = 0;
   /*!
    * \brief predict the leaf index of each tree, the output will be nsample * ntree vector
    *        this is only valid in gbtree predictor
diff --git a/src/gbm/gbtree-inl.hpp b/src/gbm/gbtree-inl.hpp
index c08d15dd7..66b03dd87 100644
--- a/src/gbm/gbtree-inl.hpp
+++ b/src/gbm/gbtree-inl.hpp
@@ -160,6 +160,22 @@ class GBTree : public IGradBooster {
       }
     }
   }  
+  virtual void Predict(const SparseBatch::Inst &inst,
+                       std::vector<float> *out_preds,
+                       unsigned ntree_limit,
+                       unsigned root_index) {
+    out_preds->resize(mparam.num_output_group * (mparam.size_leaf_vector + 1));
+    // the scratch entry thread_temp[0] is created on first use
+    if (thread_temp.empty()) {
+      thread_temp.resize(1, tree::RegTree::FVec());
+      thread_temp[0].Init(mparam.num_feature);
+    }
+    // run Pred once per output group, writing from offset gid onwards
+    for (int gid = 0; gid < mparam.num_output_group; ++gid) {
+      this->Pred(inst, -1, gid, root_index, &thread_temp[0], &(*out_preds)[gid],
+                 mparam.num_output_group, ntree_limit);
+    }
+  }
   virtual void PredictLeaf(IFMatrix *p_fmat,
                            const BoosterInfo &info,
                            std::vector<float> *out_preds,
diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp
index 82a56e1fc..630f8fa20 100644
--- a/src/learner/learner-inl.hpp
+++ b/src/learner/learner-inl.hpp
@@ -334,6 +334,31 @@ class BoostLearner : public rabit::ISerializable {
       }
     }
   }
+  /*!
+   * \brief online prediction function, predict score for one instance at a time
+   *  NOTE: use the batch prediction interface if possible, batch prediction is usually
+   *        more efficient than online prediction
+   *        This function is NOT threadsafe, make sure you only call from one thread
+   * \param inst the instance you want to predict
+   * \param output_margin whether to only predict margin value instead of transformed prediction
+   * \param out_preds output vector to hold the predictions
+   * \param ntree_limit limit the number of trees used in prediction
+   * \param root_index the root index, forwarded unchanged to the booster
+   */
+  inline void Predict(const SparseBatch::Inst &inst,
+                      bool output_margin,
+                      std::vector<float> *out_preds,
+                      unsigned ntree_limit = 0,
+                      unsigned root_index = 0) const {
+    gbm_->Predict(inst, out_preds, ntree_limit, root_index);
+    // NOTE(review): base_score is added only for a single output; confirm for multi-output
+    if (out_preds->size() == 1) {
+      (*out_preds)[0] += mparam.base_score;
+    }
+    if (!output_margin) {
+      obj_->PredTransform(out_preds);
+    }
+  }
   /*! \brief dump model out */
   inline std::vector<std::string> DumpModel(const utils::FeatMap& fmap, int option) {
     return gbm_->DumpModel(fmap, option);