diff --git a/R-package/R/predict.xgb.Booster.R b/R-package/R/predict.xgb.Booster.R
index b51a1b19c..87666a55f 100644
--- a/R-package/R/predict.xgb.Booster.R
+++ b/R-package/R/predict.xgb.Booster.R
@@ -11,7 +11,8 @@ setClass("xgb.Booster")
 #'   value of sum of functions, when outputmargin=TRUE, the prediction is 
 #'   untransformed margin value. In logistic regression, outputmargin=T will
 #'   output value before logistic transformation.
-#' 
+#' @param ntreelimit limit on the number of trees used in prediction. This parameter is only valid for gbtree, not for gblinear.
+#'   When given, it must be at least 1; the default NULL uses all the trees.
 #' @examples
 #' data(iris)
 #' bst <- xgboost(as.matrix(iris[,1:4]),as.numeric(iris[,5]), nrounds = 2)
@@ -19,11 +20,18 @@ setClass("xgb.Booster")
 #' @export
 #' 
 setMethod("predict", signature = "xgb.Booster", 
-          definition = function(object, newdata, outputmargin = FALSE) {
+          definition = function(object, newdata, outputmargin = FALSE, ntreelimit = NULL) {
   if (class(newdata) != "xgb.DMatrix") {
     newdata <- xgb.DMatrix(newdata)
   }
-  ret <- .Call("XGBoosterPredict_R", object, newdata, as.integer(outputmargin), PACKAGE = "xgboost")
+  if (is.null(ntreelimit)) {
+    ntreelimit <- 0
+  } else {
+    if (ntreelimit < 1){
+      stop("predict: ntreelimit must be greater equal than 1")
+    }
+  }
+  ret <- .Call("XGBoosterPredict_R", object, newdata, as.integer(outputmargin), as.integer(ntreelimit), PACKAGE = "xgboost")
   return(ret)
 })
  
diff --git a/R-package/src/xgboost_R.cpp b/R-package/src/xgboost_R.cpp
index 96d3871b1..b03410a4c 100644
--- a/R-package/src/xgboost_R.cpp
+++ b/R-package/src/xgboost_R.cpp
@@ -247,12 +247,13 @@ extern "C" {
                                          &vec_dmats[0], &vec_sptr[0], len));
     _WrapperEnd();
   }
-  SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin) {
+  SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin, SEXP ntree_limit) {
     _WrapperBegin();
     bst_ulong olen;
     const float *res = XGBoosterPredict(R_ExternalPtrAddr(handle),
                                         R_ExternalPtrAddr(dmat),
                                         asInteger(output_margin),
+                                        asInteger(ntree_limit),
                                         &olen);
     SEXP ret = PROTECT(allocVector(REALSXP, olen));
     for (size_t i = 0; i < olen; ++i) {
diff --git a/R-package/src/xgboost_R.h b/R-package/src/xgboost_R.h
index 8b4a3372c..c988ff1e5 100644
--- a/R-package/src/xgboost_R.h
+++ b/R-package/src/xgboost_R.h
@@ -107,8 +107,9 @@ extern "C" {
    * \param handle handle
    * \param dmat data matrix
    * \param output_margin whether only output raw margin value
+   * \param ntree_limit limit on the number of trees used in prediction
    */
-  SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin);
+  SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP output_margin, SEXP ntree_limit);
   /*!
    * \brief load model from existing file
    * \param handle handle
diff --git a/src/gbm/gblinear-inl.hpp b/src/gbm/gblinear-inl.hpp
index e9566f87e..a9d4c8d62 100644
--- a/src/gbm/gblinear-inl.hpp
+++ b/src/gbm/gblinear-inl.hpp
@@ -105,7 +105,10 @@ class GBLinear : public IGradBooster {
   virtual void Predict(IFMatrix *p_fmat,
                        int64_t buffer_offset,
                        const BoosterInfo &info,
-                       std::vector<float> *out_preds) {
+                       std::vector<float> *out_preds,
+                       unsigned ntree_limit = 0) {
+    utils::Check(ntree_limit == 0,
+                 "GBLinear::Predict ntrees is only valid for gbtree predictor");
     std::vector<float> &preds = *out_preds;
     preds.resize(0);
     // start collecting the prediction
diff --git a/src/gbm/gbm.h b/src/gbm/gbm.h
index c548cab94..07dade4ac 100644
--- a/src/gbm/gbm.h
+++ b/src/gbm/gbm.h
@@ -57,11 +57,14 @@ class IGradBooster {
    *  the size of buffer is set by convention using IGradBooster.SetParam("num_pbuffer","size")
    * \param info extra side information that may be needed for prediction
    * \param out_preds output vector to hold the predictions
+   * \param ntree_limit limit on the number of trees used in prediction; 0 means the number of
+   *    trees is not limited. This parameter is only valid for gbtree, not for gblinear
    */
   virtual void Predict(IFMatrix *p_fmat,
                        int64_t buffer_offset,
                        const BoosterInfo &info,
-                       std::vector<float> *out_preds) = 0;
+                       std::vector<float> *out_preds,
+                       unsigned ntree_limit = 0) = 0;
   /*!
    * \brief dump the model in text format
    * \param fmap feature map that may help give interpretations of feature
diff --git a/src/gbm/gbtree-inl.hpp b/src/gbm/gbtree-inl.hpp
index f66b49d00..8fea28727 100644
--- a/src/gbm/gbtree-inl.hpp
+++ b/src/gbm/gbtree-inl.hpp
@@ -105,7 +105,8 @@ class GBTree : public IGradBooster {
   virtual void Predict(IFMatrix *p_fmat,
                        int64_t buffer_offset,
                        const BoosterInfo &info,
-                       std::vector<float> *out_preds) {
+                       std::vector<float> *out_preds,
+                       unsigned ntree_limit = 0) {
     int nthread;
     #pragma omp parallel
     {
@@ -137,7 +138,8 @@ class GBTree : public IGradBooster {
           this->Pred(batch[i],
                      buffer_offset < 0 ? -1 : buffer_offset + ridx,
                      gid, info.GetRoot(ridx), &feats,
-                     &preds[ridx * mparam.num_output_group + gid], stride);
+                     &preds[ridx * mparam.num_output_group + gid], stride, 
+                     ntree_limit);
         }
       }
     }
@@ -212,14 +214,16 @@ class GBTree : public IGradBooster {
                    int bst_group,
                    unsigned root_index,
                    tree::RegTree::FVec *p_feats,
-                   float *out_pred, size_t stride) {
+                   float *out_pred, size_t stride, unsigned ntree_limit) {
     size_t itop = 0;
     float  psum = 0.0f;
     // sum of leaf vector 
     std::vector<float> vec_psum(mparam.size_leaf_vector, 0.0f);
     const int64_t bid = mparam.BufferOffset(buffer_index, bst_group);
+    // number of trees still allowed to contribute; effectively unlimited when ntree_limit is 0
+    unsigned treeleft = ntree_limit == 0 ? std::numeric_limits<unsigned>::max() : ntree_limit;
     // load buffered results if any
-    if (bid >= 0) {
+    if (bid >= 0 && ntree_limit == 0) {
       itop = pred_counter[bid];
       psum = pred_buffer[bid];
       for (int i = 0; i < mparam.size_leaf_vector; ++i) {
@@ -235,12 +239,13 @@ class GBTree : public IGradBooster {
           for (int j = 0; j < mparam.size_leaf_vector; ++j) {
             vec_psum[j] += trees[i]->leafvec(tid)[j];
           }
+          if(--treeleft == 0) break;
         }
       }
       p_feats->Drop(inst);
     }
     // updated the buffered results
-    if (bid >= 0) {
+    if (bid >= 0 && ntree_limit == 0) {
       pred_counter[bid] = static_cast<unsigned>(trees.size());
       pred_buffer[bid] = psum;
       for (int i = 0; i < mparam.size_leaf_vector; ++i) {
diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp
index f252abedf..60e1fccf1 100644
--- a/src/learner/learner-inl.hpp
+++ b/src/learner/learner-inl.hpp
@@ -212,11 +212,14 @@ class BoostLearner {
    * \param data input data
    * \param output_margin whether to only predict margin value instead of transformed prediction
    * \param out_preds output vector that stores the prediction
+   * \param ntree_limit limit on the number of trees used by the boosted tree
+   *   predictor; when it equals 0, all the trees are used
    */
   inline void Predict(const DMatrix &data,
                       bool output_margin,
-                      std::vector<float> *out_preds) const {
-    this->PredictRaw(data, out_preds);
+                      std::vector<float> *out_preds,
+                      unsigned ntree_limit = 0) const {
+    this->PredictRaw(data, out_preds, ntree_limit);
     if (!output_margin) {
       obj_->PredTransform(out_preds);
     }
@@ -246,11 +249,14 @@ class BoostLearner {
    * \brief get un-transformed prediction
    * \param data training data matrix
    * \param out_preds output vector that stores the prediction
+   * \param ntree_limit limit on the number of trees used by the boosted tree
+   *   predictor; when it equals 0, all the trees are used
    */
   inline void PredictRaw(const DMatrix &data,
-                         std::vector<float> *out_preds) const {
+                         std::vector<float> *out_preds,
+                         unsigned ntree_limit = 0) const {
     gbm_->Predict(data.fmat(), this->FindBufferOffset(data),
-                  data.info.info, out_preds);
+                  data.info.info, out_preds, ntree_limit);
     // add base margin
     std::vector<float> &preds = *out_preds;
     const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
diff --git a/wrapper/xgboost.py b/wrapper/xgboost.py
index e2cbdba2e..a6999a39f 100644
--- a/wrapper/xgboost.py
+++ b/wrapper/xgboost.py
@@ -192,15 +192,16 @@ class Booster:
         return xglib.XGBoosterEvalOneIter(self.handle, it, dmats, evnames, len(evals))
     def eval(self, mat, name = 'eval', it = 0):
         return self.eval_set( [(mat,name)], it)
-    def predict(self, data, output_margin=False):
+    def predict(self, data, output_margin=False, ntree_limit=0):
         """
         predict with data
             data: the dmatrix storing the input
             output_margin: whether output raw margin value that is untransformed
+            ntree_limit: limit on the number of trees used in prediction; defaults to 0, which means all the trees are used
         """
         length = ctypes.c_ulong()
         preds = xglib.XGBoosterPredict(self.handle, data.handle,
-                                       int(output_margin), ctypes.byref(length))
+                                       int(output_margin), ntree_limit, ctypes.byref(length))
         return ctypes2numpy(preds, length.value, 'float32')
     def save_model(self, fname):
         """ save model to file """
diff --git a/wrapper/xgboost_wrapper.cpp b/wrapper/xgboost_wrapper.cpp
index 70c7e87b0..3f45c1438 100644
--- a/wrapper/xgboost_wrapper.cpp
+++ b/wrapper/xgboost_wrapper.cpp
@@ -25,9 +25,9 @@ class Booster: public learner::BoostLearner {
     this->init_model = false;
     this->SetCacheData(mats);
   }
-  const float *Pred(const DataMatrix &dmat, int output_margin, bst_ulong *len) {
+  inline const float *Pred(const DataMatrix &dmat, int output_margin, unsigned ntree_limit, bst_ulong *len) {
     this->CheckInitModel();
-    this->Predict(dmat, output_margin != 0, &this->preds_);
+    this->Predict(dmat, output_margin != 0, &this->preds_, ntree_limit);
     *len = static_cast<bst_ulong>(this->preds_.size());
     return &this->preds_[0];
   }
@@ -249,8 +249,8 @@ extern "C"{
     bst->eval_str = bst->EvalOneIter(iter, mats, names);
     return bst->eval_str.c_str();
   }
-  const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, bst_ulong *len) {
-    return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, len);
+  const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, unsigned ntree_limit, bst_ulong *len) {
+    return static_cast<Booster*>(handle)->Pred(*static_cast<DataMatrix*>(dmat), output_margin, ntree_limit, len);
   }
   void XGBoosterLoadModel(void *handle, const char *fname) {
     static_cast<Booster*>(handle)->LoadModel(fname);
diff --git a/wrapper/xgboost_wrapper.h b/wrapper/xgboost_wrapper.h
index 65446aea6..9687ec0a3 100644
--- a/wrapper/xgboost_wrapper.h
+++ b/wrapper/xgboost_wrapper.h
@@ -165,9 +165,11 @@ extern "C" {
    * \param handle handle
    * \param dmat data matrix
    * \param output_margin whether only output raw margin value
+   * \param ntree_limit limit on the number of trees used for prediction; this is only valid for boosted trees.
+   *    When the parameter is set to 0, all the trees are used
    * \param len used to store length of returning result
    */
-  XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, bst_ulong *len);
+  XGB_DLL const float *XGBoosterPredict(void *handle, void *dmat, int output_margin, unsigned ntree_limit, bst_ulong *len);
   /*!
    * \brief load model from existing file
    * \param handle handle