mv code into src

2014-08-15 21:04:23 -07:00
parent 3589e8252f
commit 34dd409c5b
25 changed files with 1 additions and 28 deletions
--- a/src/data.h
+++ b/src/data.h
@@ -0,0 +1,293 @@
+#ifndef XGBOOST_UNITY_DATA_H
+#define XGBOOST_UNITY_DATA_H
+/*!
+ * \file data.h
+ * \brief the input data structure for gradient boosting
+ * \author Tianqi Chen
+ */
+#include <cstdio>
+#include <vector>
+#include <limits>
+#include <algorithm>
+#include "utils/io.h"
+#include "utils/utils.h"
+#include "utils/iterator.h"
+#include "utils/matrix_csr.h"
+
+namespace xgboost {
+/*! 
+ * \brief unsigned integer type used in boost,
+ *        used for feature index and row index 
+ */
+typedef unsigned bst_uint;
+/*! \brief float type, used for storing statistics */
+typedef float bst_float;
+const float rt_eps = 1e-5f;
+// min gap between feature values to allow a split happen
+const float rt_2eps = rt_eps * 2.0f;
+
+/*! \brief pair of first and second order gradient statistics used by gradient boosting */
+struct bst_gpair {
+  /*! \brief first order gradient statistics */
+  bst_float grad;
+  /*! \brief second order gradient statistics */
+  bst_float hess;
+  /*! \brief default constructor, both fields are left uninitialized */
+  bst_gpair(void) {}
+  /*! \brief construct the pair from explicit gradient and second order gradient */
+  bst_gpair(bst_float grad, bst_float hess) {
+    this->grad = grad;
+    this->hess = hess;
+  }
+};
+
+/*! \brief read-only batch of sparse instances (rows), stored in CSR layout */
+struct SparseBatch {
+  /*! \brief one (index, value) element of a sparse vector */
+  struct Entry {
+    /*! \brief feature index */
+    bst_uint findex;
+    /*! \brief feature value */
+    bst_float fvalue;
+    /*! \brief default constructor, fields are left uninitialized */
+    Entry(void) {}
+    /*! \brief construct an entry from an index and a value */
+    Entry(bst_uint findex, bst_float fvalue) {
+      this->findex = findex;
+      this->fvalue = fvalue;
+    }
+    /*!
+     * \brief ordering predicate on feature value
+     * \return true when a has a strictly smaller fvalue than b
+     */
+    inline static bool CmpValue(const Entry &a, const Entry &b) {
+      return b.fvalue > a.fvalue;
+    }
+  };
+  /*! \brief non-owning view of one sparse instance inside the batch */
+  struct Inst {
+    /*! \brief pointer to the first element of the instance */
+    const Entry *data;
+    /*! \brief number of elements in the instance */
+    const bst_uint length;
+    /*! \brief construct a view from a pointer and an element count */
+    Inst(const Entry *data, bst_uint length) : data(data), length(length) {}
+    /*! \brief access the i-th element of the instance, no bound check is done */
+    inline const Entry& operator[](size_t i) const {
+      return *(data + i);
+    }
+  };
+  /*! \brief number of rows in the batch */
+  size_t size;
+  /*! \brief row id of the first row of this batch */
+  size_t base_rowid;
+  /*! \brief array[size+1], row i occupies [row_ptr[i], row_ptr[i+1]) in data_ptr */
+  const size_t *row_ptr;
+  /*! \brief array[row_ptr.back()], elements of all the rows */
+  const Entry *data_ptr;
+  /*! \brief get a view of the i-th row of the batch, no bound check is done */
+  inline Inst operator[](size_t i) const {
+    const size_t begin = row_ptr[i];
+    const size_t end = row_ptr[i + 1];
+    return Inst(data_ptr + begin, static_cast<bst_uint>(end - begin));
+  }
+};
+
+/**
+ * \brief This is an interface convention via template, defining the way to access features.
+ *        Column access rules are defined by the template for efficiency purposes;
+ *        row access is defined by an iterator over sparse batches.
+ *        Only RowIterator is virtual; the other members are declared here without
+ *        a definition and show the signatures an implementation provides.
+ * \tparam Derived type of actual implementation
+ */
+template<typename Derived>
+class FMatrixInterface {
+ public:
+  /*! \brief example iterator over one column */
+  struct ColIter{
+    /*!
+     * \brief move to next position
+     * \return whether there is element in next position
+     */
+    inline bool Next(void);
+    /*! \return row index of current position  */
+    inline bst_uint rindex(void) const;
+    /*! \return feature value in current position */
+    inline bst_float fvalue(void) const;
+  };
+  /*! \brief backward iterator over column */
+  struct ColBackIter : public ColIter {};
+ public:
+  // column access is needed by some of tree construction algorithms
+  /*!
+   * \brief get column iterator, the columns must be sorted by feature value
+   * \param cidx column index
+   * \return column iterator
+   */
+  inline ColIter GetSortedCol(size_t cidx) const;
+  /*!
+   * \brief get column backward iterator, starts from the biggest fvalue and iterates backward
+   * \param cidx column index
+   * \return reverse column iterator
+   */
+  inline ColBackIter GetReverseSortedCol(size_t cidx) const;
+  /*!
+   * \brief get number of columns
+   * \return number of columns
+   */
+  inline size_t NumCol(void) const;
+  /*!
+   * \brief check if column access is supported, if not, initialize column access
+   */
+  inline void InitColAccess(void);
+  /*! \return whether column access is enabled */
+  inline bool HaveColAccess(void) const;
+  /*!
+   * \brief return #entries-in-col
+   * \param cidx column index
+   */
+  inline size_t GetColSize(size_t cidx) const;
+  /*!
+   * \brief return #entries-in-col / #rows
+   * \param cidx column index
+   *   this function is used to help speedup,
+   *   an implementation does not have to provide a real value; if not sure, return 0.0
+   * \return column density
+   */
+  inline float GetColDensity(size_t cidx) const;
+  /*! \brief get the row iterator associated with FMatrix */
+  virtual utils::IIterator<SparseBatch>* RowIterator(void) const = 0;
+};
+
+/*!
+ * \brief sparse matrix that support column access, CSC
+ */
+class FMatrixS : public FMatrixInterface<FMatrixS>{
+ public:
+  typedef SparseBatch::Entry Entry;
+  /*!
+   * \brief forward iterator over the entries of one column
+   *  Next() must be called before the first element is read:
+   *  it advances dptr_ and reports false once dptr_ has reached end_
+   */
+  struct ColIter{
+    /*! \brief current position and the position at which iteration stops */
+    const Entry *dptr_, *end_;
+    /*! \brief construct from the start position and the stop position */
+    ColIter(const Entry* begin, const Entry* end)
+        :dptr_(begin), end_(end) {}
+    /*!
+     * \brief move to the next position
+     * \return false when the iterator is already at end_, true otherwise
+     */
+    inline bool Next(void) {
+      if (dptr_ != end_) {
+        ++dptr_;
+        return true;
+      }
+      return false;
+    }
+    /*! \return the findex field of the current entry */
+    inline bst_uint rindex(void) const {
+      return (*dptr_).findex;
+    }
+    /*! \return the fvalue field of the current entry */
+    inline bst_float fvalue(void) const {
+      return (*dptr_).fvalue;
+    }
+  };
+  /*!
+   * \brief backward iterator over the entries of one column
+   *  reuses the storage and accessors of ColIter, but walks dptr_ downwards
+   */
+  struct ColBackIter : public ColIter {
+    /*! \brief construct from the start position and the stop position */
+    ColBackIter(const Entry* dptr, const Entry* end) : ColIter(dptr, end) {}
+    /*!
+     * \brief move to the previous position, this hides ColIter::Next
+     * \return false when the iterator is already at end_, true otherwise
+     */
+    inline bool Next(void) {
+      if (dptr_ != end_) {
+        --dptr_;
+        return true;
+      }
+      return false;
+    }
+  };
+  /*!
+   * \brief constructor
+   * \param base_iter row iterator backing this matrix;
+   *   it is deleted by the destructor, so ownership passes to this object
+   */
+  explicit FMatrixS(utils::IIterator<SparseBatch> *base_iter)
+      : iter_(base_iter), num_buffered_row_(0) {}
+  /*!
+   * \brief destructor, releases the row iterator
+   *  NOTE(review): no copy constructor/assignment is declared here, so a copied
+   *  FMatrixS would delete the same iterator twice - confirm it is never copied
+   */
+  virtual ~FMatrixS(void) {
+    delete iter_;
+  }
+  /*! \return true once the column pointer array has been built */
+  inline bool HaveColAccess(void) const {
+    return !col_ptr_.empty();
+  }
+  /*!
+   * \brief get number of columns, requires column access to be initialized
+   * \return number of columns, which is one less than the length of col_ptr_
+   */
+  inline size_t NumCol(void) const {
+    const bool has_col = this->HaveColAccess();
+    utils::Check(has_col, "NumCol:need column access");
+    return col_ptr_.size() - 1;
+  }
+  /*!
+   * \brief get forward iterator over a column
+   *  the iterator starts one position before the first entry of the column and
+   *  stops on its last entry, which is what ColIter::Next expects
+   * \param cidx column index, must be smaller than NumCol()
+   * \return column iterator
+   */
+  inline ColIter GetSortedCol(size_t cidx) const {
+    utils::Assert(cidx < this->NumCol(), "col id exceed bound");
+    // no storage to point into: hand back an iterator that is already exhausted
+    if (col_data_.empty()) return ColIter(NULL, NULL);
+    // use pointer arithmetic from the base: for the last column col_ptr_[cidx + 1]
+    // equals col_data_.size(), which is not a valid argument for operator[]
+    const Entry *base = &col_data_[0];
+    return ColIter(base + col_ptr_[cidx] - 1,
+                   base + col_ptr_[cidx + 1] - 1);
+  }
+  /*!
+   * \brief get reversed col iterator,
+   *   this function will be deprecated at some point
+   *  the iterator starts one position past the last entry of the column and
+   *  stops on its first entry, which is what ColBackIter::Next expects
+   * \param cidx column index, must be smaller than NumCol()
+   * \return reverse column iterator
+   */
+  inline ColBackIter GetReverseSortedCol(size_t cidx) const {
+    utils::Assert(cidx < this->NumCol(), "col id exceed bound");
+    // no storage to point into: hand back an iterator that is already exhausted
+    if (col_data_.empty()) return ColBackIter(NULL, NULL);
+    // use pointer arithmetic from the base: for the last column col_ptr_[cidx + 1]
+    // equals col_data_.size(), which is not a valid argument for operator[]
+    const Entry *base = &col_data_[0];
+    return ColBackIter(base + col_ptr_[cidx + 1],
+                       base + col_ptr_[cidx]);
+  }
+  /*!
+   * \brief get number of entries stored in a column
+   * \param cidx column index, not checked against NumCol()
+   */
+  inline size_t GetColSize(size_t cidx) const {
+    const size_t begin = col_ptr_[cidx];
+    const size_t end = col_ptr_[cidx + 1];
+    return end - begin;
+  }
+  /*!
+   * \brief get column density: one minus the fraction of buffered rows
+   *  that have no entry in the column
+   * \param cidx column index, not checked against NumCol()
+   */
+  inline float GetColDensity(size_t cidx) const {
+    const size_t nentry = col_ptr_[cidx + 1] - col_ptr_[cidx];
+    const size_t nmiss = num_buffered_row_ - nentry;
+    return 1.0f - static_cast<float>(nmiss) / num_buffered_row_;
+  }
+  /*! \brief build the column storage unless it has been built already */
+  virtual void InitColAccess(void) {
+    if (!this->HaveColAccess()) {
+      // row indices are stored as bst_uint, so cap the number of rows at its maximum
+      const size_t row_limit = std::numeric_limits<bst_uint>::max();
+      this->InitColData(row_limit);
+    }
+  }
+  /*!
+   * \brief get the row iterator associated with FMatrix
+   * \return the iterator passed to the constructor, still owned by this object
+   */
+  virtual utils::IIterator<SparseBatch>* RowIterator(void) const {
+    return this->iter_;
+  }
+
+ protected:
+  /*!
+   * \brief initialize column data
+   *  walks the row iterator twice: the first pass registers one budget unit per
+   *  entry under its feature index and counts the rows, the second pass pushes
+   *  (row index, feature value) under the feature index; finally every column
+   *  is sorted with Entry::CmpValue
+   *  (presumably AddBudget/InitStorage/PushElem size and fill the CSC arrays -
+   *  confirm against utils::SparseCSRMBuilder)
+   * \param max_nrow maximum number of rows supported,
+   *   rows whose id is not below this limit are ignored
+   */
+  inline void InitColData(size_t max_nrow) {
+    // note: this part of code is serial, todo, parallelize this transformer
+    utils::SparseCSRMBuilder<SparseBatch::Entry> builder(col_ptr_, col_data_);
+    builder.InitBudget(0);
+    // first pass: count entries per column and the number of buffered rows
+    iter_->BeforeFirst();
+    num_buffered_row_ = 0;
+    while (iter_->Next()) {
+      const SparseBatch &batch = iter_->Value();
+      if (batch.base_rowid >= max_nrow) break;
+      const size_t nbatch = std::min(batch.size, max_nrow - batch.base_rowid);
+      for (size_t i = 0; i < nbatch; ++i, ++num_buffered_row_) {
+        SparseBatch::Inst inst = batch[i];
+        // iterate over the entries of this row only, a row holds inst.length entries
+        for (bst_uint j = 0; j < inst.length; ++j) {
+          builder.AddBudget(inst[j].findex);
+        }
+      }
+    }
+
+    builder.InitStorage();
+
+    // second pass: store every entry under its column
+    iter_->BeforeFirst();
+    while (iter_->Next()) {
+      const SparseBatch &batch = iter_->Value();
+      if (batch.base_rowid >= max_nrow) break;
+      const size_t nbatch = std::min(batch.size, max_nrow - batch.base_rowid);
+      for (size_t i = 0; i < nbatch; ++i) {
+        SparseBatch::Inst inst = batch[i];
+        // the findex field of a column entry carries the row id: offset of the
+        // batch plus the position i of the row inside the batch
+        const bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
+        for (bst_uint j = 0; j < inst.length; ++j) {
+          builder.PushElem(inst[j].findex, Entry(ridx, inst[j].fvalue));
+        }
+      }
+    }
+
+    // sort columns
+    unsigned ncol = static_cast<unsigned>(this->NumCol());
+    #pragma omp parallel for schedule(static)
+    for (unsigned i = 0; i < ncol; ++i) {
+      std::sort(&col_data_[col_ptr_[i]],
+                &col_data_[col_ptr_[i + 1]], Entry::CmpValue);
+    }
+  }
+
+ private:
+  // --- data structure used to support InitColAccess --
+  utils::IIterator<SparseBatch> *iter_;
+  /*! \brief number of rows buffered into the column storage by InitColData */
+  size_t num_buffered_row_;
+  /*! \brief column pointer of CSC format */
+  std::vector<size_t>  col_ptr_;
+  /*! \brief column datas in CSC format */
+  std::vector<SparseBatch::Entry>  col_data_;
+};
+}  // namespace xgboost
+#endif
--- a/src/gbm/gbm.h
+++ b/src/gbm/gbm.h
@@ -0,0 +1,82 @@
+#ifndef XGBOOST_GBM_GBM_H_
+#define XGBOOST_GBM_GBM_H_
+/*!
+ * \file gbm.h
+ * \brief interface of gradient booster, that learns through gradient statistics
+ * \author Tianqi Chen
+ */
+#include <vector>
+#include "../data.h"
+
+namespace xgboost {
+/*! \brief namespace for gradient booster */
+namespace gbm {
+/*!
+ * \brief interface of gradient boosting model,
+ *  every operation is pure virtual and is provided by a concrete booster
+ * \tparam FMatrix the data type updater taking
+ */
+template<typename FMatrix>
+class IGradBooster {
+ public:
+  /*!
+   * \brief set parameters from outside
+   * \param name name of the parameter
+   * \param val  value of the parameter
+   */
+  virtual void SetParam(const char *name, const char *val) = 0;
+  /*!
+   * \brief load model from stream
+   * \param fi input stream
+   */
+  virtual void LoadModel(utils::IStream &fi) = 0;
+  /*!
+   * \brief save model to stream
+   * \param fo output stream
+   */
+  virtual void SaveModel(utils::IStream &fo) const = 0;
+  /*!
+   * \brief initialize the model
+   */
+  virtual void InitModel(void) = 0;
+  /*!
+   * \brief perform update to the model(boosting)
+   * \param gpair the gradient pair statistics of the data
+   * \param fmat feature matrix that provide access to features
+   * \param root_index pre-partitioned root_index of each instance,
+   *   root_index.size() can be 0 which indicates that no pre-partition involved
+   */
+  virtual void DoBoost(const std::vector<bst_gpair> &gpair,
+                       FMatrix &fmat,
+                       const std::vector<unsigned> &root_index) = 0;
+  /*!
+   * \brief generate predictions for given feature matrix
+   * \param fmat feature matrix
+   * \param buffer_offset buffer index offset of these instances, if equals -1
+   *        this means we do not have buffer index allocated to the gbm
+   *  a buffer index is assigned to each instance that requires repetitive prediction
+   *  the size of buffer is set by convention using IGradBooster.SetParam("num_pbuffer","size")
+   * \param root_index pre-partitioned root_index of each instance,
+   *   root_index.size() can be 0 which indicates that no pre-partition involved
+   * \param out_preds output vector to hold the predictions
+   */
+  virtual void Predict(const FMatrix &fmat,
+                       int64_t buffer_offset,
+                       const std::vector<unsigned> &root_index,
+                       std::vector<float> *out_preds) = 0;
+  // destructor, virtual so that a booster can be deleted through this interface
+  virtual ~IGradBooster(void){}
+};
+}  // namespace gbm
+}  // namespace xgboost
+#include "gbtree-inl.hpp"
+namespace xgboost {
+namespace gbm {
+/*!
+ * \brief create a gradient booster by name
+ * \param name type of the booster, only "gbtree" is recognized
+ * \return newly allocated booster; for an unknown name utils::Error is
+ *   called and NULL is returned if that call comes back
+ * \tparam FMatrix the data type updater taking
+ */
+template<typename FMatrix>
+inline IGradBooster<FMatrix>* CreateGradBooster(const char *name) {
+  if (strcmp("gbtree", name) != 0) {
+    utils::Error("unknown booster type: %s", name);
+    return NULL;
+  }
+  return new GBTree<FMatrix>();
+}
+}  // namespace gbm
+}  // namespace xgboost
+#endif  // XGBOOST_GBM_GBM_H_
--- a/src/gbm/gbtree-inl.hpp
+++ b/src/gbm/gbtree-inl.hpp
@@ -0,0 +1,365 @@
+#ifndef XGBOOST_GBM_GBTREE_INL_HPP_
+#define XGBOOST_GBM_GBTREE_INL_HPP_
+/*!
+ * \file gbtree-inl.hpp
+ * \brief gradient boosted tree implementation
+ * \author Tianqi Chen
+ */
+#include <vector>
+#include <utility>
+#include <string>
+#include "./gbm.h"
+#include "../tree/updater.h"
+
+namespace xgboost {
+namespace gbm {
+/*!
+ * \brief gradient boosted tree
+ * \tparam FMatrix the data type updater taking
+ */
+template<typename FMatrix>
+class GBTree : public IGradBooster<FMatrix> {
+ public:
+  /*! \brief destructor, releases the trees and the updaters held by the booster */
+  virtual ~GBTree(void) {
+    this->Clear();
+    // the updaters are allocated in InitUpdater and only freed there on
+    // re-initialization, so they have to be released here as well
+    for (size_t i = 0; i < updaters.size(); ++i) {
+      delete updaters[i];
+    }
+    updaters.clear();
+  }
+  /*!
+   * \brief set parameters from outside
+   *  names starting with "bst:" are recorded without the prefix and forwarded
+   *  to the existing updaters; "silent" is additionally re-submitted as "bst:silent";
+   *  model parameters are only touched while no tree has been stored
+   * \param name name of the parameter
+   * \param val  value of the parameter
+   */
+  virtual void SetParam(const char *name, const char *val) {
+    const bool is_bst_param = (strncmp(name, "bst:", 4) == 0);
+    if (is_bst_param) {
+      const char *stripped = name + 4;
+      cfg.push_back(std::make_pair(std::string(stripped), std::string(val)));
+      // updaters that already exist need to see the new value too
+      for (size_t k = 0; k < updaters.size(); ++k) {
+        updaters[k]->SetParam(stripped, val);
+      }
+    }
+    if (strcmp(name, "silent") == 0) {
+      this->SetParam("bst:silent", val);
+    }
+    tparam.SetParam(name, val);
+    if (trees.empty()) {
+      mparam.SetParam(name, val);
+    }
+  }
+  /*!
+   * \brief load model from stream, replacing the current content
+   *  read order: raw ModelParam, each tree, the tree_info array and,
+   *  when num_pbuffer is nonzero, the prediction buffer and its counter
+   * \param fi input stream
+   */
+  virtual void LoadModel(utils::IStream &fi) {
+    this->Clear();
+    utils::Check(fi.Read(&mparam, sizeof(ModelParam)) != 0,
+                 "GBTree: invalid model file");
+    const int ntree = mparam.num_trees;
+    trees.resize(ntree);
+    for (size_t tid = 0; tid < trees.size(); ++tid) {
+      tree::RegTree *ptree = new tree::RegTree();
+      trees[tid] = ptree;
+      ptree->LoadModel(fi);
+    }
+    tree_info.resize(ntree);
+    if (ntree != 0) {
+      utils::Check(fi.Read(&tree_info[0], sizeof(int) * ntree) != 0,
+                   "GBTree: invalid model file");
+    }
+    // nothing more is stored when no prediction buffer was requested
+    if (mparam.num_pbuffer == 0) return;
+    const size_t nbuffer = mparam.PredBufferSize();
+    pred_buffer.resize(nbuffer);
+    pred_counter.resize(nbuffer);
+    utils::Check(fi.Read(&pred_buffer[0], pred_buffer.size() * sizeof(float)) != 0,
+                 "GBTree: invalid model file");
+    utils::Check(fi.Read(&pred_counter[0], pred_counter.size() * sizeof(unsigned)) != 0,
+                 "GBTree: invalid model file");
+  }
+  /*!
+   * \brief save model to stream
+   *  write order: raw ModelParam, each tree, the tree_info array and,
+   *  when num_pbuffer is nonzero, the prediction buffer and its counter
+   * \param fo output stream
+   */
+  virtual void SaveModel(utils::IStream &fo) const {
+    const int ntree = static_cast<int>(trees.size());
+    utils::Assert(mparam.num_trees == ntree, "GBTree");
+    fo.Write(&mparam, sizeof(ModelParam));
+    for (size_t tid = 0; tid < trees.size(); ++tid) {
+      trees[tid]->SaveModel(fo);
+    }
+    if (!tree_info.empty()) {
+      fo.Write(&tree_info[0], sizeof(int) * tree_info.size());
+    }
+    if (mparam.num_pbuffer == 0) return;
+    fo.Write(&pred_buffer[0], pred_buffer.size() * sizeof(float));
+    fo.Write(&pred_counter[0], pred_counter.size() * sizeof(unsigned));
+  }
+  /*!
+   * \brief initialize the model: zero the prediction buffer and its counter,
+   *  then verify that no tree has been stored yet
+   */
+  virtual void InitModel(void) {
+    const size_t nbuffer = mparam.PredBufferSize();
+    pred_buffer.assign(nbuffer, 0.0f);
+    pred_counter.assign(nbuffer, 0);
+    utils::Assert(mparam.num_trees == 0, "GBTree: model already initialized");
+    utils::Assert(trees.size() == 0, "GBTree: model already initialized");
+  }
+  /*!
+   * \brief perform one round of boosting
+   *  with a single output group the gradient pairs are used as given; otherwise
+   *  gpair is read as interleaved (row * ngroup + gid) and one boosting step is
+   *  run per group on the gradient pairs of that group
+   * \param gpair the gradient pair statistics of the data
+   * \param fmat feature matrix that provide access to features
+   * \param root_index pre-partitioned root_index of each instance
+   */
+  virtual void DoBoost(const std::vector<bst_gpair> &gpair,
+                       FMatrix &fmat,
+                       const std::vector<unsigned> &root_index) {
+    if (mparam.num_output_group == 1) {
+      this->BoostNewTrees(gpair, fmat, root_index, 0);
+      return;
+    }
+    const int ngroup = mparam.num_output_group;
+    utils::Check(gpair.size() % ngroup == 0,
+                 "must have exactly ngroup*nrow gpairs");
+    // scratch space holding the gradient pairs of one group at a time
+    std::vector<bst_gpair> group_gpair(gpair.size() / ngroup);
+    for (int gid = 0; gid < ngroup; ++gid) {
+      #pragma omp parallel for schedule(static)
+      for (size_t i = 0; i < group_gpair.size(); ++i) {
+        group_gpair[i] = gpair[i * ngroup + gid];
+      }
+      this->BoostNewTrees(group_gpair, fmat, root_index, gid);
+    }
+  }
+  /*!
+   * \brief generate predictions for every row of the feature matrix
+   *  out_preds is emptied first and then grown batch by batch; the prediction of
+   *  row r and group g is written to position r * num_output_group + g
+   * \param fmat feature matrix, rows are read through its RowIterator
+   * \param buffer_offset buffer index of the first row; when negative,
+   *   -1 is passed to Pred as the buffer index of every row
+   * \param root_index root index of each instance, 0 is used for every row when empty
+   * \param out_preds output vector to hold the predictions
+   */
+  virtual void Predict(const FMatrix &fmat,
+                       int64_t buffer_offset,
+                       const std::vector<unsigned> &root_index,
+                       std::vector<float> *out_preds) {
+    // query the number of threads of a parallel region, one scratch vector is kept per thread
+    // NOTE(review): every thread of the region assigns nthread - confirm this is acceptable
+    int nthread;
+    #pragma omp parallel
+    {
+      nthread = omp_get_num_threads();
+    }
+    this->InitThreadTemp(nthread);
+    std::vector<float> &preds = *out_preds;
+    preds.resize(0);
+    // start collecting the prediction
+    utils::IIterator<SparseBatch> *iter = fmat.RowIterator();
+    iter->BeforeFirst();
+    while (iter->Next()) {
+      const SparseBatch &batch = iter->Value();
+      // batches have to arrive in row order without gaps, otherwise the
+      // output positions computed below would not line up with preds
+      utils::Assert(batch.base_rowid * mparam.num_output_group == preds.size(),
+                    "base_rowid is not set correctly");
+      // output convention: nrow * k, where nrow is number of rows
+      // k is number of group
+      preds.resize(preds.size() + batch.size * mparam.num_output_group);
+      // parallel over local batch
+      const unsigned nsize = static_cast<unsigned>(batch.size);
+      #pragma omp parallel for schedule(static)
+      for (unsigned i = 0; i < nsize; ++i) {
+        // each thread works on its own dense feature vector
+        const int tid = omp_get_thread_num();
+        std::vector<float> &feats = thread_temp[tid];
+        const size_t ridx = batch.base_rowid + i;
+        const unsigned root_idx = root_index.size() == 0 ? 0 : root_index[ridx];
+        // loop over output groups
+        for (int gid = 0; gid < mparam.num_output_group; ++gid) {
+          preds[ridx * mparam.num_output_group + gid] =
+              this->Pred(batch[i],
+                         buffer_offset < 0 ? -1 : buffer_offset+ridx,
+                         gid, root_idx, &feats);
+        }
+      }
+    }
+  }
+
+ protected:
+  /*! \brief clear the model: free every stored tree and empty the prediction buffers */
+  inline void Clear(void) {
+    const size_t ntree = trees.size();
+    for (size_t tid = 0; tid < ntree; ++tid) {
+      delete trees[tid];
+    }
+    trees.clear();
+    pred_buffer.clear();
+    pred_counter.clear();
+  }
+  /*!
+   * \brief create the updaters named in tparam.updater_seq, unless already done
+   *  existing updaters are freed first; every new updater receives all the
+   *  parameters recorded in cfg
+   */
+  inline void InitUpdater(void) {
+    if (tparam.updater_initialized != 0) return;
+    for (size_t k = 0; k < updaters.size(); ++k) {
+      delete updaters[k];
+    }
+    updaters.clear();
+    // strtok_r writes into its input, so tokenize a private copy of the sequence
+    std::string seq = tparam.updater_seq;
+    char *saveptr;
+    for (char *tok = strtok_r(&seq[0], ",", &saveptr);
+         tok != NULL;
+         tok = strtok_r(NULL, ",", &saveptr)) {
+      updaters.push_back(tree::CreateUpdater<FMatrix>(tok));
+      // set parameters
+      for (size_t j = 0; j < cfg.size(); ++j) {
+        const std::pair<std::string, std::string> &kv = cfg[j];
+        updaters.back()->SetParam(kv.first.c_str(), kv.second.c_str());
+      }
+    }
+    tparam.updater_initialized = 1;
+  }
+  /*!
+   * \brief grow the trees of one boosting round for one output group
+   *  creates tparam.num_parallel_tree configured trees, runs every updater
+   *  on them and appends them to the model tagged with the group id
+   * \param gpair the gradient pair statistics used for this group
+   * \param fmat feature matrix that provide access to features
+   * \param root_index pre-partitioned root_index of each instance
+   * \param bst_group id of the output group the new trees belong to
+   */
+  inline void BoostNewTrees(const std::vector<bst_gpair> &gpair,
+                            FMatrix &fmat,
+                            const std::vector<unsigned> &root_index,
+                            int bst_group) {
+    this->InitUpdater();
+    // create the trees
+    std::vector<tree::RegTree *> new_trees;
+    for (int t = 0; t < tparam.num_parallel_tree; ++t) {
+      tree::RegTree *ptree = new tree::RegTree();
+      new_trees.push_back(ptree);
+      for (size_t j = 0; j < cfg.size(); ++j) {
+        ptree->param.SetParam(cfg[j].first.c_str(), cfg[j].second.c_str());
+      }
+      ptree->InitModel();
+    }
+    // let every updater work on the new trees, in sequence order
+    for (size_t k = 0; k < updaters.size(); ++k) {
+      updaters[k]->Update(gpair, fmat, root_index, new_trees);
+    }
+    // hand the trees over to the model
+    for (size_t t = 0; t < new_trees.size(); ++t) {
+      trees.push_back(new_trees[t]);
+      tree_info.push_back(bst_group);
+    }
+    mparam.num_trees += tparam.num_parallel_tree;
+  }
+  /*!
+   * \brief make a prediction for a single instance
+   *  when a buffer slot is available, the buffered sum and the number of trees
+   *  it already covers are loaded, only the remaining trees are evaluated and
+   *  the slot is updated afterwards
+   * \param inst the sparse instance
+   * \param buffer_index buffer index of the instance, negative means no buffering
+   * \param bst_group output group to predict, only trees of this group contribute
+   * \param root_index root index passed to every tree
+   * \param p_feats dense scratch vector of the calling thread; it is filled
+   *   from inst before the trees are evaluated and reset afterwards
+   * \return sum of the predictions of the trees of bst_group
+   */
+  inline float Pred(const SparseBatch::Inst &inst,
+                    int64_t buffer_index,
+                    int bst_group,
+                    unsigned root_index,
+                    std::vector<float> *p_feats) {
+    size_t itop = 0;
+    float  psum = 0.0f;
+    // keep the offset in 64 bit: a narrower type truncates offsets of large
+    // buffers; the "no buffer" case is decided here on the signed index
+    int64_t bid = -1;
+    if (buffer_index >= 0) {
+      bid = static_cast<int64_t>(mparam.BufferOffset(buffer_index, bst_group));
+    }
+    // load buffered results if any
+    if (bid >= 0) {
+      itop = pred_counter[bid];
+      psum = pred_buffer[bid];
+    }
+    if (itop != trees.size()) {
+      FillThreadTemp(inst, p_feats);
+      for (size_t i = itop; i < trees.size(); ++i) {
+        if (tree_info[i] == bst_group) {
+          psum += trees[i]->Predict(*p_feats, root_index);
+        }
+      }
+      DropThreadTemp(inst, p_feats);
+    }
+    // updated the buffered results
+    if (bid >= 0) {
+      pred_counter[bid] = static_cast<unsigned>(trees.size());
+      pred_buffer[bid] = psum;
+    }
+    return psum;
+  }
+  /*!
+   * \brief prepare one dense feature vector per thread for prediction,
+   *  each has mparam.num_feature slots that are all set to NAN
+   * \param nthread number of per-thread vectors to keep
+   */
+  inline void InitThreadTemp(int nthread) {
+    thread_temp.resize(nthread);
+    const size_t nslot = thread_temp.size();
+    for (size_t tid = 0; tid < nslot; ++tid) {
+      std::vector<float> &buf = thread_temp[tid];
+      buf.resize(mparam.num_feature);
+      std::fill(buf.begin(), buf.end(), NAN);
+    }
+  }
+  /*!
+   * \brief copy the values of a sparse instance into a dense vector
+   * \param inst the sparse instance
+   * \param p_feats dense vector indexed by feature index, no bound check is done
+   */
+  inline static void FillThreadTemp(const SparseBatch::Inst &inst,
+                                    std::vector<float> *p_feats) {
+    for (bst_uint k = 0; k < inst.length; ++k) {
+      const SparseBatch::Entry &e = inst[k];
+      (*p_feats)[e.findex] = e.fvalue;
+    }
+  }
+  /*!
+   * \brief reset to NAN the slots of a dense vector that a sparse instance occupies
+   * \param inst the sparse instance whose feature indices are reset
+   * \param p_feats dense vector indexed by feature index, no bound check is done
+   */
+  inline static void DropThreadTemp(const SparseBatch::Inst &inst,
+                                    std::vector<float> *p_feats) {
+    for (bst_uint k = 0; k < inst.length; ++k) {
+      (*p_feats)[inst[k].findex] = NAN;
+    }
+  }
+  // --- data structure ---
+  /*! \brief training parameters */
+  struct TrainParam {
+    /*! \brief number of threads, as last given through the "nthread" parameter */
+    int nthread;
+    /*!
+     * \brief number of parallel trees constructed each iteration
+     *  use this option to support boosted random forest
+     */
+    int num_parallel_tree;
+    /*! \brief whether updater is already initialized */
+    int updater_initialized;
+    /*! \brief tree updater sequence, a comma separated list of updater names */
+    std::string updater_seq;
+    /*! \brief constructor, sets the default values */
+    TrainParam(void) {
+      nthread = 0;
+      updater_seq = "grow_colmaker,prune";
+      num_parallel_tree = 1;
+      updater_initialized = 0;
+    }
+    /*!
+     * \brief set parameters from outside
+     *  recognized names: "updater", "nthread", "num_parallel_tree"
+     * \param name name of the parameter
+     * \param val  value of the parameter
+     */
+    inline void SetParam(const char *name, const char *val){
+      // a changed updater sequence invalidates the updaters created so far
+      if (!strcmp(name, "updater") &&
+          strcmp(updater_seq.c_str(), val) != 0) {
+        updater_seq = val;
+        updater_initialized = 0;
+      }
+      if (!strcmp(name, "nthread")) {
+        // parse first, so that OpenMP is configured with the requested value
+        // and not with the one stored by a previous call
+        nthread = atoi(val);
+        omp_set_num_threads(nthread);
+      }
+      if (!strcmp(name, "num_parallel_tree")) {
+        num_parallel_tree = atoi(val);
+      }
+    }
+  };
+  /*!
+   * \brief model parameters
+   *  the struct is read and written as raw bytes by LoadModel/SaveModel,
+   *  so the order and the types of the fields are part of the model format
+   */
+  struct ModelParam {
+    /*! \brief number of trees */
+    int num_trees;
+    /*! \brief number of root: default 0, means single tree */
+    int num_roots;
+    /*! \brief number of features to be used by trees */
+    int num_feature;
+    /*! \brief size of prediction buffer allocated used for buffering */
+    int64_t num_pbuffer;
+    /*!
+     * \brief how many output group a single instance can produce
+     *  this affects the behavior of number of output we have:
+     *    suppose we have n instance and k group, output will be k*n
+     */
+    int num_output_group;
+    /*! \brief reserved parameters */
+    int reserved[32];
+    /*! \brief constructor, sets the default values and zeroes the reserved space */
+    ModelParam(void) {
+      num_trees = 0;
+      num_roots = 0;
+      num_feature = 0;
+      num_pbuffer = 0;
+      num_output_group = 1;
+      memset(reserved, 0, sizeof(reserved));
+    }
+    /*!
+     * \brief set parameters from outside
+     *  recognized names: "num_pbuffer", "num_output_group",
+     *  "bst:num_roots", "bst:num_feature"
+     * \param name name of the parameter
+     * \param val  value of the parameter
+     */
+    inline void SetParam(const char *name, const char *val) {
+      if (strcmp("num_pbuffer", name) == 0) {
+        num_pbuffer = atol(val);
+      } else if (strcmp("num_output_group", name) == 0) {
+        num_output_group = atol(val);
+      } else if (strcmp("bst:num_roots", name) == 0) {
+        num_roots = atoi(val);
+      } else if (strcmp("bst:num_feature", name) == 0) {
+        num_feature = atoi(val);
+      }
+    }
+    /*! \return size of prediction buffer actually needed: one slot per group and buffer index */
+    inline size_t PredBufferSize(void) const {
+      return static_cast<size_t>(num_output_group * num_pbuffer);
+    }
+    /*!
+     * \brief get the buffer offset given a buffer index and group id
+     * \param buffer_index buffer index of the instance, must be below num_pbuffer
+     * \param bst_group output group id
+     * \return calculated buffer offset; for a negative buffer_index
+     *   the value -1 converted to size_t
+     */
+    inline size_t BufferOffset(int64_t buffer_index, int bst_group) const {
+      if (buffer_index < 0) {
+        return static_cast<size_t>(-1);
+      }
+      utils::Check(buffer_index < num_pbuffer, "buffer_index exceed num_pbuffer");
+      // the slots of one group are stored contiguously, group after group
+      return buffer_index + num_pbuffer * bst_group;
+    }
+  };
+  // training parameter
+  TrainParam tparam;
+  // model parameter
+  ModelParam mparam;
+  /*! \brief vector of trees stored in the model */
+  std::vector<tree::RegTree*> trees;
+  /*! \brief some information indicator of the tree, reserved */
+  std::vector<int> tree_info;
+  /*! \brief prediction buffer */
+  std::vector<float>  pred_buffer;
+  /*! \brief prediction buffer counter, remember the prediction */
+  std::vector<unsigned> pred_counter;
+  // ----training fields----
+  // configurations for tree
+  std::vector< std::pair<std::string, std::string> > cfg;
+  // temporal storage for per thread
+  std::vector< std::vector<float> > thread_temp;
+  // the updaters that can be applied to each of tree
+  std::vector< tree::IUpdater<FMatrix>* > updaters;
+};
+
+}  // namespace gbm
+}  // namespace xgboost
+#endif  // XGBOOST_GBM_GBTREE_INL_HPP_
--- a/src/learner/dmatrix.h
+++ b/src/learner/dmatrix.h
@@ -0,0 +1,84 @@
+#ifndef XGBOOST_LEARNER_DMATRIX_H_
+#define XGBOOST_LEARNER_DMATRIX_H_
+/*!
+ * \file dmatrix.h
+ * \brief meta data and template data structure 
+ *        used for regression/classification/ranking
+ * \author Tianqi Chen
+ */
+#include "../data.h"
+
+namespace xgboost {
+namespace learner {
+/*!
+ * \brief meta information needed in training, including label, weight
+ */
+struct MetaInfo {
+  /*! \brief label of each instance */
+  std::vector<float> labels;
+  /*!
+   * \brief the index of begin and end of a group
+   * needed when the learning task is ranking
+   */
+  std::vector<bst_uint> group_ptr;
+  /*! \brief weights of each instance, optional */
+  std::vector<float> weights;
+  /*!
+   * \brief specified root index of each instance,
+   *  can be used for multi task setting
+   */
+  std::vector<unsigned> root_index;
+  /*!
+   * \brief get weight of the i-th instance
+   * \return the stored weight, or 1.0f when no weights are given
+   */
+  inline float GetWeight(size_t i) const {
+    return weights.empty() ? 1.0f : weights[i];
+  }
+  /*!
+   * \brief get root index of i-th instance
+   * \return the stored root index converted to float, or 0 when none are given
+   */
+  inline float GetRoot(size_t i) const {
+    if (root_index.empty()) return 0;
+    return static_cast<float>(root_index[i]);
+  }
+  /*!
+   * \brief write labels, group_ptr, weights and root_index to the stream, in this order
+   * \param fo output stream
+   */
+  inline void SaveBinary(utils::IStream &fo) {
+    fo.Write(labels);
+    fo.Write(group_ptr);
+    fo.Write(weights);
+    fo.Write(root_index);
+  }
+  /*!
+   * \brief read the fields back in the order SaveBinary writes them,
+   *  every read is verified with utils::Check
+   * \param fi input stream
+   */
+  inline void LoadBinary(utils::IStream &fi) {
+    const char *errmsg = "MetaInfo: invalid format";
+    utils::Check(fi.Read(&labels), errmsg);
+    utils::Check(fi.Read(&group_ptr), errmsg);
+    utils::Check(fi.Read(&weights), errmsg);
+    utils::Check(fi.Read(&root_index), errmsg);
+  }
+};
+
+/*!
+ * \brief data object used for learning,
+ *  bundles the meta information with the feature matrix
+ * \tparam FMatrix type of feature data source
+ */
+template<typename FMatrix>
+struct DMatrix {
+  /*! \brief meta information about the dataset */
+  MetaInfo info;
+  /*! \brief number of rows in the DMatrix */
+  size_t num_row;
+  /*! \brief feature matrix about data content */
+  FMatrix fmat;
+  /*!
+   * \brief cache pointer to verify if the data structure is cached in some learner
+   *  used to verify if DMatrix is cached
+   */
+  void *cache_learner_ptr_;
+  /*! \brief default constructor, starts with zero rows and no caching learner */
+  DMatrix(void) : num_row(0), cache_learner_ptr_(NULL) {}
+};
+
+}  // namespace learner
+}  // namespace xgboost
+#endif  // XGBOOST_LEARNER_DMATRIX_H_
--- a/src/learner/evaluation-inl.hpp
+++ b/src/learner/evaluation-inl.hpp
@@ -0,0 +1,346 @@
+#ifndef XGBOOST_LEARNER_EVALUATION_INL_HPP_
+#define XGBOOST_LEARNER_EVALUATION_INL_HPP_
+/*!
+* \file evaluation-inl.hpp
+* \brief evaluation metrics for regression and classification and rank
+* \author Kailong Chen, Tianqi Chen
+*/
+#include <vector>
+#include <utility>
+#include <string>
+#include <climits>
+#include <algorithm>
+#include "./evaluation.h"
+#include "./helper_utils.h"
+
+namespace xgboost {
+namespace learner {
+/*!
+ * \brief base class of elementwise evaluation,
+ *  the metric is computed as GetFinal(weighted sum of EvalRow, sum of weights),
+ *  where EvalRow and GetFinal are looked up as static members of Derived
+ * \tparam Derived the name of subclass
+ */
+template<typename Derived>
+struct EvalEWiseBase : public IEvaluator {
+  /*!
+   * \brief evaluate the metric over all the instances
+   * \param preds prediction, must have the same size as info.labels
+   * \param info meta information, labels and weights are used
+   * \return Derived::GetFinal applied to the weighted sum and the sum of weights
+   */
+  virtual float Eval(const std::vector<float> &preds,
+                     const MetaInfo &info) const {
+    utils::Check(preds.size() == info.labels.size(),
+                 "label and prediction size not match");
+    const unsigned ndata = static_cast<unsigned>(preds.size());
+    // both accumulators are combined across threads by the reduction clause
+    float sum = 0.0, wsum = 0.0;
+    #pragma omp parallel for reduction(+:sum, wsum) schedule(static)
+    for (unsigned i = 0; i < ndata; ++i) {
+      // GetWeight returns 1.0f when no weights are given
+      const float wt = info.GetWeight(i);
+      sum += Derived::EvalRow(info.labels[i], preds[i]) * wt;
+      wsum += wt;
+    }
+    return Derived::GetFinal(sum, wsum);
+  }
+  /*!
+   * \brief to be implemented by subclass,
+   *   get evaluation result from one row,
+   *   the weight of the row is applied by Eval, not by this function
+   * \param label label of current instance
+   * \param pred prediction value of current instance
+   * \return evaluation result of the row
+   */
+  inline static float EvalRow(float label, float pred);
+  /*!
+   * \brief to be overridden by subclass, final transformation
+   * \param esum the weighted sum of the statistics returned by EvalRow
+   * \param wsum sum of weight
+   * \return esum divided by wsum in this default version
+   */
+  inline static float GetFinal(float esum, float wsum) {
+    return esum / wsum;
+  }
+};
+
+/*! \brief RMSE: square root of the weighted mean of squared differences */
+struct EvalRMSE : public EvalEWiseBase<EvalRMSE> {
+  /*! \return name of the metric */
+  virtual const char *Name(void) const {
+    return "rmse";
+  }
+  /*! \return squared difference between label and prediction */
+  inline static float EvalRow(float label, float pred) {
+    const float err = label - pred;
+    return err * err;
+  }
+  /*! \return square root of esum divided by wsum */
+  inline static float GetFinal(float esum, float wsum) {
+    const float mean_sq = esum / wsum;
+    return std::sqrt(mean_sq);
+  }
+};
+
+/*!
+ * \brief logloss: negative log-likelihood of labels in [0,1]
+ *  given the predicted probability
+ */
+struct EvalLogLoss : public EvalEWiseBase<EvalLogLoss> {
+  /*! \return name of the metric */
+  virtual const char *Name(void) const {
+    return "logloss";
+  }
+  /*!
+   * \brief negative log-likelihood of one row
+   * \param y label of the instance
+   * \param py predicted probability of the instance
+   * \return - y * log(py) - (1 - y) * log(1 - py), where a probability
+   *  closer than eps to 0 or 1 is replaced by eps or 1 - eps
+   */
+  inline static float EvalRow(float y, float py) {
+    // a prediction of exactly 0 or 1 would give log(0) = -inf, and
+    // 0 * log(0) = NaN even when the prediction is perfectly right,
+    // so saturated probabilities are clamped before taking the log
+    const float eps = 1e-16f;
+    const float pneg = 1.0f - py;
+    if (py < eps) {
+      return - y * std::log(eps) - (1.0f - y) * std::log(1.0f - eps);
+    }
+    if (pneg < eps) {
+      return - y * std::log(1.0f - eps) - (1.0f - y) * std::log(eps);
+    }
+    return - y * std::log(py) - (1.0f - y) * std::log(pneg);
+  }
+};
+
+/*! \brief classification error with the decision threshold at 0.5 */
+struct EvalError : public EvalEWiseBase<EvalError> {
+  /*! \return name of the metric */
+  virtual const char *Name(void) const {
+    return "error";
+  }
+  /*!
+   * \brief error contributed by one row
+   * \param label label of the instance
+   * \param pred prediction of the instance
+   * \return 1 - label when pred is above 0.5, label otherwise
+   */
+  inline static float EvalRow(float label, float pred) {
+    // assume label is in [0,1]
+    if (pred > 0.5f) return 1.0f - label;
+    return label;
+  }
+};
+
+/*! \brief match error: label and prediction are compared as integers */
+struct EvalMatchError : public EvalEWiseBase<EvalMatchError> {
+  /*! \return name of the metric */
+  virtual const char *Name(void) const {
+    return "merror";
+  }
+  /*!
+   * \brief error contributed by one row
+   * \param label label of the instance
+   * \param pred prediction of the instance
+   * \return 1 when the values differ after conversion to int, 0 otherwise
+   */
+  inline static float EvalRow(float label, float pred) {
+    const int pred_id = static_cast<int>(pred);
+    const int label_id = static_cast<int>(label);
+    return pred_id != label_id ? 1.0f : 0.0f;
+  }
+};
+
+/*!
+ * \brief AMS: also records best threshold
+ *  the name of the metric has the format "ams@ratio", the ratio selects
+ *  the fraction of top ranked predictions that is taken into account
+ */
+struct EvalAMS : public IEvaluator {
+ public:
+  /*!
+   * \brief constructor
+   * \param name name of the metric, must match the format "ams@%f"
+   */
+  explicit EvalAMS(const char *name) {
+    name_ = name;
+    // note: ams@0 will automatically select which ratio to go
+    utils::Check(sscanf(name, "ams@%f", &ratio_) == 1, "invalid ams format");
+  }
+  /*!
+   * \brief evaluate AMS
+   * \param preds prediction
+   * \param info meta information, both labels and weights must have
+   *  one entry per prediction
+   * \return the best AMS found when all the data is scanned,
+   *  the AMS at the cutoff position otherwise
+   */
+  virtual float Eval(const std::vector<float> &preds,
+                     const MetaInfo &info) const {
+    const unsigned ndata = static_cast<unsigned>(preds.size());
+    // labels are indexed by prediction position below,
+    // so a shorter label vector would be read out of bound
+    utils::Check(info.labels.size() == ndata, "label size predict size not match");
+    utils::Check(info.weights.size() == ndata, "we need weight to evaluate ams");
+    std::vector< std::pair<float, unsigned> > rec(ndata);
+
+    #pragma omp parallel for schedule(static)
+    for (unsigned i = 0; i < ndata; ++i) {
+      rec[i] = std::make_pair(preds[i], i);
+    }
+    // highest prediction comes first
+    std::sort(rec.begin(), rec.end(), CmpFirst);
+    unsigned ntop = static_cast<unsigned>(ratio_ * ndata);
+    // a ratio that selects nothing means: scan everything and pick the best threshold
+    if (ntop == 0) ntop = ndata;
+    const double br = 10.0;
+    unsigned thresindex = 0;
+    // accumulated weight of the instances with label > 0.5 (s_tp) and of the rest (b_fp)
+    double s_tp = 0.0, b_fp = 0.0, tams = 0.0;
+    // the last record is never visited because rec[i+1] is inspected
+    for (unsigned i = 0; i < ndata-1 && i < ntop; ++i) {
+      const unsigned ridx = rec[i].second;
+      const float wt = info.weights[ridx];
+      if (info.labels[ridx] > 0.5f) {
+        s_tp += wt;
+      } else {
+        b_fp += wt;
+      }
+      // a threshold can only be placed between two different prediction values
+      if (rec[i].first != rec[i+1].first) {
+        double ams = sqrtf(2*((s_tp+b_fp+br) * log(1.0 + s_tp/(b_fp+br)) - s_tp));
+        if (tams < ams) {
+          thresindex = i;
+          tams = ams;
+        }
+      }
+    }
+    if (ntop == ndata) {
+      fprintf(stderr, "\tams-ratio=%g", static_cast<float>(thresindex) / ndata);
+      return tams;
+    } else {
+      return sqrtf(2*((s_tp+b_fp+br) * log(1.0 + s_tp/(b_fp+br)) - s_tp));
+    }
+  }
+  /*! \return name of the metric, as passed to the constructor */
+  virtual const char *Name(void) const {
+    return name_.c_str();
+  }
+
+ private:
+  /*! \brief name of the metric */
+  std::string name_;
+  /*! \brief ratio parsed from the name */
+  float ratio_;
+};
+
+/*!
+ * \brief Area under curve, for both classification and rank
+ *  when group information is present the AUC is computed per group
+ *  and the average over the groups is returned
+ */
+struct EvalAuc : public IEvaluator {
+  /*!
+   * \brief evaluate AUC
+   * \param preds prediction, must have the same size as info.labels
+   * \param info meta information; labels, weights and group_ptr are used
+   * \return sum of the AUC of each group divided by the number of groups
+   */
+  virtual float Eval(const std::vector<float> &preds,
+                     const MetaInfo &info) const {
+    utils::Check(preds.size() == info.labels.size(), "label size predict size not match");
+    // without group information, all the data is treated as one single group
+    std::vector<unsigned> tgptr(2, 0); tgptr[1] = preds.size();
+    const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
+    utils::Check(gptr.back() == preds.size(),
+                 "EvalAuc: group structure must match number of prediction");
+    const unsigned ngroup = static_cast<unsigned>(gptr.size() - 1);
+    // sum statistics
+    double sum_auc = 0.0f;
+    #pragma omp parallel reduction(+:sum_auc)
+    {
+      // each thread takes a local rec
+      std::vector< std::pair<float, unsigned> > rec;
+      #pragma omp for schedule(static)
+      for (unsigned k = 0; k < ngroup; ++k) {
+        rec.clear();
+        for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
+          rec.push_back(std::make_pair(preds[j], j));
+        }
+        // highest prediction comes first
+        std::sort(rec.begin(), rec.end(), CmpFirst);
+        // calculate AUC
+        double sum_pospair = 0.0;
+        // sum_* hold the weights of the closed buckets,
+        // buf_* hold the weights of the bucket of equal predictions being filled
+        double sum_npos = 0.0, sum_nneg = 0.0, buf_pos = 0.0, buf_neg = 0.0;
+        for (size_t j = 0; j < rec.size(); ++j) {
+          const float wt = info.GetWeight(rec[j].second);
+          const float ctr = info.labels[rec[j].second];
+          // keep bucketing predictions in same bucket
+          if (j != 0 && rec[j].first != rec[j - 1].first) {
+            // negatives of the bucket pair with every positive ranked above,
+            // and with half of the positives that share the same prediction
+            sum_pospair += buf_neg * (sum_npos + buf_pos *0.5);
+            sum_npos += buf_pos; sum_nneg += buf_neg;
+            buf_neg = buf_pos = 0.0f;
+          }
+          // the label is used as the positive fraction of the instance
+          buf_pos += ctr * wt; buf_neg += (1.0f - ctr) * wt;
+        }
+        // close the last bucket
+        sum_pospair += buf_neg * (sum_npos + buf_pos *0.5);
+        sum_npos += buf_pos; sum_nneg += buf_neg;
+        // check weird conditions
+        utils::Check(sum_npos > 0.0 && sum_nneg > 0.0,
+                     "AUC: the dataset only contains pos or neg samples");
+        // this is the AUC
+        sum_auc += sum_pospair / (sum_npos*sum_nneg);
+      }
+    }
+    // return average AUC over list
+    return static_cast<float>(sum_auc) / ngroup;
+  }
+  /*! \return name of the metric */
+  virtual const char *Name(void) const {
+    return "auc";
+  }
+};
+
+/*!
+ * \brief base class of the metrics that are evaluated on each group
+ *  of a rank list and averaged over the groups
+ */
+struct EvalRankList : public IEvaluator {
+ public:
+  /*!
+   * \brief evaluate the metric on every group and average the results
+   * \param preds prediction, must have the same size as info.labels
+   * \param info meta information, labels and group_ptr are used
+   * \return sum of EvalMetric over the groups divided by the number of groups
+   */
+  virtual float Eval(const std::vector<float> &preds,
+                     const MetaInfo &info) const {
+    utils::Check(preds.size() == info.labels.size(),
+                  "label size predict size not match");
+    const std::vector<unsigned> &group_bound = info.group_ptr;
+    utils::Assert(group_bound.size() != 0, "must specify group when constructing rank file");
+    utils::Assert(group_bound.back() == preds.size(),
+                   "EvalRanklist: group structure must match number of prediction");
+    const unsigned num_group = static_cast<unsigned>(group_bound.size() - 1);
+    // metric accumulated over all the groups
+    double metric_total = 0.0f;
+    #pragma omp parallel reduction(+:metric_total)
+    {
+      // buffer of (prediction, label) pairs, private to the thread
+      std::vector< std::pair<float, unsigned> > group_rec;
+      #pragma omp for schedule(static)
+      for (unsigned g = 0; g < num_group; ++g) {
+        const unsigned row_begin = group_bound[g];
+        const unsigned row_end = group_bound[g + 1];
+        group_rec.clear();
+        for (unsigned ridx = row_begin; ridx < row_end; ++ridx) {
+          group_rec.push_back(std::make_pair(preds[ridx], static_cast<int>(info.labels[ridx])));
+        }
+        metric_total += this->EvalMetric(group_rec);
+      }
+    }
+    return static_cast<float>(metric_total) / num_group;
+  }
+  /*! \return name of the metric, as passed to the constructor */
+  virtual const char *Name(void) const {
+    return name_.c_str();
+  }
+
+ protected:
+  /*!
+   * \brief constructor
+   * \param name name of the metric; an unsigned number after '@' is stored
+   *  as the top-n cutoff, a trailing '-' turns on the minus flag
+   */
+  explicit EvalRankList(const char *name)
+      : topn_(UINT_MAX), name_(name), minus_(false) {
+    // the cutoff stays at UINT_MAX unless a number is found after '@'
+    unsigned parsed_topn;
+    if (sscanf(name, "%*[^@]@%u[-]?", &parsed_topn) == 1) {
+      topn_ = parsed_topn;
+    }
+    minus_ = (name[strlen(name) - 1] == '-');
+  }
+  /*! \return evaluation metric, given the pair_sort record, (pred,label) */
+  virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &pair_sort) const = 0;
+
+ protected:
+  /*! \brief number of top positions taken into account */
+  unsigned topn_;
+  /*! \brief name of the metric */
+  std::string name_;
+  /*! \brief whether the name of the metric ends with '-' */
+  bool minus_;
+};
+
+/*! \brief Precision at N, for both classification and rank */
+struct EvalPrecision : public EvalRankList{
+ public:
+  explicit EvalPrecision(const char *name) : EvalRankList(name) {}
+
+ protected:
+  /*!
+   * \brief precision of one group
+   * \param rec (pred,label) pairs of the group, sorted in place by prediction
+   * \return number of nonzero labels among the first topn_ records, divided by topn_
+   */
+  virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &rec) const {
+    std::sort(rec.begin(), rec.end(), CmpFirst);
+    // only the head of the list is inspected
+    const size_t nscan = std::min(rec.size(), static_cast<size_t>(this->topn_));
+    unsigned nhit = 0;
+    for (size_t j = 0; j < nscan; ++j) {
+      if (rec[j].second != 0) ++nhit;
+    }
+    return static_cast<float>(nhit) / topn_;
+  }
+};
+
+/*! \brief NDCG: DCG of the predicted order divided by DCG of the order by label */
+struct EvalNDCG : public EvalRankList{
+ public:
+  explicit EvalNDCG(const char *name) : EvalRankList(name) {}
+
+ protected:
+  /*!
+   * \brief DCG of the first topn_ records in their current order
+   * \param rec (pred,label) pairs of the group
+   * \return sum of (2^label - 1) / log(position + 2) over the nonzero labels
+   */
+  inline float CalcDCG(const std::vector< std::pair<float, unsigned> > &rec) const {
+    const size_t nscan = std::min(rec.size(), static_cast<size_t>(this->topn_));
+    double dcg_total = 0.0;
+    for (size_t i = 0; i < nscan; ++i) {
+      const unsigned rel = rec[i].second;
+      if (rel == 0) continue;
+      dcg_total += ((1 << rel) - 1) / logf(i + 2);
+    }
+    return static_cast<float>(dcg_total);
+  }
+  /*!
+   * \brief NDCG of one group
+   * \param rec (pred,label) pairs of the group, reordered in place
+   * \return dcg / idcg; when idcg is zero, 0 if the minus flag is set and 1 otherwise
+   */
+  virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &rec) const {
+    // order given by the model
+    std::stable_sort(rec.begin(), rec.end(), CmpFirst);
+    const float dcg = this->CalcDCG(rec);
+    // best possible order: highest label first
+    std::stable_sort(rec.begin(), rec.end(), CmpSecond);
+    const float idcg = this->CalcDCG(rec);
+    if (idcg != 0.0f) return dcg/idcg;
+    return minus_ ? 0.0f : 1.0f;
+  }
+};
+
+/*! \brief mean average precision, averaged over the groups of the rank list */
+struct EvalMAP : public EvalRankList {
+ public:
+  explicit EvalMAP(const char *name) : EvalRankList(name) {}
+
+ protected:
+  /*!
+   * \brief average precision of one group
+   * \param rec (pred,label) pairs of the group, sorted in place by prediction
+   * \return sum of precision at every hit within the first topn_ positions,
+   *  divided by the number of hits in the whole list;
+   *  without any hit, 0 if the minus flag is set and 1 otherwise
+   */
+  virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &rec) const {
+    std::sort(rec.begin(), rec.end(), CmpFirst);
+    unsigned nhits = 0;
+    double sumap = 0.0;
+    for (size_t i = 0; i < rec.size(); ++i) {
+      if (rec[i].second == 0) continue;
+      ++nhits;
+      // hits beyond the cutoff are counted, but add no precision
+      if (i < this->topn_) {
+        sumap += static_cast<float>(nhits) / (i+1);
+      }
+    }
+    if (nhits == 0) return minus_ ? 0.0f : 1.0f;
+    return static_cast<float>(sumap / nhits);
+  }
+};
+
+}  // namespace learner
+}  // namespace xgboost
+#endif  // XGBOOST_LEARNER_EVALUATION_INL_HPP_
--- a/src/learner/evaluation.h
+++ b/src/learner/evaluation.h
@@ -0,0 +1,82 @@
+#ifndef XGBOOST_LEARNER_EVALUATION_H_
+#define XGBOOST_LEARNER_EVALUATION_H_
+/*!
+ * \file evaluation.h
+ * \brief interface of evaluation function supported in xgboost
+ * \author Tianqi Chen, Kailong Chen
+ */
+#include <string>
+#include <vector>
+#include "../utils/utils.h"
+
+namespace xgboost {
+namespace learner {
+/*! \brief evaluator that evaluates the loss metrics */
+struct IEvaluator{
+  /*!
+   * \brief evaluate a specific metric
+   * \param preds prediction
+   * \param info information, including label etc.
+   * \return the value of the metric
+   */
+  virtual float Eval(const std::vector<float> &preds,
+                     const MetaInfo &info) const = 0;
+  /*! \return name of metric */
+  virtual const char *Name(void) const = 0;
+  /*! \brief virtual destructor, evaluators are deleted through this interface */
+  virtual ~IEvaluator(void) {}
+};
+}  // namespace learner
+}  // namespace xgboost
+
+// include implementations of evaluation functions
+#include "evaluation-inl.hpp"
+// factory function
+namespace xgboost {
+namespace learner {
+/*!
+ * \brief create an evaluator from the name of the metric
+ * \param name name of the metric: "rmse", "error", "merror", "logloss", "auc"
+ *  are matched exactly, "ams@", "pre@", "map" and "ndcg" are matched as prefix
+ *  and the full name is handed to the evaluator for further parsing
+ * \return a newly allocated evaluator, to be deleted by the caller;
+ *  for an unknown name utils::Error is called
+ */
+inline IEvaluator* CreateEvaluator(const char *name) {
+  if (!strcmp(name, "rmse")) return new EvalRMSE();
+  if (!strcmp(name, "error")) return new EvalError();
+  if (!strcmp(name, "merror")) return new EvalMatchError();
+  if (!strcmp(name, "logloss")) return new EvalLogLoss();
+  if (!strcmp(name, "auc")) return new EvalAuc();
+  if (!strncmp(name, "ams@", 4)) return new EvalAMS(name);
+  if (!strncmp(name, "pre@", 4)) return new EvalPrecision(name);
+  if (!strncmp(name, "map", 3)) return new EvalMAP(name);
+  // compare all four characters, a length of 3 would accept any name starting with "ndc"
+  if (!strncmp(name, "ndcg", 4)) return new EvalNDCG(name);
+  utils::Error("unknown evaluation metric type: %s", name);
+  return NULL;
+}
+
+/*!
+ * \brief a set of evaluators,
+ *  the set owns the evaluators it creates and deletes them in the destructor
+ */
+class EvalSet{
+ public:
+  /*! \brief construct an empty set */
+  EvalSet(void) {}
+  /*!
+   * \brief add an evaluator to the set,
+   *  nothing is done when an evaluator with the same name is already in the set
+   * \param name name of the metric, passed to CreateEvaluator
+   */
+  inline void AddEval(const char *name) {
+    for (size_t i = 0; i < evals_.size(); ++i) {
+      if (!strcmp(name, evals_[i]->Name())) return;
+    }
+    evals_.push_back(CreateEvaluator(name));
+  }
+  /*! \brief destructor, deletes all the evaluators in the set */
+  ~EvalSet(void) {
+    for (size_t i = 0; i < evals_.size(); ++i) {
+      delete evals_[i];
+    }
+  }
+  /*!
+   * \brief evaluate all the metrics in the set
+   * \param evname name of the dataset, used as prefix of each entry
+   * \param preds prediction
+   * \param info meta information of the dataset
+   * \return concatenation of "\t<evname>-<metric>:<value>" for each metric,
+   *  in the order the metrics were added
+   */
+  inline std::string Eval(const char *evname,
+                          const std::vector<float> &preds,
+                          const MetaInfo &info) const {
+    std::string result = "";
+    for (size_t i = 0; i < evals_.size(); ++i) {
+      float res = evals_[i]->Eval(preds, info);
+      char tmp[1024];
+      snprintf(tmp, sizeof(tmp), "\t%s-%s:%f", evname, evals_[i]->Name(), res);
+      result += tmp;
+    }
+    return result;
+  }
+
+ private:
+  // a copy would hold the same raw pointers and delete every evaluator twice,
+  // so copying is disabled: declared private and intentionally not defined
+  EvalSet(const EvalSet &other);
+  EvalSet &operator=(const EvalSet &other);
+  /*! \brief evaluators owned by the set */
+  std::vector<const IEvaluator*> evals_;
+};
+}  // namespace learner
+}  // namespace xgboost
+#endif  // XGBOOST_LEARNER_EVALUATION_H_
--- a/src/learner/helper_utils.h
+++ b/src/learner/helper_utils.h
@@ -0,0 +1,50 @@
+#ifndef XGBOOST_LEARNER_HELPER_UTILS_H_
+#define XGBOOST_LEARNER_HELPER_UTILS_H_
+/*!
+ * \file helper_utils.h
+ * \brief useful helper functions
+ * \author Tianqi Chen, Kailong Chen
+ */
+#include <utility>
+#include <vector>
+#include <algorithm>
+namespace xgboost {
+namespace learner {
+/*!
+ * \brief simple helper function to do softmax in place
+ *  the maximum is subtracted before exponentiation, so the largest
+ *  exponent is exp(0) and large inputs do not overflow
+ * \param p_rec the values to be transformed, the result is stored back
+ *  into the same vector; an empty vector is left untouched
+ */
+inline static void Softmax(std::vector<float>* p_rec) {
+  std::vector<float> &rec = *p_rec;
+  // rec[0] is read below, nothing to do for an empty input
+  if (rec.empty()) return;
+  float wmax = rec[0];
+  for (size_t i = 1; i < rec.size(); ++i) {
+    wmax = std::max(rec[i], wmax);
+  }
+  double wsum = 0.0f;
+  for (size_t i = 0; i < rec.size(); ++i) {
+    rec[i] = std::exp(rec[i]-wmax);
+    wsum += rec[i];
+  }
+  for (size_t i = 0; i < rec.size(); ++i) {
+    rec[i] /= static_cast<float>(wsum);
+  }
+}
+/*!
+ * \brief find the index of the maximum entry
+ *  a later entry only replaces the current best when it is larger
+ *  by more than 1e-6, so the earliest entry wins among near ties
+ * \param rec the values to be scanned
+ * \return index of the selected entry, 0 for an empty vector
+ */
+inline static int FindMaxIndex(const std::vector<float>& rec) {
+  size_t best = 0;
+  const size_t nrec = rec.size();
+  for (size_t i = 1; i < nrec; ++i) {
+    if (rec[i] > rec[best] + 1e-6f) best = i;
+  }
+  return static_cast<int>(best);
+}
+
+/*! \brief comparator that puts the pair with the larger first element in front */
+inline static bool CmpFirst(const std::pair<float, unsigned> &a,
+                            const std::pair<float, unsigned> &b) {
+  return b.first < a.first;
+}
+/*! \brief comparator that puts the pair with the larger second element in front */
+inline static bool CmpSecond(const std::pair<float, unsigned> &a,
+                             const std::pair<float, unsigned> &b) {
+  return b.second < a.second;
+}
+}  // namespace learner
+}  // namespace xgboost
+#endif  // XGBOOST_LEARNER_HELPER_UTILS_H_
--- a/src/learner/learner-inl.hpp
+++ b/src/learner/learner-inl.hpp
@@ -0,0 +1,296 @@
+#ifndef XGBOOST_LEARNER_LEARNER_INL_HPP_
+#define XGBOOST_LEARNER_LEARNER_INL_HPP_
+/*!
+ * \file learner-inl.hpp
+ * \brief learning algorithm 
+ * \author Tianqi Chen
+ */
+#include <algorithm>
+#include <vector>
+#include <utility>
+#include <string>
+#include "./objective.h"
+#include "./evaluation.h"
+#include "../gbm/gbm.h"
+
+namespace xgboost {
+/*! \brief namespace for learning algorithm */
+namespace learner {
+/*!
+ * \brief learner that does gradient boosting on specific objective functions
+ *  and does training and prediction
+ * \tparam FMatrix type of the feature matrix stored in DMatrix
+ */
+template<typename FMatrix>
+class BoostLearner {
+ public:
+  BoostLearner(void) {
+    obj_ = NULL;
+    gbm_ = NULL;
+    // silent is read by SetCacheData, it must not be left indeterminate
+    silent = 0;
+    name_obj_ = "reg:linear";
+    name_gbm_ = "gbtree";
+  }
+  ~BoostLearner(void) {
+    if (obj_ != NULL) delete obj_;
+    if (gbm_ != NULL) delete gbm_;
+  }
+  /*!
+   * \brief add internal cache space for mat, this can speedup prediction for matrix,
+   *        please cache prediction for training and eval data
+   *    warning: if the model is loaded from file from some previous training history
+   *             set cache data must be called with exactly SAME
+   *             data matrices to continue training otherwise it will cause error
+   * \param mats array of pointers to matrix whose prediction result need to be cached
+   */
+  inline void SetCacheData(const std::vector<DMatrix<FMatrix>*>& mats) {
+    // estimate feature bound
+    unsigned num_feature = 0;
+    // assign buffer index
+    size_t buffer_size = 0;
+    utils::Assert(cache_.size() == 0, "can only call cache data once");
+    for (size_t i = 0; i < mats.size(); ++i) {
+      // a matrix that appears more than once only gets one cache entry
+      bool dupilicate = false;
+      for (size_t j = 0; j < i; ++j) {
+        if (mats[i] == mats[j]) dupilicate = true;
+      }
+      if (dupilicate) continue;
+      // set mats[i]'s cache learner pointer to this
+      mats[i]->cache_learner_ptr_ = this;
+      cache_.push_back(CacheEntry(mats[i], buffer_size, mats[i]->num_row));
+      buffer_size += mats[i]->num_row;
+      // NOTE(review): DMatrix as declared in dmatrix.h has num_row but no num_col
+      // member -- confirm where the number of columns should come from
+      num_feature = std::max(num_feature, static_cast<unsigned>(mats[i]->num_col));
+    }
+    char str_temp[25];
+    if (num_feature > mparam.num_feature) {
+      snprintf(str_temp, sizeof(str_temp), "%u", num_feature);
+      this->SetParam("bst:num_feature", str_temp);
+    }
+    // size_t is not guaranteed to be unsigned long, cast to match the format
+    snprintf(str_temp, sizeof(str_temp), "%lu",
+             static_cast<unsigned long>(buffer_size));
+    this->SetParam("num_pbuffer", str_temp);
+    if (!silent) {
+      printf("buffer_size=%lu\n", static_cast<unsigned long>(buffer_size));
+    }
+  }
+  /*!
+   * \brief set parameters from outside,
+   *  every parameter is recorded and replayed to the objective
+   *  and the booster when they are created
+   * \param name name of the parameter
+   * \param val  value of the parameter
+   */
+  inline void SetParam(const char *name, const char *val) {
+    if (!strcmp(name, "silent")) silent = atoi(val);
+    if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val);
+    // the type of model and the model parameters are fixed once the booster exists
+    if (gbm_ == NULL) {
+      if (!strcmp(name, "objective")) name_obj_ = val;
+      if (!strcmp(name, "booster")) name_gbm_ = val;
+      mparam.SetParam(name, val);
+    }
+    cfg_.push_back(std::make_pair(std::string(name), std::string(val)));
+  }
+  /*!
+   * \brief initialize the model
+   */
+  inline void InitModel(void) {
+    this->InitObjGBM();
+    // adapt the base score
+    mparam.base_score = obj_->ProbToMargin(mparam.base_score);
+    gbm_->InitModel();
+  }
+  /*!
+   * \brief load model from stream
+   * \param fi input stream
+   */
+  inline void LoadModel(utils::IStream &fi) {
+    utils::Check(fi.Read(&mparam, sizeof(ModelParam)) != 0,
+                 "BoostLearner: wrong model format");
+    utils::Check(fi.Read(&name_obj_), "BoostLearner: wrong model format");
+    utils::Check(fi.Read(&name_gbm_), "BoostLearner: wrong model format");
+    // delete existing gbm if any
+    if (obj_ != NULL) delete obj_;
+    if (gbm_ != NULL) delete gbm_;
+    // the pointers must be reset: InitObjGBM returns early when obj_ is not NULL,
+    // which would leave both pointers dangling and used by the load below
+    obj_ = NULL;
+    gbm_ = NULL;
+    this->InitObjGBM();
+    gbm_->LoadModel(fi);
+  }
+  /*!
+   * \brief load model from file
+   * \param fname file name
+   */
+  inline void LoadModel(const char *fname) {
+    utils::FileStream fi(utils::FopenCheck(fname, "rb"));
+    this->LoadModel(fi);
+    fi.Close();
+  }
+  /*!
+   * \brief save model into stream, in the order model parameter,
+   *  name of objective, name of booster, content of booster
+   * \param fo output stream
+   */
+  inline void SaveModel(utils::IStream &fo) const {
+    fo.Write(&mparam, sizeof(ModelParam));
+    // NOTE(review): MetaInfo::SaveBinary hands its fields to Write by reference,
+    // confirm that IStream::Write accepts a pointer to string here
+    fo.Write(&name_obj_);
+    fo.Write(&name_gbm_);
+    gbm_->SaveModel(fo);
+  }
+  /*!
+   * \brief save model into file
+   * \param fname file name
+   */
+  inline void SaveModel(const char *fname) const {
+    utils::FileStream fo(utils::FopenCheck(fname, "wb"));
+    this->SaveModel(fo);
+    fo.Close();
+  }
+  /*!
+   * \brief update the model for one iteration
+   * \param iter current iteration number
+   * \param p_train pointer to the data matrix
+   */
+  inline void UpdateOneIter(int iter, DMatrix<FMatrix> *p_train) {
+    // PredictRaw takes the data first and a pointer to the output second
+    this->PredictRaw(*p_train, &preds_);
+    obj_->GetGradient(preds_, p_train->info, iter, &gpair_);
+    gbm_->DoBoost(gpair_, p_train->fmat, p_train->info.root_index);
+  }
+  /*!
+   * \brief evaluate the model for specific iteration
+   * \param iter iteration number
+   * \param evals datas i want to evaluate
+   * \param evname name of each dataset
+   * \return a string corresponding to the evaluation result
+   */
+  inline std::string EvalOneIter(int iter,
+                                 const std::vector<const DMatrix<FMatrix>*> &evals,
+                                 const std::vector<std::string> &evname) {
+    std::string res;
+    char tmp[256];
+    snprintf(tmp, sizeof(tmp), "[%d]", iter);
+    res = tmp;
+    for (size_t i = 0; i < evals.size(); ++i) {
+      this->PredictRaw(*evals[i], &preds_);
+      obj_->EvalTransform(&preds_);
+      res += evaluator_.Eval(evname[i].c_str(), preds_, evals[i]->info);
+    }
+    return res;
+  }
+  /*!
+   * \brief simple evaluation function, with a specified metric
+   * \param data input data
+   * \param metric name of metric, "auto" selects the default metric of the objective
+   * \return a pair of <evaluation name, result>
+   */
+  std::pair<std::string, float> Evaluate(const DMatrix<FMatrix> &data, std::string metric) {
+    if (metric == "auto") metric = obj_->DefaultEvalMetric();
+    IEvaluator *ev = CreateEvaluator(metric.c_str());
+    this->PredictRaw(data, &preds_);
+    obj_->EvalTransform(&preds_);
+    float res = ev->Eval(preds_, data.info);
+    delete ev;
+    return std::make_pair(metric, res);
+  }
+  /*!
+   * \brief get prediction
+   * \param data input data
+   * \param out_preds output vector that stores the prediction
+   */
+  inline void Predict(const DMatrix<FMatrix> &data,
+                      std::vector<float> *out_preds) const {
+    this->PredictRaw(data, out_preds);
+    obj_->PredTransform(out_preds);
+  }
+
+ protected:
+  /*!
+   * \brief initialize the objective function and GBM,
+   * if not yet done
+   */
+  inline void InitObjGBM(void) {
+    if (obj_ != NULL) return;
+    utils::Assert(gbm_ == NULL, "GBM and obj should be NULL");
+    obj_ = CreateObjFunction(name_obj_.c_str());
+    gbm_ = gbm::CreateGradBooster<FMatrix>(name_gbm_.c_str());
+    // replay every parameter recorded by SetParam
+    for (size_t i = 0; i < cfg_.size(); ++i) {
+      obj_->SetParam(cfg_[i].first.c_str(), cfg_[i].second.c_str());
+      gbm_->SetParam(cfg_[i].first.c_str(), cfg_[i].second.c_str());
+    }
+    evaluator_.AddEval(obj_->DefaultEvalMetric());
+  }
+  /*!
+   * \brief get un-transformed prediction,
+   *  const because it is called from the const function Predict,
+   *  no member of the learner is modified here
+   * \param data training data matrix
+   * \param out_preds output vector that stores the prediction
+   */
+  inline void PredictRaw(const DMatrix<FMatrix> &data,
+                         std::vector<float> *out_preds) const {
+    gbm_->Predict(data.fmat, this->FindBufferOffset(data),
+                  data.info, out_preds);
+  }
+
+  /*! \brief training parameter for regression */
+  struct ModelParam{
+    /* \brief global bias */
+    float base_score;
+    /* \brief number of features  */
+    unsigned num_feature;
+    /* \brief number of class, if it is multi-class classification  */
+    int num_class;
+    /*! \brief reserved field */
+    int reserved[32];
+    /*! \brief constructor */
+    ModelParam(void) {
+      base_score = 0.5f;
+      num_feature = 0;
+      num_class = 0;
+      memset(reserved, 0, sizeof(reserved));
+    }
+    /*!
+     * \brief set parameters from outside
+     * \param name name of the parameter
+     * \param val value of the parameter
+     */
+    inline void SetParam(const char *name, const char *val) {
+      if (!strcmp("base_score", name)) base_score = static_cast<float>(atof(val));
+      if (!strcmp("num_class", name)) num_class = atoi(val);
+      if (!strcmp("bst:num_feature", name)) num_feature = atoi(val);
+    }
+  };
+  // data fields
+  // silent during training
+  int silent;
+  // evaluation set
+  EvalSet evaluator_;
+  // model parameter
+  ModelParam   mparam;
+  // gbm model that back everything
+  gbm::IGradBooster<FMatrix> *gbm_;
+  // name of gbm model used for training
+  std::string name_gbm_;
+  // objective function
+  IObjFunction *obj_;
+  // name of objective function
+  std::string name_obj_;
+  // configurations
+  std::vector< std::pair<std::string, std::string> > cfg_;
+  // temporal storage for prediction
+  std::vector<float> preds_;
+  // gradient pairs
+  std::vector<bst_gpair> gpair_;
+
+ private:
+  // cache entry object that helps handle feature caching
+  struct CacheEntry {
+    const DMatrix<FMatrix> *mat_;
+    size_t buffer_offset_;
+    size_t num_row_;
+    CacheEntry(const DMatrix<FMatrix> *mat, size_t buffer_offset, size_t num_row)
+        :mat_(mat), buffer_offset_(buffer_offset), num_row_(num_row) {}
+  };
+  // find internal buffer offset for certain matrix, if not exist, return -1
+  // a matrix only matches when it is the cached object, points back
+  // to this learner and still has the number of rows it was cached with
+  inline int64_t FindBufferOffset(const DMatrix<FMatrix> &mat) const {
+    for (size_t i = 0; i < cache_.size(); ++i) {
+      if (cache_[i].mat_ == &mat && mat.cache_learner_ptr_ == this) {
+        if (cache_[i].num_row_ == mat.num_row) {
+          return cache_[i].buffer_offset_;
+        }
+      }
+    }
+    return -1;
+  }
+  // data structure field
+  /*! \brief the entries indicates that we have internal prediction cache */
+  std::vector<CacheEntry> cache_;
+};
+}  // namespace learner
+}  // namespace xgboost
+#endif  // XGBOOST_LEARNER_LEARNER_INL_HPP_
--- a/src/learner/objective-inl.hpp
+++ b/src/learner/objective-inl.hpp
@@ -0,0 +1,137 @@
+#ifndef XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
+#define XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
+/*!
+ * \file objective-inl.hpp
+ * \brief objective function implementations
+ * \author Tianqi Chen, Kailong Chen
+ */
+#include <vector>
+#include "./objective.h"
+
+namespace xgboost {
+namespace learner {
+/*!
+ * \brief helper that evaluates transformation and gradients
+ *  of the loss selected by loss_type
+ */
+struct LossType {
+  /*! \brief indicate which type we are using */
+  int loss_type;
+  // list of constants
+  static const int kLinearSquare = 0;
+  static const int kLogisticNeglik = 1;
+  static const int kLogisticClassify = 2;
+  static const int kLogisticRaw = 3;
+  /*!
+   * \brief transform the linear sum to prediction
+   * \param x linear sum of boosting ensemble
+   * \return x itself for kLinearSquare and kLogisticRaw,
+   *  sigmoid of x for kLogisticClassify and kLogisticNeglik
+   */
+  inline float PredTransform(float x) const {
+    if (loss_type == kLogisticRaw || loss_type == kLinearSquare) {
+      return x;
+    }
+    if (loss_type == kLogisticClassify || loss_type == kLogisticNeglik) {
+      return 1.0f / (1.0f + expf(-x));
+    }
+    utils::Error("unknown loss_type");
+    return 0.0f;
+  }
+  /*!
+   * \brief calculate first order gradient of loss, given transformed prediction
+   * \param predt transformed prediction
+   * \param label true label
+   * \return first order gradient
+   */
+  inline float FirstOrderGradient(float predt, float label) const {
+    // PredTransform leaves the raw score untouched for kLogisticRaw,
+    // so the sigmoid is applied here before the gradient is taken
+    if (loss_type == kLogisticRaw) {
+      predt = 1.0f / (1.0f + expf(-predt));
+    }
+    const bool known = loss_type == kLinearSquare ||
+                       loss_type == kLogisticRaw ||
+                       loss_type == kLogisticClassify ||
+                       loss_type == kLogisticNeglik;
+    if (known) return predt - label;
+    utils::Error("unknown loss_type");
+    return 0.0f;
+  }
+  /*!
+   * \brief calculate second order gradient of loss, given transformed prediction
+   * \param predt transformed prediction
+   * \param label true label
+   * \return second order gradient
+   */
+  inline float SecondOrderGradient(float predt, float label) const {
+    if (loss_type == kLinearSquare) return 1.0f;
+    // same treatment of the raw score as in FirstOrderGradient
+    if (loss_type == kLogisticRaw) {
+      predt = 1.0f / (1.0f + expf(-predt));
+    }
+    const bool logistic = loss_type == kLogisticRaw ||
+                          loss_type == kLogisticClassify ||
+                          loss_type == kLogisticNeglik;
+    if (logistic) return predt * (1 - predt);
+    utils::Error("unknown loss_type");
+    return 0.0f;
+  }
+  /*!
+   * \brief transform probability value back to margin
+   * \param base_score the probability, must be in (0,1) for the logistic losses
+   * \return the logit of base_score for the logistic losses, base_score otherwise
+   */
+  inline float ProbToMargin(float base_score) const {
+    const bool logistic = loss_type == kLogisticRaw ||
+                          loss_type == kLogisticClassify ||
+                          loss_type == kLogisticNeglik;
+    if (!logistic) return base_score;
+    utils::Check(base_score > 0.0f && base_score < 1.0f,
+                 "base_score must be in (0,1) for logistic loss");
+    return -logf(1.0f / base_score - 1.0f);
+  }
+  /*! \brief get default evaluation metric for the objective */
+  inline const char *DefaultEvalMetric(void) const {
+    switch (loss_type) {
+      case kLogisticClassify: return "error";
+      case kLogisticRaw: return "auc";
+      default: return "rmse";
+    }
+  }
+};
+
+/*!
+ * \brief objective function whose gradient is computed independently
+ *  for each instance from the loss selected by loss_type
+ */
+class RegLossObj : public IObjFunction{
+ public:
+  /*!
+   * \brief constructor
+   * \param loss_type one of the constants defined in LossType
+   */
+  explicit RegLossObj(int loss_type) {
+    loss.loss_type = loss_type;
+    scale_pos_weight = 1.0f;
+  }
+  virtual ~RegLossObj(void) {}
+  /*!
+   * \brief set parameters from outside, only "scale_pos_weight" is recognized
+   * \param name name of the parameter
+   * \param val value of the parameter
+   */
+  virtual void SetParam(const char *name, const char *val) {
+    if (!strcmp("scale_pos_weight", name)) {
+      scale_pos_weight = static_cast<float>(atof(val));
+    }
+  }
+  /*!
+   * \brief get gradient over each of predictions
+   * \param preds prediction of current round, must have the same size as info.labels
+   * \param info information about labels and weights
+   * \param iter current iteration number, not used by this objective
+   * \param out_gpair output vector, resized to the number of predictions
+   */
+  virtual void GetGradient(const std::vector<float>& preds,
+                           const MetaInfo &info,
+                           int iter,
+                           std::vector<bst_gpair> *out_gpair) {
+    utils::Check(preds.size() == info.labels.size(),
+                 "labels are not correctly provided");
+    std::vector<bst_gpair> &gpair = *out_gpair;
+    gpair.resize(preds.size());
+    // start calculating gradient
+    const unsigned ndata = static_cast<unsigned>(preds.size());
+    #pragma omp parallel for schedule(static)
+    for (unsigned j = 0; j < ndata; ++j) {
+      float p = loss.PredTransform(preds[j]);
+      float w = info.GetWeight(j);
+      // instances whose label is exactly 1 get their weight scaled
+      if (info.labels[j] == 1.0f) w *= scale_pos_weight;
+      gpair[j] = bst_gpair(loss.FirstOrderGradient(p, info.labels[j]) * w,
+                           loss.SecondOrderGradient(p, info.labels[j]) * w);
+    }
+  }
+  virtual const char* DefaultEvalMetric(void) {
+    return loss.DefaultEvalMetric();
+  }
+  /*!
+   * \brief transform prediction values in place with the transformation of the loss
+   * \param io_preds prediction values, saves to this vector as well
+   */
+  virtual void PredTransform(std::vector<float> *io_preds) {
+    std::vector<float> &preds = *io_preds;
+    const unsigned ndata = static_cast<unsigned>(preds.size());
+    #pragma omp parallel for schedule(static)
+    for (unsigned j = 0; j < ndata; ++j) {
+      preds[j] = loss.PredTransform(preds[j]);
+    }
+  }
+  /*!
+   * \brief transform probability value back to margin, forwarded to the loss
+   *  so that a base_score given as probability becomes a margin for logistic losses
+   *  NOTE(review): intended to override IObjFunction::ProbToMargin,
+   *  confirm that the signature matches the declaration in objective.h
+   * \param base_score the probability to be transformed
+   * \return the value returned by LossType::ProbToMargin
+   */
+  virtual float ProbToMargin(float base_score) {
+    return loss.ProbToMargin(base_score);
+  }
+
+ protected:
+  /*! \brief factor applied to the weight of the instances with label 1 */
+  float scale_pos_weight;
+  /*! \brief loss used by the objective */
+  LossType loss;
+};
+}  // namespace learner
+}  // namespace xgboost
+#endif  // XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
--- a/src/learner/objective.h
+++ b/src/learner/objective.h
@@ -0,0 +1,79 @@
+#ifndef XGBOOST_LEARNER_OBJECTIVE_H_
+#define XGBOOST_LEARNER_OBJECTIVE_H_
+/*!
+ * \file objective.h
+ * \brief interface of objective function used for gradient boosting
+ * \author Tianqi Chen, Kailong Chen
+ */
+#include "dmatrix.h"
+
+namespace xgboost {
+namespace learner {
+/*! \brief interface every objective function has to implement */
+class IObjFunction{
+ public:
+  /*! \brief virtual destructor */
+  virtual ~IObjFunction() {}
+  /*!
+   * \brief receive one parameter from outside
+   * \param name name of the parameter
+   * \param val value of the parameter
+   */
+  virtual void SetParam(const char *name, const char *val) = 0;
+  /*!
+   * \brief get gradient over each of predictions, given existing information
+   * \param preds prediction of current round
+   * \param info information about labels, weights, groups in rank
+   * \param iter current iteration number
+   * \param out_gpair output vector, receives the gradient and second order gradient
+   */
+  virtual void GetGradient(const std::vector<float> &preds,
+                           const MetaInfo &info,
+                           int iter,
+                           std::vector<bst_gpair> *out_gpair) = 0;
+  /*! \return the default evaluation metric for the objective */
+  virtual const char *DefaultEvalMetric() = 0;
+  // the functions below are optional hooks; their default implementation is usually enough
+  /*!
+   * \brief transform prediction values, this is only called when Prediction is called
+   * \param io_preds prediction values, the result is stored back into this vector
+   */
+  virtual void PredTransform(std::vector<float> *io_preds) {}
+  /*!
+   * \brief transform prediction values, this is only called when Eval is called;
+   *  the default implementation redirects to PredTransform
+   * \param io_preds prediction values, the result is stored back into this vector
+   */
+  virtual void EvalTransform(std::vector<float> *io_preds) {
+    PredTransform(io_preds);
+  }
+  /*!
+   * \brief transform probability value back to margin; used to turn a user-set
+   *  base_score into the margin used by gradient boosting
+   * \param base_score value to transform
+   * \return transformed value, the default implementation returns base_score unchanged
+   */
+  virtual float ProbToMargin(float base_score) {
+    return base_score;
+  }
+};
+}  // namespace learner
+}  // namespace xgboost
+
+// these are the implementations of the objective functions
+#include "objective-inl.hpp"
+// factory function
+namespace xgboost {
+namespace learner {
+/*! \brief factory function to create objective function by name; unknown names go to utils::Error */
+inline IObjFunction* CreateObjFunction(const char *name) {
+  if (strcmp(name, "reg:linear") == 0) return new RegLossObj(LossType::kLinearSquare);
+  if (strcmp(name, "reg:logistic") == 0) return new RegLossObj(LossType::kLogisticNeglik);
+  if (strcmp(name, "binary:logistic") == 0) return new RegLossObj(LossType::kLogisticClassify);
+  if (strcmp(name, "binary:logitraw") == 0) return new RegLossObj(LossType::kLogisticRaw);
+  utils::Error("unknown objective function type: %s", name);
+  return NULL;
+}
+}  // namespace learner
+}  // namespace xgboost
+#endif  // XGBOOST_LEARNER_OBJECTIVE_H_
--- a/src/tree/model.h
+++ b/src/tree/model.h
@@ -0,0 +1,492 @@
+#ifndef XGBOOST_TREE_MODEL_H_
+#define XGBOOST_TREE_MODEL_H_
+/*!
+ * \file model.h
+ * \brief model structure for tree
+ * \author Tianqi Chen
+ */
+#include <string>
+#include <cstring>
+#include <sstream>
+#include <limits>
+#include <algorithm>
+#include <vector>
+#include <cmath>
+#include "../utils/io.h"
+#include "../utils/fmap.h"
+#include "../utils/utils.h"
+
+namespace xgboost {
+namespace tree {
+/*!
+ * \brief template class of TreeModel 
+ * \tparam TSplitCond data type to indicate split condition
+ * \tparam TNodeStat auxiliary statistics of node to help tree building
+ */
+template<typename TSplitCond, typename TNodeStat>
+class TreeModel {
+ public:
+  /*! \brief auxiliary statistics of node to help tree building */
+  typedef TNodeStat  NodeStat;
+  /*! \brief data type to indicate split condition */
+  typedef TSplitCond SplitCond;
+  /*! \brief parameters of the tree */
+  struct Param{
+    /*! \brief number of start root */
+    int num_roots;
+    /*! \brief total number of nodes */
+    int num_nodes;
+    /*!\brief number of deleted nodes */
+    int num_deleted;
+    /*! \brief maximum depth, this is a statistics of the tree */
+    int max_depth;
+    /*! \brief  number of features used for tree construction */
+    int num_feature;
+    /*! \brief reserved part */
+    int reserved[32];
+    /*! \brief constructor: every field gets a defined value (one root, one node, rest zero) */
+    Param(void) : num_roots(1), num_nodes(1), num_deleted(0), max_depth(0), num_feature(0) {
+      // no field may stay indeterminate: SaveModel writes this struct out as raw bytes
+      memset(reserved, 0, sizeof(reserved));
+    }
+    /*!
+     * \brief set parameters from outside; only num_roots and num_feature are recognised
+     * \param name name of the parameter
+     * \param val  value of the parameter, parsed with atoi
+     */
+    inline void SetParam(const char *name, const char *val) {
+      if (!strcmp("num_roots", name)) num_roots = atoi(val);
+      if (!strcmp("num_feature", name)) num_feature = atoi(val);
+    }
+  };
+  /*! \brief tree node */
+  class Node{
+   public:
+    /*! \brief index of left child */
+    inline int cleft(void) const {
+      return cleft_;
+    }
+    /*! \brief index of right child */
+    inline int cright(void) const {
+      return cright_;
+    }
+    /*! \brief index of the child that is taken when the feature is missing */
+    inline int cdefault(void) const {
+      return default_left() ? cleft_ : cright_;
+    }
+    /*! \brief feature index of split condition (sindex_ without its highest bit) */
+    inline unsigned split_index(void) const {
+      return sindex_ & 0x7FFFFFFFU;
+    }
+    /*! \brief when feature is unknown, whether goes to left child */
+    inline bool default_left(void) const {
+      return 0U != (sindex_ >> 31);
+    }
+    /*! \brief whether current node is leaf node, i.e. its left child index is -1 */
+    inline bool is_leaf(void) const {
+      return -1 == cleft_;
+    }
+    /*! \brief get leaf value of leaf node */
+    inline float leaf_value(void) const {
+      return info_.leaf_value;
+    }
+    /*! \brief get split condition of the node */
+    inline TSplitCond split_cond(void) const {
+      return info_.split_cond;
+    }
+    /*! \brief get parent of the node (parent_ without its highest bit) */
+    inline int parent(void) const {
+      return static_cast<int>(static_cast<unsigned>(parent_) & 0x7FFFFFFFU);
+    }
+    /*! \brief whether current node is left child */
+    inline bool is_left_child(void) const {
+      return 0U != (static_cast<unsigned>(parent_) & (1U << 31));
+    }
+    /*! \brief whether current node is root, i.e. its parent field is -1 */
+    inline bool is_root(void) const {
+      return -1 == parent_;
+    }
+    /*!
+     * \brief set the right child
+     * \param nid node id to right child
+     */
+    inline void set_right_child(int nid) {
+      cright_ = nid;
+    }
+    /*!
+     * \brief set split condition of current node
+     * \param split_index feature index to split
+     * \param split_cond  split condition
+     * \param default_left the default direction when feature is unknown
+     */
+    inline void set_split(unsigned split_index, TSplitCond split_cond,
+                          bool default_left = false) {
+      // the default direction is stored in the highest bit of sindex_
+      info_.split_cond = split_cond;
+      sindex_ = default_left ? (split_index | (1U << 31)) : split_index;
+    }
+    /*!
+     * \brief set the leaf value of the node, the left child index becomes -1
+     * \param value leaf value
+     * \param right right index, could be used to store
+     *        additional information
+     */
+    inline void set_leaf(float value, int right = -1) {
+      cleft_ = -1;
+      cright_ = right;
+      info_.leaf_value = value;
+    }
+
+   private:
+    friend class TreeModel<TSplitCond, TNodeStat>;
+    /*!
+     * \brief in leaf node, we have weights, in non-leaf nodes,
+     *        we have split condition
+     */
+    union Info{
+      float leaf_value;
+      TSplitCond split_cond;
+    };
+    // index of the parent node, the highest bit is used to
+    // indicate whether this node is a left child or not
+    int parent_;
+    // index of the left and of the right child
+    int cleft_, cright_;
+    // split feature index, the highest bit holds the default direction (set = left)
+    unsigned sindex_;
+    // leaf value or split condition, see Info
+    Info info_;
+    // record the parent index; the highest bit is set when this node is a left child
+    inline void set_parent(int pidx, bool is_left_child = true) {
+      const unsigned packed = static_cast<unsigned>(pidx) | (1U << 31);
+      parent_ = is_left_child ? static_cast<int>(packed) : pidx;
+    }
+  };
+
+ protected:
+  // vector of nodes
+  std::vector<Node> nodes;
+  // stats of nodes
+  std::vector<TNodeStat> stats;
+  // free node space, used during training process
+  std::vector<int>  deleted_nodes;
+  // allocate a node id, reusing a slot from deleted_nodes when one is available
+  // !!!!!! NOTE: may cause BUG here, nodes.resize can reallocate the node storage
+  inline int AllocNode(void) {
+    if (param.num_deleted == 0) {
+      const int fresh = param.num_nodes++;
+      utils::Check(param.num_nodes < std::numeric_limits<int>::max(),
+                   "number of nodes in the tree exceed 2^31");
+      nodes.resize(param.num_nodes);
+      stats.resize(param.num_nodes);
+      return fresh;
+    }
+    const int reused = deleted_nodes.back();
+    deleted_nodes.pop_back();
+    --param.num_deleted;
+    return reused;
+  }
+  // delete a tree node: its parent field is reset to -1 and its id is queued for reuse
+  inline void DeleteNode(int nid) {
+    utils::Assert(nid >= param.num_roots, "can not delete root");
+    nodes[nid].set_parent(-1);
+    param.num_deleted += 1;
+    deleted_nodes.push_back(nid);
+  }
+
+ public:
+  /*!
+   * \brief change a non leaf node to a leaf node, delete its children (both must be leaves)
+   * \param rid node id of the node
+   * \param value new leaf value
+   */
+  inline void ChangeToLeaf(int rid, float value) {
+    const int left = nodes[rid].cleft();
+    const int right = nodes[rid].cright();
+    utils::Assert(nodes[left].is_leaf(), "can not delete a non termial child");
+    utils::Assert(nodes[right].is_leaf(), "can not delete a non termial child");
+    DeleteNode(left);
+    DeleteNode(right);
+    nodes[rid].set_leaf(value);
+  }
+  /*!
+   * \brief collapse a non leaf node to a leaf node, delete its children;
+   *  children that are not leaves yet are collapsed first, with leaf value 0
+   * \param rid node id of the node
+   * \param value new leaf value
+   */
+  inline void CollapseToLeaf(int rid, float value) {
+    if (nodes[rid].is_leaf()) return;
+    // left child first, then right child
+    const int child[2] = {nodes[rid].cleft(), nodes[rid].cright()};
+    for (int k = 0; k < 2; ++k) {
+      if (!nodes[child[k]].is_leaf()) CollapseToLeaf(child[k], 0.0f);
+    }
+    ChangeToLeaf(rid, value);
+  }
+
+ public:
+  /*! \brief model parameter */
+  Param param;
+  /*! \brief constructor: one root, one node slot, nothing deleted */
+  TreeModel(void) {
+    nodes.resize(1);
+    param.num_deleted = 0;
+    param.num_roots = 1;
+    param.num_nodes = 1;
+  }
+  /*! \brief get mutable node given nid; nid indexes the node vector directly, no range check */
+  inline Node &operator[](int nid) {
+    return nodes[nid];
+  }
+  /*! \brief get read-only node given nid; nid indexes the node vector directly, no range check */
+  inline const Node &operator[](int nid) const {
+    return nodes[nid];
+  }
+  /*! \brief get mutable node statistics given nid; nid indexes the stats vector directly */
+  inline NodeStat &stat(int nid) {
+    return stats[nid];
+  }
+  /*! \brief initialize the model: every root becomes a leaf with value 0 and no parent */
+  inline void InitModel(void) {
+    nodes.resize(param.num_roots);
+    stats.resize(param.num_roots);
+    param.num_nodes = param.num_roots;
+    for (int rid = 0; rid < param.num_nodes; ++rid) {
+      nodes[rid].set_parent(-1);
+      nodes[rid].set_leaf(0.0f);
+    }
+  }
+  /*!
+   * \brief load model from stream; a node count that is not positive is rejected as wrong format
+   * \param fi input stream
+   */
+  inline void LoadModel(utils::IStream &fi) {
+    utils::Check(fi.Read(&param, sizeof(Param)) > 0, "TreeModel: wrong format");
+    utils::Check(param.num_nodes > 0, "TreeModel: wrong format");
+    nodes.resize(param.num_nodes); stats.resize(param.num_nodes);
+    utils::Check(fi.Read(&nodes[0], sizeof(Node) * nodes.size()) > 0,
+                 "TreeModel: wrong format");
+    utils::Check(fi.Read(&stats[0], sizeof(NodeStat) * stats.size()) > 0,
+                 "TreeModel: wrong format");
+    // rebuild the list of deleted nodes: the non-root slots whose parent field is -1
+    deleted_nodes.resize(0);
+    for (int i = param.num_roots; i < param.num_nodes; i ++) {
+      if (nodes[i].is_root()) deleted_nodes.push_back(i);
+    }
+    utils::Assert(static_cast<int>(deleted_nodes.size()) == param.num_deleted,
+                  "number of deleted nodes do not match");
+  }
+  /*!
+   * \brief save model to stream: the Param struct, then all nodes, then all node statistics
+   * \param fo output stream
+   */
+  inline void SaveModel(utils::IStream &fo) const {
+    utils::Assert(param.num_nodes == static_cast<int>(nodes.size()),
+                  "Tree::SaveModel");
+    utils::Assert(param.num_nodes == static_cast<int>(stats.size()),
+                  "Tree::SaveModel");
+    fo.Write(&param, sizeof(Param));
+    fo.Write(&nodes[0], sizeof(Node) * nodes.size());
+    fo.Write(&stats[0], sizeof(NodeStat) * stats.size());
+  }
+  /*!
+   * \brief allocate two nodes and attach them as left and right child of a node
+   * \param nid node id to add childs
+   */
+  inline void AddChilds(int nid) {
+    const int left  = AllocNode();
+    const int right = AllocNode();
+    nodes[nid].cleft_  = left;
+    nodes[nid].cright_ = right;
+    nodes[left].set_parent(nid, true);
+    nodes[right].set_parent(nid, false);
+  }
+  /*!
+   * \brief only add a right child to a leaf node; the child is linked through cright_
+   * \param nid node id to add right child
+   */
+  inline void AddRightChild(int nid) {
+    int pright = this->AllocNode();
+    nodes[nid].cright_ = pright;
+    nodes[pright].set_parent(nid, false);
+  }
+  /*!
+   * \brief get current depth: number of steps from nid up to its root
+   * \param nid node id
+   * \param pass_rchild whether right child is not counted in depth
+   */
+  inline int GetDepth(int nid, bool pass_rchild = false) const {
+    int depth = 0;
+    for (int cur = nid; !nodes[cur].is_root(); cur = nodes[cur].parent()) {
+      const bool skip = pass_rchild && !nodes[cur].is_left_child();
+      if (!skip) depth += 1;
+    }
+    return depth;
+  }
+  /*!
+   * \brief get maximum depth of the subtree below a node, a leaf has depth 0
+   * \param nid node id
+   */
+  inline int MaxDepth(int nid) const {
+    if (nodes[nid].is_leaf()) return 0;
+    return 1 + std::max(MaxDepth(nodes[nid].cleft()),
+                        MaxDepth(nodes[nid].cright()));
+  }
+  /*! \brief get maximum depth over all roots of the tree */
+  // NOTE: not declared const, unlike the per-node overload it calls
+  inline int MaxDepth(void) {
+    int deepest = 0;
+    for (int rid = 0; rid < param.num_roots; ++rid) {
+      const int d = MaxDepth(rid);
+      if (d > deepest) deepest = d;
+    }
+    return deepest;
+  }
+  /*! \brief number of extra nodes besides the root: allocated nodes minus roots minus deleted nodes */
+  inline int num_extra_nodes(void) const {
+    return param.num_nodes - param.num_roots - param.num_deleted;
+  }
+  /*!
+   * \brief dump model to text string, the roots are dumped one after another
+   * \param fmap feature map of feature types
+   * \param with_stats whether dump out statistics as well
+   * \return the string of dumped model
+   */
+  inline std::string DumpModel(const utils::FeatMap& fmap, bool with_stats) {
+    std::stringstream text;
+    for (int rid = 0; rid < param.num_roots; ++rid) {
+      Dump(rid, text, fmap, 0, with_stats);
+    }
+    return text.str();
+  }
+
+ private:
+  // write the text form of node nid and, recursively, of its subtree to fo;
+  // every line is indented with one tab per level of depth, a leaf prints its value,
+  // a split prints its condition followed by the left and then the right subtree
+  void Dump(int nid, std::stringstream &fo,
+            const utils::FeatMap& fmap, int depth, bool with_stats) {
+    for (int level = depth; level > 0; --level) fo << '\t';
+    const Node &node = nodes[nid];
+    if (node.is_leaf()) {
+      fo << nid << ":leaf=" << node.leaf_value();
+      if (with_stats) stat(nid).Print(fo, true);
+      fo << '\n';
+      return;
+    }
+    // internal node: the form of the split text depends on the feature type in fmap
+    const TSplitCond cond = node.split_cond();
+    const unsigned fid = node.split_index();
+    if (fid >= fmap.size()) {
+      // feature index not covered by the map: use the generic f<index> name
+      fo << nid << ":[f" << fid << "<"<< float(cond)
+         << "] yes=" << node.cleft()
+         << ",no=" << node.cright()
+         << ",missing=" << node.cdefault();
+    } else {
+      switch (fmap.type(fid)) {
+        case utils::FeatMap::kIndicator: {
+          const int nyes = node.default_left() ? node.cright() : node.cleft();
+          fo << nid << ":[" << fmap.name(fid) << "] yes=" << nyes
+             << ",no=" << node.cdefault();
+          break;
+        }
+        case utils::FeatMap::kInteger: {
+          fo << nid << ":[" << fmap.name(fid) << "<"
+             << int(float(cond)+1.0f)
+             << "] yes=" << node.cleft()
+             << ",no=" << node.cright()
+             << ",missing=" << node.cdefault();
+          break;
+        }
+        case utils::FeatMap::kFloat:
+        case utils::FeatMap::kQuantitive: {
+          fo << nid << ":[" << fmap.name(fid) << "<"<< float(cond)
+             << "] yes=" << node.cleft()
+             << ",no=" << node.cright()
+             << ",missing=" << node.cdefault();
+          break;
+        }
+        default: utils::Error("unknown fmap type");
+      }
+    }
+    if (with_stats) {
+      fo << ' ';
+      stat(nid).Print(fo, false);
+    }
+    fo << '\n';
+    this->Dump(node.cleft(), fo, fmap, depth+1, with_stats);
+    this->Dump(node.cright(), fo, fmap, depth+1, with_stats);
+  }
+};
+
+/*! \brief node statistics used in regression tree */
+struct RTreeNodeStat{
+  /*! \brief loss chg caused by current split */
+  float loss_chg;
+  /*! \brief sum of hessian values, used to measure coverage of data */
+  float sum_hess;
+  /*! \brief weight of current node */
+  float base_weight;
+  /*! \brief number of child that is leaf node known up to now */
+  int   leaf_child_cnt;
+  /*!
+   * \brief print information of current stats to fo: gain and cover, or only cover for a leaf
+   */
+  inline void Print(std::stringstream &fo, bool is_leaf) const {
+    // the gain part is written for non-leaf nodes only
+    if (!is_leaf) fo << "gain=" << loss_chg << ',';
+    fo << "cover=" << sum_hess;
+  }
+};
+
+/*! \brief define regression tree to be the most common tree model */
+class RegTree: public TreeModel<bst_float, RTreeNodeStat>{
+ public:
+  /*!
+   * \brief get the leaf index
+   * \param feat dense feature vector, if the feature is missing the field is set to NaN
+   * \param root_id starting root index of the instance
+   * \return the index of the leaf that the feature vector ends up in
+   */
+  inline int GetLeafIndex(const std::vector<float> &feat, unsigned root_id = 0) const {
+    // start from the requested root
+    int nid = static_cast<int>(root_id);
+    // descend one level per iteration until a leaf is reached
+    while (!(*this)[nid].is_leaf()) {
+      const float fvalue = feat[(*this)[nid].split_index()];
+      // a NaN entry is treated as an unknown feature value
+      nid = this->GetNext(nid, fvalue, std::isnan(fvalue));
+    }
+    return nid;
+  }
+  /*!
+   * \brief get the prediction of regression tree, only accepts dense feature vector
+   * \param feat dense feature vector, if the feature is missing the field is set to NaN
+   * \param root_id starting root index of the instance
+   * \return the leaf value of the leaf that the feature vector ends up in
+   */
+  inline float Predict(const std::vector<float> &feat, unsigned root_id = 0) const {
+    const int leaf = this->GetLeafIndex(feat, root_id);
+    return (*this)[leaf].leaf_value();
+  }
+ private:
+  /*! \brief get next position of the tree given current pid */
+  inline int GetNext(int pid, float fvalue, bool is_unknown) const {
+    const Node &node = (*this)[pid];
+    if (is_unknown) {
+      return node.cdefault();
+    }
+    // known value: left when strictly below the split condition, right otherwise
+    const float split_value = node.split_cond();
+    if (fvalue < split_value) {
+      return node.cleft();
+    }
+    return node.cright();
+  }
+};
+
+}  // namespace tree
+}  // namespace xgboost
+#endif  // XGBOOST_TREE_MODEL_H_
--- a/src/tree/param.h
+++ b/src/tree/param.h
@@ -0,0 +1,262 @@
+#ifndef XGBOOST_TREE_PARAM_H_
+#define XGBOOST_TREE_PARAM_H_
+/*!
+ * \file param.h
+ * \brief training parameters, statistics used to support tree construction
+ * \author Tianqi Chen
+ */
+#include <cstring>
+#include "../data.h"
+
+namespace xgboost {
+namespace tree {
+
+/*! \brief core statistics used for tree construction */
+struct GradStats {
+  /*! \brief sum gradient statistics */
+  double sum_grad;
+  /*! \brief sum hessian statistics */
+  double sum_hess;
+  /*! \brief constructor, starts from zeroed statistics */
+  GradStats(void) : sum_grad(0.0), sum_hess(0.0) {}
+  /*! \brief clear the statistics */
+  inline void Clear(void) {
+    sum_grad = 0.0;
+    sum_hess = 0.0;
+  }
+  /*! \brief add a gradient and a hessian value to the sums */
+  inline void Add(double grad, double hess) {
+    sum_grad += grad;
+    sum_hess += hess;
+  }
+  /*! \brief add the statistics of one gradient pair */
+  inline void Add(const bst_gpair& b) {
+    Add(b.grad, b.hess);
+  }
+  /*! \brief add the sums of another statistics object */
+  inline void Add(const GradStats &b) {
+    Add(b.sum_grad, b.sum_hess);
+  }
+  /*! \brief substract the statistics by b, the result is returned and *this is unchanged */
+  inline GradStats Substract(const GradStats &b) const {
+    GradStats diff(*this);
+    diff.sum_grad -= b.sum_grad;
+    diff.sum_hess -= b.sum_hess;
+    return diff;
+  }
+  /*! \return whether the statistics is not used yet, i.e. the hessian sum is exactly zero */
+  inline bool Empty(void) const {
+    return 0.0 == sum_hess;
+  }
+};
+
+/*! \brief training parameters for regression tree */
+struct TrainParam{
+  // learning step size for a time
+  float learning_rate;
+  // minimum loss change required for a split
+  float min_split_loss;
+  // maximum depth of a tree
+  int max_depth;
+  //----- the rest parameters are less important ----
+  // minimum amount of hessian(weight) allowed in a child
+  float min_child_weight;
+  // weight decay parameter used to control leaf fitting
+  float reg_lambda;
+  // reg method: 1 = L1 threshold, 2 = L2, 3 = both with half of reg_lambda each, other = none
+  int reg_method;
+  // default direction choice: 0 = learn, 1 = left, 2 = right
+  int default_direction;
+  // whether we want to do subsample
+  float subsample;
+  // whether to subsample columns each split, in each level
+  float colsample_bylevel;
+  // whether to subsample columns during tree construction
+  float colsample_bytree;
+  // speed optimization for dense column
+  float opt_dense_col;
+  // number of threads for tree construction if OpenMP is enabled, 0 = use system default
+  int nthread;
+  /*! \brief constructor, gives every parameter a default value */
+  TrainParam(void) {
+    learning_rate = 0.3f;
+    min_split_loss = 0.0f;  // with 0, need_prune only fires on a negative loss change
+    min_child_weight = 1.0f;
+    max_depth = 6;
+    reg_lambda = 1.0f;
+    reg_method = 2;
+    default_direction = 0;
+    subsample = 1.0f;
+    colsample_bytree = 1.0f;
+    colsample_bylevel = 1.0f;
+    opt_dense_col = 1.0f;
+    nthread = 0;
+  }
+  /*!
+   * \brief set parameters from outside, names that are not recognised are ignored
+   * \param name name of the parameter
+   * \param val  value of the parameter
+   */
+  inline void SetParam(const char *name, const char *val) {
+    // sync-names: gamma, eta and lambda are short aliases of the long names below
+    if (!strcmp(name, "gamma")) min_split_loss = static_cast<float>(atof(val));
+    if (!strcmp(name, "eta")) learning_rate = static_cast<float>(atof(val));
+    if (!strcmp(name, "lambda")) reg_lambda = static_cast<float>(atof(val));
+    if (!strcmp(name, "learning_rate")) learning_rate = static_cast<float>(atof(val));
+    if (!strcmp(name, "min_child_weight")) min_child_weight = static_cast<float>(atof(val));
+    if (!strcmp(name, "min_split_loss")) min_split_loss = static_cast<float>(atof(val));
+    if (!strcmp(name, "reg_lambda")) reg_lambda = static_cast<float>(atof(val));
+    if (!strcmp(name, "reg_method")) reg_method = atoi(val);
+    if (!strcmp(name, "subsample")) subsample = static_cast<float>(atof(val));
+    if (!strcmp(name, "colsample_bylevel")) colsample_bylevel = static_cast<float>(atof(val));
+    if (!strcmp(name, "colsample_bytree")) colsample_bytree  = static_cast<float>(atof(val));
+    if (!strcmp(name, "opt_dense_col")) opt_dense_col = static_cast<float>(atof(val));
+    if (!strcmp(name, "max_depth")) max_depth = atoi(val);
+    if (!strcmp(name, "nthread")) nthread = atoi(val);
+    if (!strcmp(name, "default_direction")) {
+      if (!strcmp(val, "learn")) default_direction = 0;
+      if (!strcmp(val, "left")) default_direction = 1;
+      if (!strcmp(val, "right")) default_direction = 2;
+    }
+  }
+  // calculate the cost of loss function, 0 when sum_hess is below min_child_weight
+  inline double CalcGain(double sum_grad, double sum_hess) const {
+    if (sum_hess < min_child_weight) {
+      return 0.0;
+    }
+    switch (reg_method) {
+      case 1 : return Sqr(ThresholdL1(sum_grad, reg_lambda)) / sum_hess;
+      case 2 : return Sqr(sum_grad) / (sum_hess + reg_lambda);
+      case 3 : return
+          Sqr(ThresholdL1(sum_grad, 0.5 * reg_lambda)) /
+          (sum_hess + 0.5 * reg_lambda);
+      default: return Sqr(sum_grad) / sum_hess;
+    }
+  }
+  // calculate weight given the statistics, 0 when sum_hess is below min_child_weight
+  inline double CalcWeight(double sum_grad, double sum_hess) const {
+    if (sum_hess < min_child_weight) {
+      return 0.0;
+    } else {
+      switch (reg_method) {
+        case 1: return - ThresholdL1(sum_grad, reg_lambda) / sum_hess;
+        case 2: return - sum_grad / (sum_hess + reg_lambda);
+        case 3: return
+            - ThresholdL1(sum_grad, 0.5 * reg_lambda) /
+            (sum_hess + 0.5 * reg_lambda);
+        default: return - sum_grad / sum_hess;
+      }
+    }
+  }
+  /*! \brief whether need forward small to big search: default right */
+  inline bool need_forward_search(float col_density = 0.0f) const {
+    return this->default_direction == 2 ||
+        (default_direction == 0 && (col_density < opt_dense_col));
+  }
+  /*! \brief whether need backward big to small search: default left; col_density is not used */
+  inline bool need_backward_search(float col_density = 0.0f) const {
+    return this->default_direction != 2;
+  }
+  /*! \brief given the loss change, whether we need to invoke pruning; depth is not used */
+  inline bool need_prune(double loss_chg, int depth) const {
+    return loss_chg < this->min_split_loss;
+  }
+  /*! \brief whether we can split with current hessian; depth is not used */
+  inline bool cannot_split(double sum_hess, int depth) const {
+    return sum_hess < this->min_child_weight * 2.0;
+  }
+  // code support for template data
+  inline double CalcWeight(const GradStats &d) const {
+    return this->CalcWeight(d.sum_grad, d.sum_hess);
+  }
+  inline double CalcGain(const GradStats &d) const {
+    return this->CalcGain(d.sum_grad, d.sum_hess);
+  }
+
+ protected:
+  // functions for L1 cost: shrink w towards zero by lambda, values within [-lambda, lambda] give 0
+  inline static double ThresholdL1(double w, double lambda) {
+    if (w > +lambda) return w - lambda;
+    if (w < -lambda) return w + lambda;
+    return 0.0;
+  }
+  inline static double Sqr(double a) {
+    return a * a;
+  }
+};
+
+/*!
+ * \brief statistics that is helpful to store
+ *   and represent a split solution for the tree
+ */
+struct SplitEntry{
+  /*! \brief loss change after split this node */
+  bst_float loss_chg;
+  /*! \brief split index, the highest bit holds the default direction */
+  unsigned sindex;
+  /*! \brief split value */
+  float split_value;
+  /*! \brief constructor */
+  SplitEntry(void) { loss_chg = 0.0f; sindex = 0; split_value = 0.0f; }
+  /*!
+   * \brief decides whether a we can replace current entry with the statistics given
+   *   This function gives better priority to lower index when loss_chg equals
+   *    not the best way, but helps to give consistent result during multi-thread execution
+   * \param loss_chg the loss reduction get through the split
+   * \param split_index the feature index where the split is on
+   */
+  inline bool NeedReplace(bst_float loss_chg, unsigned split_index) const {
+    // a candidate on a lower feature index also wins when the loss change is equal
+    if (this->split_index() > split_index) {
+      return !(this->loss_chg > loss_chg);
+    }
+    return loss_chg > this->loss_chg;
+  }
+  /*!
+   * \brief update the split entry, replace it if e is better
+   * \param e candidate split solution
+   * \return whether the proposed split is better and can replace current split
+   */
+  inline bool Update(const SplitEntry &e) {
+    if (!this->NeedReplace(e.loss_chg, e.split_index())) {
+      return false;
+    }
+    // take over all three fields of the winning candidate
+    this->split_value = e.split_value;
+    this->sindex = e.sindex;
+    this->loss_chg = e.loss_chg;
+    return true;
+  }
+  /*!
+   * \brief update the split entry, replace it if e is better
+   * \param loss_chg loss reduction of new candidate
+   * \param split_index feature index to split on
+   * \param split_value the split point
+   * \param default_left whether the missing value goes to left
+   * \return whether the proposed split is better and can replace current split
+   */
+  inline bool Update(bst_float loss_chg, unsigned split_index,
+                     float split_value, bool default_left) {
+    if (!this->NeedReplace(loss_chg, split_index)) {
+      return false;
+    }
+    // the default direction travels in the highest bit
+    // of the stored split index
+    this->sindex = default_left ? (split_index | (1U << 31)) : split_index;
+    this->loss_chg = loss_chg;
+    this->split_value = split_value;
+    return true;
+  }
+  /*!\return feature index to split on */
+  inline unsigned split_index(void) const {
+    return sindex & 0x7FFFFFFFU;
+  }
+  /*!\return whether missing value goes to left branch */
+  inline bool default_left(void) const {
+    return 0U != (sindex >> 31);
+  }
+};
+
+}  // namespace tree
+}  // namespace xgboost
+#endif  // XGBOOST_TREE_PARAM_H_
--- a/src/tree/updater.h
+++ b/src/tree/updater.h
@@ -0,0 +1,70 @@
+#ifndef XGBOOST_TREE_UPDATER_H_
+#define XGBOOST_TREE_UPDATER_H_
+/*!
+ * \file updater.h
+ * \brief interface to update the tree
+ * \author Tianqi Chen
+ */
+#include <vector>
+
+#include "../data.h"
+#include "./model.h"
+
+namespace xgboost {
+namespace tree {
+/*!
+ * \brief interface of tree update module, that performs update of a tree
+ * \tparam FMatrix the data type the updater takes
+ */
+template<typename FMatrix>
+class IUpdater {
+ public:
+  /*!
+   * \brief set parameters from outside
+   * \param name name of the parameter
+   * \param val  value of the parameter
+   */
+  virtual void SetParam(const char *name, const char *val) = 0;
+  /*!
+   * \brief perform update to the tree models
+   * \param gpair the gradient pair statistics of the data
+   * \param fmat feature matrix that provide access to features
+   * \param root_index pre-partitioned root_index of each instance,
+   *          root_index.size() can be 0 which indicates that no pre-partition involved
+   * \param trees pointers to the trees to be updated, updater will change the content of the tree
+   *   note: all the trees in the vector are updated, with the same statistics,
+   *         but maybe different random seeds, usually one tree is passed in at a time,
+   *         there can be multiple trees when we train random forest style model
+   */
+  virtual void Update(const std::vector<bst_gpair>& gpair,
+                      FMatrix& fmat,
+                      const std::vector<unsigned>& root_index,
+                      const std::vector<RegTree*>& trees) = 0;
+  // virtual destructor
+  virtual ~IUpdater() {}
+};
+
+}  // namespace tree
+}  // namespace xgboost
+
+#include "./updater_prune-inl.hpp"
+#include "./updater_colmaker-inl.hpp"
+
+namespace xgboost {
+namespace tree {
+/*! \brief create a updater based on name; an unknown name is reported through utils::Error
+ * \param name name of updater
+ * \return return the updater instance
+ */
+template<typename FMatrix>
+inline IUpdater<FMatrix>* CreateUpdater(const char *name) {
+  IUpdater<FMatrix> *updater = NULL;
+  if (strcmp(name, "prune") == 0) updater = new TreePruner<FMatrix>();
+  else if (strcmp(name, "grow_colmaker") == 0) updater = new ColMaker<FMatrix, GradStats>();
+  else utils::Error("unknown updater:%s", name);
+  return updater;
+}
+
+}  // namespace tree
+}  // namespace xgboost
+#endif  // XGBOOST_TREE_UPDATER_H_
--- a/src/tree/updater_colmaker-inl.hpp
+++ b/src/tree/updater_colmaker-inl.hpp
@@ -0,0 +1,357 @@
+#ifndef XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_
+#define XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_
+/*!
+ * \file updater_colmaker-inl.hpp
+ * \brief use columnwise update to construct a tree
+ * \author Tianqi Chen
+ */
+#include <vector>
+#include <algorithm>
+#include "./param.h"
+#include "./updater.h"
+#include "../utils/omp.h"
+#include "../utils/random.h"
+
+namespace xgboost {
+namespace tree {
+/*! \brief tree updater that grows a new tree using column-wise split finding */
+template<typename FMatrix, typename TStats>
+class ColMaker: public IUpdater<FMatrix> {
+ public:
+  virtual ~ColMaker(void) {}
+  /*!
+   * \brief set a training parameter, the name/value pair is forwarded to TrainParam
+   * \param name name of the parameter
+   * \param val  value of the parameter
+   */
+  virtual void SetParam(const char *name, const char *val) {
+    param.SetParam(name, val);
+  }
+  /*!
+   * \brief grow every tree in trees, each one with a freshly constructed Builder
+   * \param gpair the gradient pair statistics of the data
+   * \param fmat feature matrix that provides access to features
+   * \param root_index pre-partitioned root index of each instance, can be empty
+   * \param trees pointers to the trees to be grown
+   */
+  virtual void Update(const std::vector<bst_gpair> &gpair,
+                      FMatrix &fmat,
+                      const std::vector<unsigned> &root_index,
+                      const std::vector<RegTree*> &trees) {
+    typedef std::vector<RegTree*>::const_iterator TreeIter;
+    for (TreeIter it = trees.begin(); it != trees.end(); ++it) {
+      // a new builder per tree, so no temporary state is shared between trees
+      Builder tree_builder(param);
+      tree_builder.Update(gpair, fmat, root_index, *it);
+    }
+  }
+
+ private:
+  // training parameter
+  TrainParam param;
+  // data structure
+  /*! \brief per thread x per node entry to store tmp data */
+  struct ThreadEntry {
+    /*! \brief statistics of data*/
+    TStats stats;
+    /*! \brief last feature value scanned */
+    float  last_fvalue;
+    /*! \brief current best solution */
+    SplitEntry best;
+    // constructor: start from empty statistics;
+    // last_fvalue is given a defined value because entries are copied when
+    // the per-thread vectors are resized, before any feature value was scanned
+    ThreadEntry(void) : last_fvalue(0.0f) {
+      stats.Clear();
+    }
+  };
+  /*! \brief per tree node entry holding the statistics and the best split found */
+  struct NodeEntry {
+    /*! \brief statistics of the data that falls into this node */
+    TStats stats;
+    /*! \brief loss of this node, without split */
+    bst_float root_gain;
+    /*! \brief weight calculated related to current data */
+    float weight;
+    /*! \brief current best solution */
+    SplitEntry best;
+    // constructor: empty statistics, zero gain and zero weight
+    NodeEntry(void) {
+      stats.Clear();
+      root_gain = 0.0f;
+      weight = 0.0f;
+    }
+  };
+  // actual builder that runs the algorithm
+  struct Builder{
+   public:
+    // constructor
+    // NOTE: param is kept as a reference (see the data fields), not copied,
+    // so the TrainParam passed in must outlive this builder
+    explicit Builder(const TrainParam &param) : param(param) {}
+    /*!
+     * \brief grow one tree level by level, up to param.max_depth levels
+     * \param gpair the gradient pair statistics of the data
+     * \param fmat feature matrix that provides access to features
+     * \param root_index pre-partitioned root index of each instance, can be empty
+     * \param p_tree the tree to be grown
+     */
+    virtual void Update(const std::vector<bst_gpair> &gpair, FMatrix &fmat,
+                        const std::vector<unsigned> &root_index,
+                        RegTree *p_tree) {
+      RegTree &tree = *p_tree;
+      InitData(gpair, fmat, root_index, tree);
+      InitNewNode(qexpand, gpair, tree);
+
+      int depth = 0;
+      while (depth < param.max_depth) {
+        FindSplit(depth, qexpand, gpair, fmat, p_tree);
+        ResetPosition(qexpand, fmat, tree);
+        UpdateQueueExpand(tree, &qexpand);
+        InitNewNode(qexpand, gpair, tree);
+        // stop as soon as no node is left to expand
+        if (qexpand.empty()) break;
+        ++depth;
+      }
+      // nodes still queued when the depth limit was reached become leaves
+      for (std::vector<int>::const_iterator it = qexpand.begin();
+           it != qexpand.end(); ++it) {
+        tree[*it].set_leaf(snode[*it].weight * param.learning_rate);
+      }
+      // store the auxiliary statistics of every node in the tree
+      for (int nid = 0; nid < tree.param.num_nodes; ++nid) {
+        const NodeEntry &entry = snode[nid];
+        tree.stat(nid).loss_chg = entry.best.loss_chg;
+        tree.stat(nid).base_weight = entry.weight;
+        tree.stat(nid).sum_hess = static_cast<float>(entry.stats.sum_hess);
+      }
+    }
+
+   private:
+    /*!
+     * \brief initialize the temporary data structures used to grow one tree:
+     *  the position of each row, the sampled feature list,
+     *  the per thread scratch space and the initial expand queue
+     * \param gpair gradient pair of each row, rows with negative hess are excluded
+     * \param fmat feature matrix, queried for the number of columns and the column sizes
+     * \param root_index root node of each row, empty means every row starts at node 0
+     * \param tree the tree to be grown, must contain root nodes only
+     */
+    inline void InitData(const std::vector<bst_gpair> &gpair, FMatrix &fmat,
+                         const std::vector<unsigned> &root_index, const RegTree &tree) {
+      utils::Assert(tree.param.num_nodes == tree.param.num_roots, "ColMaker: can only grow new tree");
+      {// setup position
+        position.resize(gpair.size());
+        // every row starts at root 0, rows not covered by root_index stay there
+        std::fill(position.begin(), position.end(), 0);
+        if (root_index.size() != 0) {
+          // position only holds gpair.size() entries, refuse to write past its end
+          utils::Check(root_index.size() <= position.size(),
+                       "root_index has more entries than gpair");
+          for (size_t i = 0; i < root_index.size(); ++i) {
+            // validate the root id before it is stored
+            utils::Assert(root_index[i] < (unsigned)tree.param.num_roots, "root index exceed setting");
+            position[i] = root_index[i];
+          }
+        }
+        // mark delete for the deleted datas
+        for (size_t i = 0; i < gpair.size(); ++i) {
+          if (gpair[i].hess < 0.0f) position[i] = -1;
+        }
+        // mark subsample
+        if (param.subsample < 1.0f) {
+          for (size_t i = 0; i < gpair.size(); ++i) {
+            if (gpair[i].hess < 0.0f) continue;
+            if (random::SampleBinary(param.subsample) == 0) position[i] = -1;
+          }
+        }
+      }
+
+      {
+        // initialize feature index, only non-empty columns are candidates
+        feat_index.clear();
+        unsigned ncol = static_cast<unsigned>(fmat.NumCol());
+        for (unsigned i = 0; i < ncol; ++i) {
+          if (fmat.GetColSize(i) != 0) feat_index.push_back(i);
+        }
+        unsigned n = static_cast<unsigned>(param.colsample_bytree * feat_index.size());
+        random::Shuffle(feat_index);
+        utils::Check(n > 0, "colsample_bytree is too small that no feature can be included");
+        feat_index.resize(n);
+      }
+      {// setup temp space for each thread
+        #pragma omp parallel
+        {
+          // a single thread records the team size, so that the shared
+          // field nthread is not written by all threads at the same time
+          #pragma omp single
+          {
+            this->nthread = omp_get_num_threads();
+          }
+        }
+        // reserve a small space
+        stemp.clear();
+        stemp.resize(this->nthread, std::vector<ThreadEntry>());
+        for (size_t i = 0; i < stemp.size(); ++i) {
+          stemp[i].clear(); stemp[i].reserve(256);
+        }
+        // drop node statistics left from a previous call, like stemp above
+        snode.clear();
+        snode.reserve(256);
+      }
+      {// expand query
+        qexpand.reserve(256); qexpand.clear();
+        for (int i = 0; i < tree.param.num_roots; ++i) {
+          qexpand.push_back(i);
+        }
+      }
+    }
+    /*!
+     * \brief initialize the statistics, root_gain and weight of all the new nodes in qexpand
+     * \param qexpand ids of the nodes whose NodeEntry is to be filled
+     * \param gpair gradient pair of each row
+     * \param tree the tree being grown, its num_nodes decides the size of the statistics space
+     */
+    inline void InitNewNode(const std::vector<int> &qexpand,
+                            const std::vector<bst_gpair> &gpair,
+                            const RegTree &tree) {
+      {// setup statistics space for each tree node
+        // resize only appends default constructed entries, existing entries are kept
+        for (size_t i = 0; i < stemp.size(); ++i) {
+          stemp[i].resize(tree.param.num_nodes, ThreadEntry());
+        }
+        snode.resize(tree.param.num_nodes, NodeEntry());
+      }
+      // accumulate the gradient pair of each row into the entry of
+      // the executing thread for the node the row currently sits in
+      const unsigned ndata = static_cast<unsigned>(position.size());
+      #pragma omp parallel for schedule(static)
+      for (unsigned i = 0; i < ndata; ++i) {
+        const int tid = omp_get_thread_num();
+        // negative position marks a row that is not used
+        if (position[i] < 0) continue;
+        stemp[tid][position[i]].stats.Add(gpair[i]);
+      }
+      // sum the per thread statistics together
+      for (size_t j = 0; j < qexpand.size(); ++j) {
+        const int nid = qexpand[j];
+        TStats stats; stats.Clear();
+        for (size_t tid = 0; tid < stemp.size(); ++tid) {
+          stats.Add(stemp[tid][nid].stats);
+        }
+        // update node statistics
+        snode[nid].stats = stats;
+        snode[nid].root_gain = param.CalcGain(stats);
+        snode[nid].weight = param.CalcWeight(stats);
+      }
+    }
+    /*!
+     * \brief replace the expand queue by the children of every queued node that was split
+     * \param tree the tree being grown
+     * \param p_qexpand the expand queue, overwritten with the new nodes
+     */
+    inline void UpdateQueueExpand(const RegTree &tree, std::vector<int> *p_qexpand) {
+      std::vector<int> children;
+      for (std::vector<int>::const_iterator it = p_qexpand->begin();
+           it != p_qexpand->end(); ++it) {
+        // leaves have no children to expand
+        if (tree[*it].is_leaf()) continue;
+        children.push_back(tree[*it].cleft());
+        children.push_back(tree[*it].cright());
+      }
+      // the children form the queue of the next level
+      *p_qexpand = children;
+    }
+    /*!
+     * \brief enumerate the split values of a specific feature, updating the
+     *  best split candidate kept in temp for every node in qexpand
+     * \param it iterator over the entries of the column, provides Next(), rindex() and fvalue()
+     * \param fid index of the feature being scanned
+     * \param gpair gradient pair of each row
+     * \param temp per node scratch entries of the calling thread
+     * \param is_forward_search direction of the scan; its negation is passed as the last
+     *        argument of SplitEntry::Update (presumably the default-left flag, confirm in param.h)
+     */
+    template<typename Iter>
+    inline void EnumerateSplit(Iter it, unsigned fid,
+                               const std::vector<bst_gpair> &gpair,
+                               std::vector<ThreadEntry> &temp,
+                               bool is_forward_search) {
+      // clear all the temp statistics
+      for (size_t j = 0; j < qexpand.size(); ++j) {
+        temp[qexpand[j]].stats.Clear();
+      }
+      while (it.Next()) {
+        const bst_uint ridx = it.rindex();
+        const int nid = position[ridx];
+        // negative position marks a row that is not used
+        if (nid < 0) continue;
+        // start working
+        const float fvalue = it.fvalue();
+        // get the statistics of nid
+        ThreadEntry &e = temp[nid];
+        // test if first hit, this is fine, because we set 0 during init
+        if (e.stats.Empty()) {
+          e.stats.Add(gpair[ridx]);
+          e.last_fvalue = fvalue;
+        } else {
+          // try to find a split
+          // a split is only considered between two values that differ by more than rt_2eps
+          if (fabsf(fvalue - e.last_fvalue) > rt_2eps && e.stats.sum_hess >= param.min_child_weight) {
+            // c holds the statistics of the rows not scanned yet
+            TStats c = snode[nid].stats.Substract(e.stats);
+            if (c.sum_hess >= param.min_child_weight) {
+              double loss_chg = param.CalcGain(e.stats) + param.CalcGain(c) - snode[nid].root_gain;
+              // split at the middle of the two neighbouring values
+              e.best.Update(loss_chg, fid, (fvalue + e.last_fvalue) * 0.5f, !is_forward_search);
+            }
+          }
+          // update the statistics
+          e.stats.Add(gpair[ridx]);
+          e.last_fvalue = fvalue;
+        }
+      }
+      // finish updating all statistics, check if it is possible to include all sum statistics
+      for (size_t i = 0; i < qexpand.size(); ++i) {
+        const int nid = qexpand[i];
+        ThreadEntry &e = temp[nid];
+        TStats c = snode[nid].stats.Substract(e.stats);
+        if (e.stats.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) {
+          const double loss_chg = param.CalcGain(e.stats) + param.CalcGain(c) - snode[nid].root_gain;
+          // place the split value just beyond the last value scanned
+          const float delta = is_forward_search ? rt_eps : -rt_eps;
+          e.best.Update(loss_chg, fid, e.last_fvalue + delta, !is_forward_search);
+        }
+      }
+    }
+    /*!
+     * \brief find splits at current level, do split per level
+     * \param depth current depth, not referenced in the body
+     * \param qexpand ids of the nodes to be split at this level
+     * \param gpair gradient pair of each row
+     * \param fmat feature matrix that provides the sorted columns
+     * \param p_tree the tree being grown, each node in qexpand is set to a split or a leaf
+     */
+    inline void FindSplit(int depth, const std::vector<int> &qexpand,
+                          const std::vector<bst_gpair> &gpair, const FMatrix &fmat,
+                          RegTree *p_tree) {
+      // features used at this level, optionally sub-sampled from the per tree feature list
+      std::vector<unsigned> feat_set = feat_index;
+      if (param.colsample_bylevel != 1.0f) {
+        random::Shuffle(feat_set);
+        unsigned n = static_cast<unsigned>(param.colsample_bylevel * feat_index.size());
+        utils::Check(n > 0, "colsample_bylevel is too small that no feature can be included");
+        feat_set.resize(n);
+      }
+      // start enumeration
+      // each feature is handled by one thread, which writes only to its own stemp[tid]
+      const unsigned nsize = static_cast<unsigned>(feat_set.size());
+      #pragma omp parallel for schedule(dynamic, 1)
+      for (unsigned i = 0; i < nsize; ++i) {
+        const unsigned fid = feat_set[i];
+        const int tid = omp_get_thread_num();
+        if (param.need_forward_search(fmat.GetColDensity(fid))) {
+          this->EnumerateSplit(fmat.GetSortedCol(fid), fid, gpair, stemp[tid], true);
+        }
+        if (param.need_backward_search(fmat.GetColDensity(fid))) {
+          this->EnumerateSplit(fmat.GetReverseSortedCol(fid), fid, gpair, stemp[tid], false);
+        }
+      }
+      // after this each thread's stemp will get the best candidates, aggregate results
+      for (size_t i = 0; i < qexpand.size(); ++i) {
+        const int nid = qexpand[i];
+        NodeEntry &e = snode[nid];
+        for (int tid = 0; tid < this->nthread; ++tid) {
+          e.best.Update(stemp[tid][nid].best);
+        }
+        // now we know the solution in snode[nid], set split
+        // a node is only split when the loss change exceeds rt_eps, otherwise it becomes a leaf
+        if (e.best.loss_chg > rt_eps) {
+          p_tree->AddChilds(nid);
+          (*p_tree)[nid].set_split(e.best.split_index(), e.best.split_value, e.best.default_left());
+        } else {
+          (*p_tree)[nid].set_leaf(e.weight * param.learning_rate);
+        }
+      }
+    }
+    /*!
+     * \brief reset position of each data points after split is created in the tree
+     * \param qexpand ids of the nodes that were processed at this level
+     * \param fmat feature matrix that provides the columns of the split features
+     * \param tree the tree being grown
+     */
+    inline void ResetPosition(const std::vector<int> &qexpand, const FMatrix &fmat, const RegTree &tree) {
+      // step 1, set default direct nodes to default, and leaf nodes to -1
+      const unsigned ndata = static_cast<unsigned>(position.size());
+      #pragma omp parallel for schedule(static)
+      for (unsigned i = 0; i < ndata; ++i) {
+        const int nid = position[i];
+        if (nid >= 0) {
+          if (tree[nid].is_leaf()) {
+            position[i] = -1;
+          } else {
+            // push to default branch, correct latter
+            position[i] = tree[nid].default_left() ? tree[nid].cleft(): tree[nid].cright();
+          }
+        }
+      }
+      // step 2, classify the non-default data into right places
+      // collect the distinct features used by the splits of this level
+      std::vector<unsigned> fsplits;
+      for (size_t i = 0; i < qexpand.size(); ++i) {
+        const int nid = qexpand[i];
+        if (!tree[nid].is_leaf()) fsplits.push_back(tree[nid].split_index());
+      }
+      std::sort(fsplits.begin(), fsplits.end());
+      fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());
+      // start put things into right place
+      // only rows that have an entry in the split feature's column are visited here,
+      // the others keep the default branch assigned in step 1
+      const unsigned nfeats = static_cast<unsigned>(fsplits.size());
+      #pragma omp parallel for schedule(dynamic, 1)
+      for (unsigned i = 0; i < nfeats; ++i) {
+        const unsigned fid = fsplits[i];
+        for (typename FMatrix::ColIter it = fmat.GetSortedCol(fid); it.Next();) {
+          const bst_uint ridx = it.rindex();
+          int nid = position[ridx];
+          if (nid == -1) continue;
+          // go back to parent, correct those who are not default
+          nid = tree[nid].parent();
+          // the position is only changed when the parent splits on this feature
+          if (tree[nid].split_index() == fid) {
+            if (it.fvalue() < tree[nid].split_cond()) {
+              position[ridx] = tree[nid].cleft();
+            } else {
+              position[ridx] = tree[nid].cright();
+            }
+          }
+        }
+      }
+    }
+    //--data fields--
+    const TrainParam &param;
+    // number of omp thread used during training
+    int nthread;
+    // Per feature: shuffle index of each feature index
+    std::vector<unsigned> feat_index;
+    // Instance Data: current node position in the tree of each instance
+    std::vector<int> position;
+    // PerThread x PerTreeNode: statistics for per thread construction
+    std::vector< std::vector<ThreadEntry> > stemp;
+    /*! \brief TreeNode Data: statistics for each constructed node */
+    std::vector<NodeEntry> snode;
+    /*! \brief queue of nodes to be expanded */
+    std::vector<int> qexpand;
+  };
+};
+
+}  // namespace tree
+}  // namespace xgboost
+#endif  // XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_
--- a/src/tree/updater_prune-inl.hpp
+++ b/src/tree/updater_prune-inl.hpp
@@ -0,0 +1,67 @@
+#ifndef XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_
+#define XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_
+/*!
+ * \file updater_prune-inl.hpp
+ * \brief prune a tree given the statistics 
+ * \author Tianqi Chen
+ */
+#include <vector>
+#include "./param.h"
+#include "./updater.h"
+
+namespace xgboost {
+namespace tree {
+/*! \brief tree updater that prunes a tree once growing has finished */
+template<typename FMatrix>
+class TreePruner: public IUpdater<FMatrix> {
+ public:
+  virtual ~TreePruner(void) {}
+  /*! \brief set a training parameter, forwarded to TrainParam */
+  virtual void SetParam(const char *name, const char *val) {
+    param.SetParam(name, val);
+  }
+  /*! \brief prune every tree in trees; gpair, fmat and root_index are not used */
+  virtual void Update(const std::vector<bst_gpair> &gpair, FMatrix &fmat,
+                      const std::vector<unsigned> &root_index,
+                      const std::vector<RegTree*> &trees) {
+    typedef std::vector<RegTree*>::const_iterator TreeIter;
+    for (TreeIter it = trees.begin(); it != trees.end(); ++it) {
+      this->DoPrune(**it);
+    }
+  }
+
+ private:
+  /*!
+   * \brief register leaf nid at its parent and collapse the parent when needed,
+   *  then continue upwards for as long as parents keep collapsing
+   * \param tree the tree to prune
+   * \param nid id of a leaf node
+   * \param depth depth of node nid
+   */
+  inline void TryPruneLeaf(RegTree &tree, int nid, int depth) {
+    while (!tree[nid].is_root()) {
+      const int pid = tree[nid].parent();
+      RegTree::NodeStat &pstat = tree.stat(pid);
+      ++pstat.leaf_child_cnt;
+      // the parent is collapsed only when both children are leaves
+      // and the training parameters ask for pruning at the parent's depth
+      const bool collapse =
+          pstat.leaf_child_cnt >= 2 && param.need_prune(pstat.loss_chg, depth - 1);
+      if (!collapse) return;
+      tree.ChangeToLeaf(pid, param.learning_rate * pstat.base_weight);
+      // the parent is a leaf now, repeat the check one level up
+      nid = pid;
+      depth -= 1;
+    }
+  }
+  /*! \brief do pruning of a tree */
+  inline void DoPrune(RegTree &tree) {
+    // reset the leaf child counters
+    int nid = 0;
+    while (nid < tree.param.num_nodes) {
+      tree.stat(nid).leaf_child_cnt = 0;
+      ++nid;
+    }
+    // try pruning from every leaf
+    for (nid = 0; nid < tree.param.num_nodes; ++nid) {
+      if (!tree[nid].is_leaf()) continue;
+      this->TryPruneLeaf(tree, nid, tree.GetDepth(nid));
+    }
+  }
+
+ private:
+  // training parameter
+  TrainParam param;
+};
+
+}  // namespace tree
+}  // namespace xgboost
+#endif  // XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_
--- a/src/utils/config.h
+++ b/src/utils/config.h
@@ -0,0 +1,196 @@
+#ifndef XGBOOST_UTILS_CONFIG_H_
+#define XGBOOST_UTILS_CONFIG_H_
+/*!
+ * \file config.h
+ * \brief helper class to load in configures from file
+ * \author Tianqi Chen
+ */
+#include <cstdio>
+#include <cstring>
+#include <string>
+#include <istream>
+#include <fstream>
+#include "./utils.h"
+
+namespace xgboost {
+namespace utils {
+/*! 
+ * \brief base implementation of config reader
+ *  reads "name = value" pairs, '#' starts a comment that runs to the end of the line,
+ *  "..." is a single line string and '...' is a string that may span multiple lines
+ *  subclasses supply the characters through GetChar and IsEnd
+ */
+class ConfigReaderBase {
+ public:
+  /*! 
+   * \brief get current name, called after Next returns true
+   * \return current parameter name 
+   */
+  inline const char *name(void) const {
+    return s_name;
+  }
+  /*! 
+   * \brief get current value, called after Next returns true
+   * \return current parameter value 
+   */
+  inline const char *val(void) const {
+    return s_val;
+  }
+  /*! 
+   * \brief move iterator to next position
+   * \return true if there is value in next position,
+   *         false at the end of input or when the next entry is not of the form name = value
+   */
+  inline bool Next(void) {
+    if (this->IsEnd()) return false;
+    GetNextToken(s_name);
+    if (s_name[0] == '=') return false;
+    // '=' and the value must both be on the same line as the name
+    if (GetNextToken(s_buf) || s_buf[0] != '=') return false;
+    if (GetNextToken(s_val) || s_val[0] == '=') return false;
+    return true;
+  }
+  // called before usage, loads the first lookahead character
+  inline void Init(void) {
+    ch_buf = this->GetChar();
+  }
+
+ protected:
+  /*!
+   * \brief to be implemented by subclass,
+   * get next character, return EOF if end of file 
+   */
+  virtual char GetChar(void) = 0;
+  /*! \brief to be implemented by child, check if end of stream */
+  virtual bool IsEnd(void) = 0;
+
+ private:
+  /*! \brief size of each token buffer, including the terminating zero */
+  enum { kBufSize = 100000 };
+  // lookahead character
+  // NOTE(review): ch_buf is a char that is compared with EOF; this relies on
+  // plain char being signed, confirm for the targeted platforms
+  char ch_buf;
+  char s_name[kBufSize], s_val[kBufSize], s_buf[kBufSize];
+
+  /*!
+   * \brief append ch to tok, keeping one slot free for the terminating zero
+   *  a character that does not fit is reported and dropped instead of
+   *  being written past the end of the buffer
+   */
+  inline void Append(char tok[], int *p_i, char ch) {
+    if (*p_i >= kBufSize - 1) {
+      Error("ConfigReader: token exceeds buffer size");
+      return;
+    }
+    tok[(*p_i)++] = ch;
+  }
+  /*! \brief skip the rest of the current line, ch_buf is left at the line end or EOF */
+  inline void SkipLine(void) {
+    do {
+      ch_buf = this->GetChar();
+    } while (ch_buf != EOF && ch_buf != '\n' && ch_buf != '\r');
+  }
+  /*! \brief parse a string enclosed in double quotes, which must end on the same line */
+  inline void ParseStr(char tok[]) {
+    int i = 0;
+    while ((ch_buf = this->GetChar()) != EOF) {
+      switch (ch_buf) {
+        // backslash: take the next character literally
+        case '\\': Append(tok, &i, this->GetChar()); break;
+        case '\"': tok[i] = '\0'; return;
+        case '\r':
+        case '\n': Error("ConfigReader: unterminated string");
+        // no break above: if Error returns, the line break is stored like any other character
+        default: Append(tok, &i, ch_buf);
+      }
+    }
+    Error("ConfigReader: unterminated string");
+  }
+  /*! \brief parse a string enclosed in single quotes, line breaks are allowed inside */
+  inline void ParseStrML(char tok[]) {
+    int i = 0;
+    while ((ch_buf = this->GetChar()) != EOF) {
+      switch (ch_buf) {
+        // backslash: take the next character literally
+        case '\\': Append(tok, &i, this->GetChar()); break;
+        case '\'': tok[i] = '\0'; return;
+        default: Append(tok, &i, ch_buf);
+      }
+    }
+    Error("unterminated string");
+  }
+  /*!
+   * \brief read the next token into tok
+   * \return true if a line break or comment was passed while reading,
+   *         or if the input ended before any token was found
+   */
+  inline bool GetNextToken(char tok[]) {
+    int i = 0;
+    bool new_line = false;
+    while (ch_buf != EOF) {
+      switch (ch_buf) {
+        case '#' : SkipLine(); new_line = true; break;
+        case '\"':
+          if (i == 0) {
+            ParseStr(tok); ch_buf = this->GetChar(); return new_line;
+          } else {
+            Error("ConfigReader: token followed directly by string");
+          }
+          // no break: if Error returns, control falls through to the next case
+        case '\'':
+          if (i == 0) {
+            ParseStrML(tok); ch_buf = this->GetChar(); return new_line;
+          } else {
+            Error("ConfigReader: token followed directly by string");
+          }
+          // no break: if Error returns, control falls through to the next case
+        case '=':
+          if (i == 0) {
+            // '=' is a token of its own
+            ch_buf = this->GetChar();
+            tok[0] = '=';
+            tok[1] = '\0';
+          } else {
+            // '=' ends the token read so far and stays in ch_buf for the next call
+            tok[i] = '\0';
+          }
+          return new_line;
+        case '\r':
+        case '\n':
+          if (i == 0) new_line = true;
+          // fall through: a line break also separates tokens
+        case '\t':
+        case ' ' :
+          ch_buf = this->GetChar();
+          if (i > 0) {
+            tok[i] = '\0';
+            return new_line;
+          }
+          break;
+        default:
+          Append(tok, &i, ch_buf);
+          ch_buf = this->GetChar();
+          break;
+      }
+    }
+    // end of input: terminate what was read, so that a last token that is
+    // not followed by a line break is still returned as a valid token
+    tok[i] = '\0';
+    return i == 0 || new_line;
+  }
+};
+/*!
+ * \brief config reader that takes its characters from a std::istream,
+ *  which allows the use of all types of istream
+ */
+class ConfigStreamReader: public ConfigReaderBase {
+ public:
+  /*! 
+   * \brief constructor 
+   * \param fin input stream, stored by reference
+   */
+  explicit ConfigStreamReader(std::istream &fin) : fin(fin) {}
+
+ protected:
+  /*! \brief read one character from the stream */
+  virtual char GetChar(void) {
+    const int ch = fin.get();
+    return static_cast<char>(ch);
+  }
+  /*! \brief check if end of stream was reached */
+  virtual bool IsEnd(void) {
+    const bool at_end = fin.eof();
+    return at_end;
+  }
+
+ private:
+  // the stream the configuration is read from
+  std::istream &fin;
+};
+
+/*! 
+ * \brief an iterator that iterates over a configure file and gets the configures
+ */
+class ConfigIterator: public ConfigStreamReader {
+ public:
+  /*! 
+   * \brief constructor, opens the file and loads the first character
+   * \param fname name of configure file
+   */
+  // NOTE: the base class is handed a reference to the member fi before fi
+  // is constructed; fi is only used from the constructor body onwards
+  explicit ConfigIterator(const char *fname) : ConfigStreamReader(fi) {
+    fi.open(fname);
+    if (fi.fail()) {
+      utils::Error("cannot open file %s", fname);
+    }
+    ConfigReaderBase::Init();
+  }
+  /*! \brief destructor, closes the file */
+  ~ConfigIterator(void) {
+    fi.close();
+  }
+
+ private:
+  // the configure file being read
+  std::ifstream fi;
+};
+}  // namespace utils
+}  // namespace xgboost
+#endif  // XGBOOST_UTILS_CONFIG_H_
--- a/src/utils/fmap.h
+++ b/src/utils/fmap.h
@@ -0,0 +1,80 @@
+#ifndef XGBOOST_UTILS_FMAP_H_
+#define XGBOOST_UTILS_FMAP_H_
+/*!
+ * \file fmap.h
+ * \brief helper class that holds the feature names and interpretations
+ * \author Tianqi Chen
+ */
+#include <vector>
+#include <string>
+#include <cstring>
+#include "./utils.h"
+
+namespace xgboost {
+namespace utils {
+/*! \brief helper class that holds the feature names and interpretations */
+class FeatMap {
+ public:
+  /*! \brief interpretation of a feature, see GetType for the text form of each type */
+  enum Type {
+    kIndicator = 0,
+    kQuantitive = 1,
+    kInteger = 2,
+    kFloat = 3
+  };
+  // function definitions
+  /*!
+   * \brief load feature map from text format
+   * \param fname name of the file to load from
+   */
+  inline void LoadText(const char *fname) {
+    FILE *fi = utils::FopenCheck(fname, "r");
+    this->LoadText(fi);
+    fclose(fi);
+  }
+  /*!
+   * \brief load feature map from text format,
+   *  each line is "<feature id>\t<feature name>\t<feature type>",
+   *  loading stops at the first line that does not match this format
+   * \param fi file to load from
+   */
+  inline void LoadText(FILE *fi) {
+    int fid;
+    char fname[1256], ftype[1256];
+    // the field widths keep fscanf from writing past the end of the buffers
+    while (fscanf(fi, "%d\t%1255[^\t]\t%1255s\n", &fid, fname, ftype) == 3) {
+      this->PushBack(fid, fname, ftype);
+    }
+  }
+  /*!
+   * \brief push back feature map
+   * \param fid feature id, must be equal to the number of features known so far
+   * \param fname name of the feature
+   * \param ftype type of the feature in text form
+   */
+  inline void PushBack(int fid, const char *fname, const char *ftype) {
+    utils::Check(fid == static_cast<int>(names_.size()), "invalid fmap format");
+    names_.push_back(std::string(fname));
+    types_.push_back(GetType(ftype));
+  }
+  /*! \brief remove all the features */
+  inline void Clear(void) {
+    names_.clear(); types_.clear();
+  }
+  /*! \brief number of known features */
+  size_t size(void) const {
+    return names_.size();
+  }
+  /*! \brief return name of specific feature */
+  const char* name(size_t idx) const {
+    utils::Assert(idx < names_.size(), "utils::FMap::name feature index exceed bound");
+    return names_[idx].c_str();
+  }
+  /*! \brief return type of specific feature */
+  const Type& type(size_t idx) const {
+    // check against the vector that is indexed, and report the right function
+    utils::Assert(idx < types_.size(), "utils::FMap::type feature index exceed bound");
+    return types_[idx];
+  }
+
+ private:
+  /*! \brief convert the text form of a feature type: "i", "q", "int" or "float" */
+  inline static Type GetType(const char *tname) {
+    if (!strcmp("i", tname)) return kIndicator;
+    if (!strcmp("q", tname)) return kQuantitive;
+    if (!strcmp("int", tname)) return kInteger;
+    if (!strcmp("float", tname)) return kFloat;
+    utils::Error("unknown feature type, use i for indicator and q for quantity");
+    return kIndicator;
+  }
+  /*! \brief name of the feature */
+  std::vector<std::string> names_;
+  /*! \brief type of the feature */
+  std::vector<Type> types_;
+};
+
+}  // namespace utils
+}  // namespace xgboost
+#endif  // XGBOOST_UTILS_FMAP_H_
--- a/src/utils/io.h
+++ b/src/utils/io.h
@@ -0,0 +1,104 @@
+#ifndef XGBOOST_UTILS_IO_H
+#define XGBOOST_UTILS_IO_H
+#include <cstdio>
+#include <vector>
+#include <string>
+#include "./utils.h"
+/*!
+ * \file io.h
+ * \brief general stream interface for serialization, I/O
+ * \author Tianqi Chen
+ */
+namespace xgboost {
+namespace utils {
+/*!
+ * \brief interface of stream I/O, used to serialize model
+ */
+class IStream {
+ public:
+  /*!
+   * \brief read data from stream
+   * \param ptr pointer to memory buffer
+   * \param size size of block
+   * \return usually is the size of data read, the helpers below treat 0 as failure
+   */
+  virtual size_t Read(void *ptr, size_t size) = 0;
+  /*!
+   * \brief write data to stream
+   * \param ptr pointer to memory buffer
+   * \param size size of block
+   */
+  virtual void Write(const void *ptr, size_t size) = 0;
+  /*! \brief virtual destructor */
+  virtual ~IStream(void) {}
+
+ public:
+  // helper functions to write various of data structures
+  /*!
+   * \brief binary serialize a vector, stored as a 64 bit length followed by the raw elements
+   * \param vec vector to be serialized
+   */
+  template<typename T>
+  inline void Write(const std::vector<T> &vec) {
+    uint64_t sz = vec.size();
+    this->Write(&sz, sizeof(sz));
+    // an empty vector has no element whose address could be taken
+    if (sz != 0) {
+      this->Write(&vec[0], sizeof(T) * sz);
+    }
+  }
+  /*!
+   * \brief binary load a vector 
+   * \param out_vec vector to be loaded
+   * \return whether load is successful
+   */
+  template<typename T>
+  inline bool Read(std::vector<T> *out_vec) {
+    uint64_t sz;
+    if (this->Read(&sz, sizeof(sz)) == 0) return false;
+    out_vec->resize(sz);
+    // nothing follows the length of an empty vector; a zero sized read
+    // would return 0 and be mistaken for a failure
+    if (sz != 0) {
+      if (this->Read(&(*out_vec)[0], sizeof(T) * sz) == 0) return false;
+    }
+    return true;
+  }
+  /*!
+   * \brief binary serialize a string, stored as a 64 bit length followed by the characters
+   * \param str the string to be serialized
+   */ 
+  inline void Write(const std::string &str) {
+    uint64_t sz = str.length();
+    this->Write(&sz, sizeof(sz));
+    if (sz != 0) {
+      this->Write(&str[0], sizeof(char) * sz);
+    }
+  }
+  /*!
+   * \brief binary load a string
+   * \param out_str string to be loaded
+   * \return whether load is successful
+   */
+  inline bool Read(std::string *out_str) {
+    uint64_t sz;
+    if (this->Read(&sz, sizeof(sz)) == 0) return false;
+    out_str->resize(sz);
+    // same as for vectors: an empty string is a successful load
+    if (sz != 0) {
+      if (this->Read(&(*out_str)[0], sizeof(char) * sz) == 0) return false;
+    }
+    return true;
+  }
+};
+
+/*! \brief stream implementation on top of a C FILE handle */
+class FileStream : public IStream {
+ public:
+  /*! \brief wrap an already opened file, the file is not closed on destruction */
+  explicit FileStream(FILE *fp) : fp(fp) {}
+  /*! \brief read one block of size bytes, returns the number of blocks read (0 or 1) */
+  virtual size_t Read(void *ptr, size_t size) {
+    const size_t nblock = fread(ptr, size, 1, fp);
+    return nblock;
+  }
+  /*! \brief write one block of size bytes, the result of fwrite is not checked */
+  virtual void Write(const void *ptr, size_t size) {
+    fwrite(ptr, size, 1, fp);
+  }
+  /*! \brief close the underlying file */
+  inline void Close(void) {
+    fclose(fp);
+  }
+
+ private:
+  // the wrapped file handle
+  FILE *fp;
+};
+
+}  // namespace utils
+}  // namespace xgboost
+#endif
--- a/src/utils/iterator.h
+++ b/src/utils/iterator.h
@@ -0,0 +1,40 @@
+#ifndef XGBOOST_UTILS_ITERATOR_H
+#define XGBOOST_UTILS_ITERATOR_H
+#include <cstdio>
+/*!
+ * \file iterator.h
+ * \brief iterator interface
+ * \author Tianqi Chen
+ */
+namespace xgboost {
+namespace utils {
+/*!
+ * \brief iterator interface
+ * \tparam DType data type
+ */
+template<typename DType>
+class IIterator {
+ public:
+  /*!
+   * \brief set the parameter 
+   * \param name name of parameter
+   * \param val  value of parameter
+   */
+  virtual void SetParam(const char *name, const char *val) = 0;
+  /*! \brief initialize the iterator so that we can use the iterator */
+  virtual void Init(void) = 0;
+  /*! \brief set the iterator before the first item */
+  virtual void BeforeFirst(void) = 0;
+  /*! \brief move to next item */
+  virtual bool Next(void) = 0;
+  /*! \brief get current data */
+  virtual const DType &Value(void) const = 0;
+ public:
+  /*! \brief virtual destructor */
+  virtual ~IIterator(void) {}
+};
+
+}  // namespace utils
+}  // namespace xgboost
+#endif
+
--- a/src/utils/matrix_csr.h
+++ b/src/utils/matrix_csr.h
@@ -0,0 +1,123 @@
+#ifndef XGBOOST_UTILS_MATRIX_CSR_H_
+#define XGBOOST_UTILS_MATRIX_CSR_H_
+/*!
+ * \file matrix_csr.h
+ * \brief this file defines some easy to use STL based class for in memory sparse CSR matrix
+ * \author Tianqi Chen
+ */
+#include <vector>
+#include <algorithm>
+#include "./utils.h"
+
+namespace xgboost {
+namespace utils {
+/*!
+ * \brief a class used to help construct CSR format matrix,
+ *        can be used to convert row major CSR to column major CSR
+ *        the steps are: InitBudget, AddBudget for every element, InitStorage,
+ *        PushElem for every element, and Cleanup when aclist is used
+ * \tparam IndexType type of index used to store the index position, usually unsigned or size_t
+ * \tparam UseAcList whether enabling the usage of aclist, this option must be enabled manually
+ */
+template<typename IndexType, bool UseAcList = false>
+struct SparseCSRMBuilder {
+ private:
+  /*! \brief dummy variable used in the indicator matrix construction */
+  std::vector<size_t> dummy_aclist;
+  /*! \brief pointer to each of the row */
+  std::vector<size_t> &rptr;
+  /*! \brief index of nonzero entries in each row */
+  std::vector<IndexType> &findex;
+  /*! \brief a list of active rows, used when many rows are empty */
+  std::vector<size_t> &aclist;
+
+ public:
+  /*!
+   * \brief constructor for the case without aclist
+   * \param p_rptr row pointer storage, filled by the builder
+   * \param p_findex index storage, filled by the builder
+   */
+  SparseCSRMBuilder(std::vector<size_t> &p_rptr,
+                    std::vector<IndexType> &p_findex)
+      :rptr(p_rptr), findex(p_findex), aclist(dummy_aclist) {
+    Assert(!UseAcList, "enabling bug");
+  }
+  /*!
+   * \brief constructor for the case with aclist
+   *        use with caution! rptr must be cleaned before use
+   * \param p_rptr row pointer storage
+   * \param p_findex index storage, filled by the builder
+   * \param p_aclist storage of the list of active rows
+   */
+  SparseCSRMBuilder(std::vector<size_t> &p_rptr,
+                    std::vector<IndexType> &p_findex,
+                    std::vector<size_t> &p_aclist)
+      :rptr(p_rptr), findex(p_findex), aclist(p_aclist) {
+    Assert(UseAcList, "must manually enable the option use aclist");
+  }
+
+ public:
+  /*!
+   * \brief step 1: initialize the number of rows in the data, not necessary exact
+   * \param nrows number of rows in the matrix, can be smaller than expected
+   */
+  inline void InitBudget(size_t nrows = 0) {
+    if (!UseAcList) {
+      rptr.clear();
+      rptr.resize(nrows + 1, 0);
+    } else {
+      // with aclist, rptr is provided by the caller and only the active rows are reset
+      Assert(nrows + 1 == rptr.size(), "rptr must be initialized already");
+      this->Cleanup();
+    }
+  }
+  /*!
+   * \brief step 2: add budget to each rows
+   * \param row_id the id of the row
+   * \param nelem  number of element budget add to this row
+   */
+  inline void AddBudget(size_t row_id, size_t nelem = 1) {
+    // grow rptr when the row is beyond the number of rows given to InitBudget
+    if (rptr.size() < row_id + 2) {
+      rptr.resize(row_id + 2, 0);
+    }
+    if (UseAcList) {
+      // a row becomes active the first time budget is added to it
+      if (rptr[row_id + 1] == 0) aclist.push_back(row_id);
+    }
+    // until InitStorage is called, rptr[row_id + 1] holds the element count of the row
+    rptr[row_id + 1] += nelem;
+  }
+  /*! \brief step 3: initialize the necessary storage */
+  inline void InitStorage(void) {
+    // initialize rptr to be beginning of each segment
+    size_t start = 0;
+    if (!UseAcList) {
+      // turn the counts into start offsets, rptr[i + 1] becomes the start of row i
+      for (size_t i = 1; i < rptr.size(); i++) {
+        size_t rlen = rptr[i];
+        rptr[i] = start;
+        start += rlen;
+      }
+    } else {
+      // case with active list
+      std::sort(aclist.begin(), aclist.end());
+      for (size_t i = 0; i < aclist.size(); i++) {
+        size_t ridx = aclist[i];
+        size_t rlen = rptr[ridx + 1];
+        rptr[ridx + 1] = start;
+        // set previous rptr to right position if previous feature is not active
+        if (i == 0 || ridx != aclist[i - 1] + 1) rptr[ridx] = start;
+        start += rlen;
+      }
+    }
+    // start is now the total number of elements
+    findex.resize(start);
+  }
+  /*!
+   * \brief step 4:
+   * used in indicator matrix construction, add new
+   * element to each row, the number of calls shall be exactly same as AddBudget
+   * \param row_id the id of the row
+   * \param col_id the index value stored for the element
+   */
+  inline void PushElem(size_t row_id, IndexType col_id) {
+    // rptr[row_id + 1] is the next free slot of the row and is advanced by each push,
+    // after all pushes it has moved to the end of the row
+    size_t &rp = rptr[row_id + 1];
+    findex[rp++] = col_id;
+  }
+  /*!
+   * \brief step 5: only needed when aclist is used
+   * clean up the rptr for next usage
+   */
+  inline void Cleanup(void) {
+    Assert(UseAcList, "this function can only be called use AcList");
+    // only the entries touched by active rows are reset
+    for (size_t i = 0; i < aclist.size(); i++) {
+      const size_t ridx = aclist[i];
+      rptr[ridx] = 0; rptr[ridx + 1] = 0;
+    }
+    aclist.clear();
+  }
+};
+
+}  // namespace utils
+}  // namespace xgboost
+#endif
--- a/src/utils/omp.h
+++ b/src/utils/omp.h
@@ -0,0 +1,16 @@
+#ifndef XGBOOST_UTILS_OMP_H_
+#define XGBOOST_UTILS_OMP_H_
+/*!
+ * \file omp.h
+ * \brief header to handle OpenMP compatibility issues
+ * \author Tianqi Chen
+ */
+#if defined(_OPENMP)
+#include <omp.h>
+#else  // no OpenMP: single-thread stubs; #pragma message because #warning is non-standard before C++23
+#pragma message("OpenMP is not available, compile to single thread code")
+inline int omp_get_thread_num() { return 0; }   // the only thread has id 0
+inline int omp_get_num_threads() { return 1; }  // team size is always 1
+inline void omp_set_num_threads(int /*nthread*/) {}  // request ignored; unnamed to avoid unused-parameter warnings
+#endif
+#endif  // XGBOOST_UTILS_OMP_H_
--- a/src/utils/random.h
+++ b/src/utils/random.h
@@ -0,0 +1,102 @@
+#ifndef XGBOOST_UTILS_RANDOM_H_
+#define XGBOOST_UTILS_RANDOM_H_
+/*!
+ * \file random.h
+ * \brief PRNG to support random number generation
+ * \author Tianqi Chen: tianqi.tchen@gmail.com
+ *
+ * Use standard PRNG from stdlib
+ */
+#include <cmath>
+#include <cstdlib>
+#include <vector>
+#include <algorithm>
+#include "./utils.h"
+
+/*! namespace of PRNG */
+namespace xgboost {
+namespace random {
+
+/*! \brief seed the global rand() generator that the free functions here draw from */
+inline void Seed(uint32_t seed) {
+  std::srand(static_cast<unsigned>(seed));
+}
+/*! \brief next rand() draw scaled into the half-open interval [0,1) */
+inline double NextDouble(void) {
+  return rand() / (RAND_MAX + 1.0);
+}
+/*! \brief next rand() draw scaled into the open interval (0,1); never returns 0 or 1 */
+inline double NextDouble2(void) {
+  return (rand() + 1.0) / (RAND_MAX + 2.0);
+}
+
+/*! \brief return the next rand() value converted to uint32_t */
+inline uint32_t NextUInt32(void) {
+  return static_cast<uint32_t>(rand());
+}
+/*! \brief return a random integer in [0, n), obtained by scaling NextDouble() by n */
+inline uint32_t NextUInt32(uint32_t n) {
+  return static_cast<uint32_t>(floor(NextDouble() * n));
+}
+/*! \brief draw x ~ N(0,1) using the polar rejection method */
+inline double SampleNormal() {
+  for (;;) {
+    const double u = 2 * NextDouble2() - 1.0;
+    const double v = 2 * NextDouble2() - 1.0;
+    const double r2 = u*u + v*v;
+    // accept only inside the unit disc; r2 == 0 would divide by zero below
+    if (r2 >= 1.0 || r2 == 0.0) continue;
+    return u * sqrt(-2.0 * log(r2) / r2);
+  }
+}
+
+/*! \brief fill xx and yy with a pair of iid N(0,1) samples */
+inline void SampleNormal2D(double &xx, double &yy) {
+  double u, v, r2;
+  do {  // redraw until (u, v) is inside the unit disc and not the origin
+    u = 2 * NextDouble2() - 1.0;
+    v = 2 * NextDouble2() - 1.0;
+    r2 = u*u + v*v;
+  } while (r2 >= 1.0 || r2 == 0.0);
+  const double scale = sqrt(-2.0 * log(r2) / r2);
+  xx = u * scale;
+  yy = v * scale;
+}
+/*! \brief draw x ~ N(mu, sigma^2) by shifting and scaling a standard normal sample */
+inline double SampleNormal(double mu, double sigma) {
+  return mu + sigma * SampleNormal();
+}
+/*! \brief coin flip: 1 when a NextDouble() draw falls below p, otherwise 0 */
+inline int SampleBinary(double p) {
+  return (NextDouble() < p) ? 1 : 0;
+}
+
+template<typename T>
+inline void Shuffle(T *data, size_t sz) {
+  if (sz == 0) return;  // nothing to permute
+  for (uint32_t i = static_cast<uint32_t>(sz); --i > 0; ) {  // i runs sz-1 .. 1
+    std::swap(data[NextUInt32(i + 1)], data[i]);  // partner drawn from [0, i]
+  }
+}
+// randomly shuffle data in place using the global PRNG; an empty vector is left untouched
+template<typename T>
+inline void Shuffle(std::vector<T> &data) {
+  if (!data.empty()) Shuffle(&data[0], data.size());  // &data[0] is undefined on an empty vector
+}
+
+/*! \brief PRNG that keeps its own seed state instead of using the global rand() state */
+struct Random{
+  /*! \brief (re)set the seed that subsequent draws advance */
+  inline void Seed(unsigned sd) {
+    rseed = sd;
+  }
+  /*! \brief next rand_r draw scaled into [0,1) */
+  inline double RandDouble(void) {
+    return rand_r(&rseed) / (RAND_MAX + 1.0);
+  }
+  // seed state, updated in place by rand_r
+  unsigned rseed;
+};
+}  // namespace random
+}  // namespace xgboost
+#endif  // XGBOOST_UTILS_RANDOM_H_
--- a/src/utils/utils.h
+++ b/src/utils/utils.h
@@ -0,0 +1,94 @@
+#ifndef XGBOOST_UTILS_UTILS_H_
+#define XGBOOST_UTILS_UTILS_H_
+/*!
+ * \file utils.h
+ * \brief simple utils to support the code
+ * \author Tianqi Chen
+ */
+#define _CRT_SECURE_NO_WARNINGS  // MSVC CRT: silence deprecation warnings on C stdio calls
+#ifdef _MSC_VER
+#define fopen64 fopen  // map the fopen64 name used by FopenCheck onto plain fopen
+#else
+#ifdef _FILE_OFFSET_BITS
+#if _FILE_OFFSET_BITS == 32
+#warning "FILE OFFSET BITS defined to be 32 bit"
+#endif
+#endif
+
+#ifdef __APPLE__
+#define off64_t off_t  // on Apple targets the 64-suffixed names are aliased to the plain ones
+#define fopen64 fopen
+#endif
+
+#define _FILE_OFFSET_BITS 64  // request 64-bit file offsets; set before <sys/types.h> is included
+extern "C" {
+#include <sys/types.h>
+};
+#endif
+
+#ifdef _MSC_VER  // fixed-width integer types declared by hand for MSVC
+typedef unsigned char uint8_t;
+typedef unsigned short int uint16_t;
+typedef unsigned int uint32_t;
+typedef unsigned __int64 uint64_t;  // not unsigned long: long is only 32 bits on Windows
+typedef __int64 int64_t;            // same reason: long would give a 32-bit type
+#else
+#include <inttypes.h>
+#endif
+
+
+#include <cstdio>
+#include <cstdarg>
+#include <cstdlib>
+
+namespace xgboost {
+/*! \brief namespace for helper utils of the project */
+namespace utils {
+
+/*! \brief exit with an "AssertError:" message unless exp holds; meant for internal invariants */
+inline void Assert(bool exp, const char *fmt, ...) {
+  if (exp) return;
+  // condition failed: print the printf-style message to stderr, then exit(-1)
+  fprintf(stderr, "AssertError:");
+  va_list ap;
+  va_start(ap, fmt);
+  vfprintf(stderr, fmt, ap);
+  va_end(ap);
+  fprintf(stderr, "\n");
+  exit(-1);
+}
+
+/*! \brief like Assert but without the "AssertError:" prefix; meant for user-facing messages */
+inline void Check(bool exp, const char *fmt, ...) {
+  if (exp) return;
+  // failed: emit the printf-style message plus a newline on stderr, then exit(-1)
+  va_list ap;
+  va_start(ap, fmt);
+  vfprintf(stderr, fmt, ap);
+  va_end(ap);
+  fprintf(stderr, "\n");
+  exit(-1);
+}
+
+/*! \brief unconditionally print a printf-style message to stderr and exit(-1) */
+inline void Error(const char *fmt, ...) {
+  va_list ap;
+  va_start(ap, fmt);
+  // message first, then the terminating newline
+  vfprintf(stderr, fmt, ap);
+  va_end(ap);
+  fprintf(stderr, "\n");
+  // never returns to the caller
+  exit(-1);
+}
+
+/*! \brief fopen64 wrapper: returns the opened stream, reporting through Check when it is NULL */
+inline FILE *FopenCheck(const char *fname, const char *flag) {
+  FILE *const stream = fopen64(fname, flag);
+  Check(stream != NULL, "can not open file \"%s\"\n", fname);
+  return stream;
+}
+
+}  // namespace utils
+}  // namespace xgboost
+#endif  // XGBOOST_UTILS_UTILS_H_