start unity refactor

2014-08-15 20:15:58 -07:00
parent 5b215742c2
commit 2a92c82b92
49 changed files with 3659 additions and 5803 deletions
--- a/learner/dmatrix.h
+++ b/learner/dmatrix.h
@@ -0,0 +1,84 @@
+#ifndef XGBOOST_LEARNER_DMATRIX_H_
+#define XGBOOST_LEARNER_DMATRIX_H_
+/*!
+ * \file dmatrix.h
+ * \brief meta data and template data structure 
+ *        used for regression/classification/ranking
+ * \author Tianqi Chen
+ */
+#include "../data.h"
+
+namespace xgboost {
+namespace learner {
+/*! 
+ * \brief meta information needed in training, including label, weight
+ */
+struct MetaInfo {
+  /*! \brief label of each instance */
+  std::vector<float> labels;
+  /*!
+   * \brief the index of begin and end of a group
+   * needed when the learning task is ranking
+   */
+  std::vector<bst_uint> group_ptr;
+  /*! \brief weights of each instance, optional */
+  std::vector<float> weights;
+  /*!
+   * \brief specified root index of each instance,
+   *  can be used for multi task setting
+   */
+  std::vector<unsigned> root_index;
+  /*!
+   * \brief weight of the i-th instance
+   * \param i index of the instance
+   * \return weights[i], or 1.0f when no weights were provided
+   */
+  inline float GetWeight(size_t i) const {
+    return weights.empty() ? 1.0f : weights[i];
+  }
+  /*!
+   * \brief root index of the i-th instance, converted to float
+   * \param i index of the instance
+   * \return root_index[i] as float, or 0 when no root index was provided
+   */
+  inline float GetRoot(size_t i) const {
+    if (root_index.empty()) return 0;
+    return static_cast<float>(root_index[i]);
+  }
+  /*!
+   * \brief write labels, group_ptr, weights and root_index, in that order
+   * \param fo output stream
+   */
+  inline void SaveBinary(utils::IStream &fo) {
+    fo.Write(labels);
+    fo.Write(group_ptr);
+    fo.Write(weights);
+    fo.Write(root_index);
+  }
+  /*!
+   * \brief read the fields back in the order SaveBinary wrote them;
+   *  every failed read is reported through utils::Check
+   * \param fi input stream
+   */
+  inline void LoadBinary(utils::IStream &fi) {
+    const bool ok_labels = fi.Read(&labels);
+    utils::Check(ok_labels, "MetaInfo: invalid format");
+    const bool ok_group = fi.Read(&group_ptr);
+    utils::Check(ok_group, "MetaInfo: invalid format");
+    const bool ok_weights = fi.Read(&weights);
+    utils::Check(ok_weights, "MetaInfo: invalid format");
+    const bool ok_root = fi.Read(&root_index);
+    utils::Check(ok_root, "MetaInfo: invalid format");
+  }
+};
+
+/*! 
+ * \brief data object used for learning,
+ * \tparam FMatrix type of feature data source
+ */
+template<typename FMatrix>
+struct DMatrix {
+  /*! \brief meta information about the dataset */
+  MetaInfo info;
+  /*! \brief number of rows in the DMatrix */
+  size_t num_row;
+  /*!
+   * \brief number of columns (features) in the DMatrix,
+   *  read by BoostLearner::SetCacheData to estimate the feature bound
+   */
+  size_t num_col;
+  /*! \brief feature matrix about data content */
+  FMatrix fmat;
+  /*! 
+   * \brief cache pointer to verify if the data structure is cached in some learner
+   *  used to verify if DMatrix is cached
+   */
+  void *cache_learner_ptr_;
+  /*!
+   * \brief default constructor: an empty matrix not cached by any learner;
+   *  the sizes start at zero instead of being left indeterminate
+   */
+  DMatrix(void) : num_row(0), num_col(0), cache_learner_ptr_(NULL) {}
+};
+
+}  // namespace learner
+}  // namespace xgboost
+#endif  // XGBOOST_LEARNER_DMATRIX_H_
--- a/learner/evaluation-inl.hpp
+++ b/learner/evaluation-inl.hpp
@@ -0,0 +1,346 @@
+#ifndef XGBOOST_LEARNER_EVALUATION_INL_HPP_
+#define XGBOOST_LEARNER_EVALUATION_INL_HPP_
+/*!
+* \file evaluation-inl.hpp
+* \brief evaluation metrics for regression and classification and rank
+* \author Kailong Chen, Tianqi Chen
+*/
+#include <algorithm>
+#include <climits>
+#include <cmath>
+#include <cstdio>
+#include <cstring>
+#include <string>
+#include <utility>
+#include <vector>
+#include "./evaluation.h"
+#include "./helper_utils.h"
+
+namespace xgboost {
+namespace learner {
+/*! 
+ * \brief base class of elementwise evaluation 
+ * \tparam Derived the name of subclass
+ */
+template<typename Derived>
+struct EvalEWiseBase : public IEvaluator {
+  /*!
+   * \brief weighted element-wise evaluation:
+   *  sums Derived::EvalRow(label, pred) * weight over all instances and
+   *  hands the sum and the total weight to Derived::GetFinal
+   * \param preds prediction of each instance
+   * \param info meta information, supplies labels and optional weights
+   * \return the value produced by Derived::GetFinal
+   */
+  virtual float Eval(const std::vector<float> &preds,
+                     const MetaInfo &info) const {
+    utils::Check(preds.size() == info.labels.size(),
+                 "label and prediction size not match");
+    const unsigned ndata = static_cast<unsigned>(preds.size());
+    // accumulate in double: a float running sum stops absorbing small
+    // per-row terms once it has grown large, which skews the metric
+    double sum = 0.0, wsum = 0.0;
+    #pragma omp parallel for reduction(+:sum, wsum) schedule(static)
+    for (unsigned i = 0; i < ndata; ++i) {
+      const float wt = info.GetWeight(i);
+      sum += Derived::EvalRow(info.labels[i], preds[i]) * wt;
+      wsum += wt;
+    }
+    return Derived::GetFinal(static_cast<float>(sum), static_cast<float>(wsum));
+  }
+  /*! 
+   * \brief to be implemented by subclass, 
+   *   get evaluation result from one row 
+   * \param label label of current instance
+   * \param pred prediction value of current instance
+   * \return unweighted evaluation result of the row
+   */
+  inline static float EvalRow(float label, float pred);
+  /*! 
+   * \brief to be overridden by subclass, final transformation 
+   * \param esum the sum statistics returned by EvalRow
+   * \param wsum sum of weight
+   * \return by default the weighted mean esum / wsum
+   */
+  inline static float GetFinal(float esum, float wsum) {
+    return esum / wsum;
+  }
+};
+
+/*! \brief RMSE */
+struct EvalRMSE : public EvalEWiseBase<EvalRMSE> {
+  /*! \return name of metric */
+  virtual const char *Name(void) const {
+    return "rmse";
+  }
+  /*! \brief squared difference between label and prediction */
+  inline static float EvalRow(float label, float pred) {
+    const float residual = label - pred;
+    return residual * residual;
+  }
+  /*! \brief square root of the weighted mean of the squared differences */
+  inline static float GetFinal(float esum, float wsum) {
+    const float mean_square = esum / wsum;
+    return std::sqrt(mean_square);
+  }
+};
+
+/*! \brief logloss */
+struct EvalLogLoss : public EvalEWiseBase<EvalLogLoss> {
+  /*! \return name of metric */
+  virtual const char *Name(void) const {
+    return "logloss";
+  }
+  /*!
+   * \brief negative log-likelihood of one instance
+   * \param y label of the instance
+   * \param py predicted probability of the positive class
+   * \return -y*log(py) - (1-y)*log(1-py), with both probabilities
+   *  floored at a small epsilon
+   */
+  inline static float EvalRow(float y, float py) {
+    // a prediction of exactly 0 or 1 would feed 0 to log(), producing
+    // inf, or NaN through 0 * inf; floor both probabilities instead
+    const float eps = 1e-16f;
+    const float raw_neg = 1.0f - py;
+    const float p_pos = py < eps ? eps : py;
+    const float p_neg = raw_neg < eps ? eps : raw_neg;
+    return - y * std::log(p_pos) - (1.0f - y) * std::log(p_neg);
+  }
+};
+
+/*! \brief error */
+struct EvalError : public EvalEWiseBase<EvalError> {
+  /*! \return name of metric */
+  virtual const char *Name(void) const {
+    return "error";
+  }
+  /*!
+   * \brief error of one instance at a 0.5 decision threshold
+   * \return 1 - label when pred > 0.5, label otherwise
+   */
+  inline static float EvalRow(float label, float pred) {
+    // assume label is in [0,1]
+    if (pred > 0.5f) {
+      return 1.0f - label;
+    }
+    return label;
+  }
+};
+
+/*! \brief match error */
+struct EvalMatchError : public EvalEWiseBase<EvalMatchError> {
+  /*! \return name of metric */
+  virtual const char *Name(void) const {
+    return "merror";
+  }
+  /*!
+   * \brief compare label and prediction after truncating both to int
+   * \return 1 when the truncated values differ, 0 otherwise
+   */
+  inline static float EvalRow(float label, float pred) {
+    const int pred_class = static_cast<int>(pred);
+    const int label_class = static_cast<int>(label);
+    return pred_class != label_class ? 1.0f : 0.0f;
+  }
+};
+
+/*! \brief AMS: also records best threshold */
+struct EvalAMS : public IEvaluator {
+ public:
+  /*!
+   * \brief construct from a metric name of the form "ams@<ratio>"
+   * \param name metric name, the number after '@' is stored as the ratio
+   */
+  explicit EvalAMS(const char *name) {
+    name_ = name;
+    // note: ams@0 will automatically select which ratio to go
+    utils::Check(sscanf(name, "ams@%f", &ratio_) == 1, "invalid ams format");
+  }
+  /*!
+   * \brief evaluate AMS over the instances with the highest predictions
+   * \param preds prediction of each instance
+   * \param info meta information, labels and weights are both required
+   * \return when ratio * ndata truncates to 0, the best AMS found over all
+   *  thresholds (the chosen ratio is printed to stderr);
+   *  otherwise the AMS of the top ratio * ndata instances
+   */
+  virtual float Eval(const std::vector<float> &preds,
+                     const MetaInfo &info) const {
+    const unsigned ndata = static_cast<unsigned>(preds.size());
+    // labels are indexed below by instance id, so they must cover every prediction
+    utils::Check(info.labels.size() == ndata, "label size predict size not match");
+    utils::Check(info.weights.size() == ndata, "we need weight to evaluate ams");
+    std::vector< std::pair<float, unsigned> > rec(ndata);
+
+    #pragma omp parallel for schedule(static)
+    for (unsigned i = 0; i < ndata; ++i) {
+      rec[i] = std::make_pair(preds[i], i);
+    }
+    // highest prediction first
+    std::sort(rec.begin(), rec.end(), CmpFirst);
+    unsigned ntop = static_cast<unsigned>(ratio_ * ndata);
+    if (ntop == 0) ntop = ndata;
+    const double br = 10.0;
+    unsigned thresindex = 0;
+    double s_tp = 0.0, b_fp = 0.0, tams = 0.0;
+    for (unsigned i = 0; i < ndata-1 && i < ntop; ++i) {
+      const unsigned ridx = rec[i].second;
+      const float wt = info.weights[ridx];
+      if (info.labels[ridx] > 0.5f) {
+        s_tp += wt;
+      } else {
+        b_fp += wt;
+      }
+      // only positions where the prediction value changes are valid thresholds
+      if (rec[i].first != rec[i+1].first) {
+        double ams = CalcAMS(s_tp, b_fp, br);
+        if (tams < ams) {
+          thresindex = i;
+          tams = ams;
+        }
+      }
+    }
+    if (ntop == ndata) {
+      fprintf(stderr, "\tams-ratio=%g", static_cast<float>(thresindex) / ndata);
+      return static_cast<float>(tams);
+    } else {
+      return CalcAMS(s_tp, b_fp, br);
+    }
+  }
+  virtual const char *Name(void) const {
+    return name_.c_str();
+  }
+
+ private:
+  /*!
+   * \brief AMS value from accumulated weights
+   * \param s_tp weight sum of selected instances with label > 0.5
+   * \param b_fp weight sum of the other selected instances
+   * \param br constant regularization term added to b_fp
+   * \return sqrt(2 * ((s + b + br) * log(1 + s / (b + br)) - s))
+   */
+  inline static float CalcAMS(double s_tp, double b_fp, double br) {
+    return sqrtf(2*((s_tp+b_fp+br) * log(1.0 + s_tp/(b_fp+br)) - s_tp));
+  }
+  std::string name_;
+  float ratio_;
+};
+
+/*! \brief Area under curve, for both classification and rank */
+struct EvalAuc : public IEvaluator {
+  /*!
+   * \brief weighted AUC, computed per group and averaged over the groups
+   * \param preds prediction of each instance
+   * \param info meta information; labels are required, weights are optional,
+   *  group_ptr is optional (when empty all instances form one group)
+   * \return mean of the per-group AUC values
+   */
+  virtual float Eval(const std::vector<float> &preds,
+                     const MetaInfo &info) const {
+    utils::Check(preds.size() == info.labels.size(), "label size predict size not match");
+    // fallback group structure: a single group that spans every prediction
+    std::vector<unsigned> tgptr(2, 0); tgptr[1] = preds.size();
+    const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
+    utils::Check(gptr.back() == preds.size(),
+                 "EvalAuc: group structure must match number of prediction");
+    // NOTE(review): a group_ptr with a single entry gives ngroup == 0 and
+    // a division by zero in the final average - confirm callers never pass that
+    const unsigned ngroup = static_cast<unsigned>(gptr.size() - 1);
+    // sum statictis
+    double sum_auc = 0.0f;
+    #pragma omp parallel reduction(+:sum_auc)
+    {
+      // each thread takes a local rec
+      std::vector< std::pair<float, unsigned> > rec;
+      #pragma omp for schedule(static)
+      for (unsigned k = 0; k < ngroup; ++k) {
+        rec.clear();
+        for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
+          rec.push_back(std::make_pair(preds[j], j));
+        }
+        // order the group by prediction, highest first
+        std::sort(rec.begin(), rec.end(), CmpFirst);
+        // calculate AUC
+        double sum_pospair = 0.0;
+        // sum_npos/sum_nneg: weight of positives/negatives in closed buckets
+        // buf_pos/buf_neg: weight of positives/negatives in the current bucket
+        // of tied predictions
+        double sum_npos = 0.0, sum_nneg = 0.0, buf_pos = 0.0, buf_neg = 0.0;
+        for (size_t j = 0; j < rec.size(); ++j) {
+          const float wt = info.GetWeight(rec[j].second);
+          const float ctr = info.labels[rec[j].second];
+          // keep bucketing predictions in same bucket
+          if (j != 0 && rec[j].first != rec[j - 1].first) {
+            // close the bucket: its negatives pair with every earlier positive
+            // and get half credit for positives tied with them
+            sum_pospair += buf_neg * (sum_npos + buf_pos *0.5);
+            sum_npos += buf_pos; sum_nneg += buf_neg;
+            buf_neg = buf_pos = 0.0f;
+          }
+          // the label is used as a fractional positive count
+          buf_pos += ctr * wt; buf_neg += (1.0f - ctr) * wt;
+        }
+        // close the last bucket
+        sum_pospair += buf_neg * (sum_npos + buf_pos *0.5);
+        sum_npos += buf_pos; sum_nneg += buf_neg;
+        // check weird conditions
+        utils::Check(sum_npos > 0.0 && sum_nneg > 0.0,
+                     "AUC: the dataset only contains pos or neg samples");
+        // this is the AUC
+        sum_auc += sum_pospair / (sum_npos*sum_nneg);
+      }
+    }
+    // return average AUC over list
+    return static_cast<float>(sum_auc) / ngroup;
+  }
+  /*! \return name of metric */
+  virtual const char *Name(void) const {
+    return "auc";
+  }
+};
+
+/*! \brief Evaluate rank list */
+struct EvalRankList : public IEvaluator {
+ public:
+  /*!
+   * \brief evaluate EvalMetric on every group and average the results
+   * \param preds prediction of each instance
+   * \param info meta information, labels and group_ptr are required
+   * \return mean of EvalMetric over all groups
+   */
+  virtual float Eval(const std::vector<float> &preds,
+                     const MetaInfo &info) const {
+    utils::Check(preds.size() == info.labels.size(),
+                  "label size predict size not match");
+    const std::vector<unsigned> &gptr = info.group_ptr;
+    utils::Assert(gptr.size() != 0, "must specify group when constructing rank file");
+    utils::Assert(gptr.back() == preds.size(),
+                   "EvalRanklist: group structure must match number of prediction");
+    const unsigned ngroup = static_cast<unsigned>(gptr.size() - 1);
+    // running total of the per-group metric values
+    double sum_metric = 0.0f;
+    #pragma omp parallel reduction(+:sum_metric)
+    {
+      // (prediction, label) records of one group, private to each thread
+      std::vector< std::pair<float, unsigned> > group_rec;
+      #pragma omp for schedule(static)
+      for (unsigned gid = 0; gid < ngroup; ++gid) {
+        group_rec.clear();
+        const unsigned begin = gptr[gid];
+        const unsigned end = gptr[gid + 1];
+        for (unsigned idx = begin; idx < end; ++idx) {
+          // labels are truncated to integers before being stored
+          const int label = static_cast<int>(info.labels[idx]);
+          group_rec.push_back(std::make_pair(preds[idx], label));
+        }
+        sum_metric += this->EvalMetric(group_rec);
+      }
+    }
+    return static_cast<float>(sum_metric) / ngroup;
+  }
+  /*! \return name of metric, as passed to the constructor */
+  virtual const char *Name(void) const {
+    return name_.c_str();
+  }
+
+ protected:
+  /*!
+   * \brief parse a metric name: a number after '@' becomes the cutoff topn_
+   *  (UINT_MAX when absent), a trailing '-' sets minus_
+   * \param name metric name
+   */
+  explicit EvalRankList(const char *name)
+      : name_(name), minus_(false) {
+    const int nparsed = sscanf(name, "%*[^@]@%u[-]?", &topn_);
+    if (nparsed != 1) {
+      topn_ = UINT_MAX;
+    }
+    const char last_char = name[strlen(name) - 1];
+    if (last_char == '-') {
+      minus_ = true;
+    }
+  }
+  /*! \return evaluation metric, given the pair_sort record, (pred,label) */
+  virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &pair_sort) const = 0;
+
+ protected:
+  unsigned topn_;
+  std::string name_;
+  bool minus_;
+};
+
+/*! \brief Precision at N, for both classification and rank */
+struct EvalPrecision : public EvalRankList{
+ public:
+  explicit EvalPrecision(const char *name) : EvalRankList(name) {}
+
+ protected:
+  /*!
+   * \brief count records with non-zero label among the first topn_
+   *  records ordered by prediction, divided by topn_
+   * \param rec (pred,label) records of one group, sorted in place
+   */
+  virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &rec) const {
+    std::sort(rec.begin(), rec.end(), CmpFirst);
+    // examine at most topn_ records
+    size_t upto = rec.size();
+    if (upto > this->topn_) upto = this->topn_;
+    unsigned hits = 0;
+    for (size_t pos = 0; pos < upto; ++pos) {
+      if (rec[pos].second != 0) ++hits;
+    }
+    return static_cast<float>(hits) / topn_;
+  }
+};
+
+/*! \brief NDCG */
+struct EvalNDCG : public EvalRankList{
+ public:
+  explicit EvalNDCG(const char *name) : EvalRankList(name) {}
+
+ protected:
+  /*!
+   * \brief discounted cumulative gain of the first topn_ records
+   * \param rec (pred,label) records in the order to be scored
+   * \return sum over positions i of (2^label - 1) / log(i + 2)
+   */
+  inline float CalcDCG(const std::vector< std::pair<float, unsigned> > &rec) const {
+    double sumdcg = 0.0;
+    for (size_t i = 0; i < rec.size() && i < this->topn_; ++i) {
+      const unsigned rel = rec[i].second;
+      if (rel != 0) { 
+        // 2^rel is computed in floating point: shifting the int literal 1
+        // by 31 or more bits is undefined. The exponent is capped so the
+        // gain stays finite in float; labels below 31 give the same value
+        // as the former integer shift.
+        const int exponent = static_cast<int>(std::min(rel, 127u));
+        const float gain = std::ldexp(1.0f, exponent) - 1.0f;
+        sumdcg += gain / logf(i + 2);
+      }
+    }
+    return static_cast<float>(sumdcg);
+  }
+  /*!
+   * \brief DCG in prediction order divided by DCG in ideal (label) order
+   * \param rec (pred,label) records of one group, reordered in place
+   * \return dcg / idcg; when idcg is zero, 0 if minus_ is set and 1 otherwise
+   */
+  virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &rec) const {
+    std::stable_sort(rec.begin(), rec.end(), CmpFirst);
+    float dcg = this->CalcDCG(rec);
+    std::stable_sort(rec.begin(), rec.end(), CmpSecond);
+    float idcg = this->CalcDCG(rec);
+    if (idcg == 0.0f) {
+      if (minus_) {
+        return 0.0f;
+      } else {
+        return 1.0f;
+      }
+    }
+    return dcg/idcg;
+  }
+};
+
+/*! \brief Mean average precision, for both classification and rank */
+struct EvalMAP : public EvalRankList {
+ public:
+  explicit EvalMAP(const char *name) : EvalRankList(name) {}
+
+ protected:
+  /*!
+   * \brief average precision of one group
+   * \param rec (pred,label) records of one group, sorted in place
+   * \return sum of precision at each hit within the first topn_ positions,
+   *  divided by the total number of hits; when the group has no hit,
+   *  0 if minus_ is set and 1 otherwise
+   */
+  virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &rec) const {
+    std::sort(rec.begin(), rec.end(), CmpFirst);
+    unsigned hit_count = 0;
+    double precision_sum = 0.0;
+    for (size_t i = 0; i < rec.size(); ++i) {
+      if (rec[i].second == 0) continue;
+      // hits are counted over the whole list, precision only inside the cutoff
+      hit_count += 1;
+      if (i < this->topn_) {
+        precision_sum += static_cast<float>(hit_count) / (i+1);
+      }
+    }
+    if (hit_count == 0) {
+      return minus_ ? 0.0f : 1.0f;
+    }
+    precision_sum /= hit_count;
+    return static_cast<float>(precision_sum);
+  }
+};
+
+}  // namespace learner
+}  // namespace xgboost
+#endif  // XGBOOST_LEARNER_EVALUATION_INL_HPP_
--- a/learner/evaluation.h
+++ b/learner/evaluation.h
@@ -0,0 +1,82 @@
+#ifndef XGBOOST_LEARNER_EVALUATION_H_
+#define XGBOOST_LEARNER_EVALUATION_H_
+/*!
+ * \file evaluation.h
+ * \brief interface of evaluation function supported in xgboost
+ * \author Tianqi Chen, Kailong Chen
+ */
+#include <string>
+#include <vector>
+#include "../utils/utils.h"
+
+namespace xgboost {
+namespace learner {
+/*! \brief evaluator that evaluates the loss metrics */
+struct IEvaluator{
+  /*!
+   * \brief evaluate a specific metric
+   * \param preds prediction
+   * \param info information, including label etc.
+   * \return value of the metric on the given predictions
+   */
+  virtual float Eval(const std::vector<float> &preds,
+                     const MetaInfo &info) const = 0;
+  /*! \return name of metric */
+  virtual const char *Name(void) const = 0;
+  /*! \brief virtual destructor, evaluators are deleted through this interface */
+  virtual ~IEvaluator(void) {}
+};
+}  // namespace learner
+}  // namespace xgboost
+
+// include implementations of evaluation functions
+#include "evaluation-inl.hpp"
+// factory function
+namespace xgboost {
+namespace learner {
+/*!
+ * \brief create an evaluator from its metric name
+ * \param name metric name: "rmse", "error", "merror", "logloss", "auc",
+ *  or a name starting with "ams@", "pre@", "map" or "ndcg"
+ * \return newly allocated evaluator, the caller takes ownership;
+ *  an unknown name is reported through utils::Error
+ */
+inline IEvaluator* CreateEvaluator(const char *name) {
+  if (!strcmp(name, "rmse")) return new EvalRMSE();
+  if (!strcmp(name, "error")) return new EvalError();
+  if (!strcmp(name, "merror")) return new EvalMatchError();
+  if (!strcmp(name, "logloss")) return new EvalLogLoss();
+  if (!strcmp(name, "auc")) return new EvalAuc();
+  if (!strncmp(name, "ams@", 4)) return new EvalAMS(name);
+  if (!strncmp(name, "pre@", 4)) return new EvalPrecision(name);
+  if (!strncmp(name, "map", 3)) return new EvalMAP(name);
+  // compare all four characters; a length of 3 also accepted any "ndc..." name
+  if (!strncmp(name, "ndcg", 4)) return new EvalNDCG(name);
+  utils::Error("unknown evaluation metric type: %s", name);
+  return NULL;
+}
+
+/*! \brief a set of evaluators */
+class EvalSet{
+ public:
+  /*! \brief construct an empty set */
+  EvalSet(void) {}
+  /*!
+   * \brief add an evaluator by metric name,
+   *  does nothing when an evaluator with the same name is already present
+   * \param name metric name, see CreateEvaluator
+   */
+  inline void AddEval(const char *name) {
+    for (size_t i = 0; i < evals_.size(); ++i) {
+      if (!strcmp(name, evals_[i]->Name())) return;
+    }
+    evals_.push_back(CreateEvaluator(name));
+  }
+  /*! \brief destructor, deletes every owned evaluator */
+  ~EvalSet(void) {
+    for (size_t i = 0; i < evals_.size(); ++i) {
+      delete evals_[i];
+    }
+  }
+  /*!
+   * \brief run every evaluator and format the results
+   * \param evname name of the dataset, used as prefix of each entry
+   * \param preds prediction
+   * \param info information, including label etc.
+   * \return concatenation of "\t<evname>-<metric>:<value>" for each evaluator
+   */
+  inline std::string Eval(const char *evname,
+                          const std::vector<float> &preds,
+                          const MetaInfo &info) const {
+    std::string result = "";
+    for (size_t i = 0; i < evals_.size(); ++i) {
+      float res = evals_[i]->Eval(preds, info);
+      char tmp[1024];
+      snprintf(tmp, sizeof(tmp), "\t%s-%s:%f", evname, evals_[i]->Name(), res);
+      result += tmp;
+    }
+    return result;
+  }
+
+ private:
+  // the set owns its evaluators through raw pointers, so a copy would
+  // delete them twice: copying is declared but never defined
+  EvalSet(const EvalSet &);
+  EvalSet &operator=(const EvalSet &);
+  std::vector<const IEvaluator*> evals_;
+};
+}  // namespace learner
+}  // namespace xgboost
+#endif  // XGBOOST_LEARNER_EVALUATION_H_
--- a/learner/helper_utils.h
+++ b/learner/helper_utils.h
@@ -0,0 +1,50 @@
+#ifndef XGBOOST_LEARNER_HELPER_UTILS_H_
+#define XGBOOST_LEARNER_HELPER_UTILS_H_
+/*!
+ * \file helper_utils.h
+ * \brief useful helper functions
+ * \author Tianqi Chen, Kailong Chen
+ */
+#include <utility>
+#include <vector>
+#include <algorithm>
+namespace xgboost {
+namespace learner {
+// simple helper function to do softmax
+/*!
+ * \brief in-place softmax: every entry becomes exp(x - max) / sum(exp(x - max))
+ * \param p_rec vector to transform, an empty vector is left untouched
+ */
+inline static void Softmax(std::vector<float>* p_rec) {
+  std::vector<float> &rec = *p_rec;
+  // nothing to normalize; also avoids reading rec[0] of an empty vector
+  if (rec.empty()) return;
+  float wmax = rec[0];
+  for (size_t i = 1; i < rec.size(); ++i) {
+    wmax = std::max(rec[i], wmax);
+  }
+  // subtracting the maximum keeps every exponent non-positive
+  double wsum = 0.0f;
+  for (size_t i = 0; i < rec.size(); ++i) {
+    rec[i] = std::exp(rec[i]-wmax);
+    wsum += rec[i];
+  }
+  for (size_t i = 0; i < rec.size(); ++i) {
+    rec[i] /= static_cast<float>(wsum);
+  }
+}
+// index of the largest element: a later element only replaces the current
+// best when it exceeds it by more than 1e-6, an empty vector yields 0
+inline static int FindMaxIndex(const std::vector<float>& rec) {
+  size_t best = 0;
+  const size_t count = rec.size();
+  for (size_t idx = 1; idx < count; ++idx) {
+    const float threshold = rec[best] + 1e-6f;
+    if (rec[idx] > threshold) best = idx;
+  }
+  return static_cast<int>(best);
+}
+
+// ordering predicate: true when lhs has the strictly larger first member,
+// so sorting with it puts the highest first member at the front
+inline static bool CmpFirst(const std::pair<float, unsigned> &a,
+                            const std::pair<float, unsigned> &b) {
+  return b.first < a.first;
+}
+// ordering predicate: true when lhs has the strictly larger second member,
+// so sorting with it puts the highest second member at the front
+inline static bool CmpSecond(const std::pair<float, unsigned> &a,
+                             const std::pair<float, unsigned> &b) {
+  return b.second < a.second;
+}
+}  // namespace learner
+}  // namespace xgboost
+#endif  // XGBOOST_LEARNER_HELPER_UTILS_H_
--- a/learner/learner-inl.hpp
+++ b/learner/learner-inl.hpp
@@ -0,0 +1,296 @@
+#ifndef XGBOOST_LEARNER_LEARNER_INL_HPP_
+#define XGBOOST_LEARNER_LEARNER_INL_HPP_
+/*!
+ * \file learner-inl.hpp
+ * \brief learning algorithm 
+ * \author Tianqi Chen
+ */
+#include <algorithm>
+#include <vector>
+#include <utility>
+#include <string>
+#include "./objective.h"
+#include "./evaluation.h"
+#include "../gbm/gbm.h"
+
+namespace xgboost {
+/*! \brief namespace for learning algorithm */
+namespace learner {
+/*! 
+ * \brief learner that performs gradient boosting on specific objective functions
+ *  and do training and prediction
+ */
+template<typename FMatrix>
+class BoostLearner {
+ public:
+  /*! \brief constructor, defaults to "reg:linear" objective and "gbtree" booster */
+  BoostLearner(void) {
+    obj_ = NULL;
+    gbm_ = NULL;
+    // silent is read in SetCacheData, so it needs a defined value
+    silent = 0;
+    name_obj_ = "reg:linear";
+    name_gbm_ = "gbtree";
+  }
+  /*! \brief destructor, releases the objective and the booster */
+  ~BoostLearner(void) {
+    if (obj_ != NULL) delete obj_;
+    if (gbm_ != NULL) delete gbm_;
+  }
+  /*!
+   * \brief add internal cache space for mat, this can speedup prediction for matrix,
+   *        please cache prediction for training and eval data
+   *    warning: if the model is loaded from file from some previous training history
+   *             set cache data must be called with exactly SAME 
+   *             data matrices to continue training otherwise it will cause error
+   * \param mats array of pointers to matrix whose prediction result need to be cached
+   */
+  inline void SetCacheData(const std::vector<DMatrix<FMatrix>*>& mats) {
+    // estimate feature bound
+    unsigned num_feature = 0;
+    // assign buffer index
+    size_t buffer_size = 0;
+    utils::Assert(cache_.size() == 0, "can only call cache data once");
+    for (size_t i = 0; i < mats.size(); ++i) {
+      // a matrix listed more than once gets a single cache entry
+      bool duplicate = false;
+      for (size_t j = 0; j < i; ++j) {
+        if (mats[i] == mats[j]) duplicate = true;
+      }
+      if (duplicate) continue;
+      // set mats[i]'s cache learner pointer to this
+      mats[i]->cache_learner_ptr_ = this;
+      cache_.push_back(CacheEntry(mats[i], buffer_size, mats[i]->num_row));
+      buffer_size += mats[i]->num_row;
+      // NOTE(review): relies on DMatrix exposing num_col; verify the member exists
+      num_feature = std::max(num_feature, static_cast<unsigned>(mats[i]->num_col));
+    }
+    char str_temp[25];
+    if (num_feature > mparam.num_feature) {
+      snprintf(str_temp, sizeof(str_temp), "%u", num_feature);
+      this->SetParam("bst:num_feature", str_temp);
+    }
+    // size_t has no portable pre-C99 printf length modifier: cast to unsigned long
+    snprintf(str_temp, sizeof(str_temp), "%lu",
+             static_cast<unsigned long>(buffer_size));
+    this->SetParam("num_pbuffer", str_temp);
+    if (!silent) {
+      printf("buffer_size=%lu\n", static_cast<unsigned long>(buffer_size));
+    }
+  }
+  /*!
+   * \brief set parameters from outside
+   *  objective name, booster name and model parameters are only updated
+   *  while the booster has not been created; every (name, val) pair is
+   *  recorded and replayed to the objective and booster on creation
+   * \param name name of the parameter
+   * \param val  value of the parameter
+   */
+  inline void SetParam(const char *name, const char *val) {
+    if (!strcmp(name, "silent")) silent = atoi(val);
+    if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val);
+    if (gbm_ == NULL) {
+      if (!strcmp(name, "objective")) name_obj_ = val;
+      if (!strcmp(name, "booster")) name_gbm_ = val;
+      mparam.SetParam(name, val);
+    }
+    cfg_.push_back(std::make_pair(std::string(name), std::string(val)));
+  }
+  /*!
+   * \brief initialize the model
+   */
+  inline void InitModel(void) {
+    this->InitObjGBM();
+    // adapt the base score
+    mparam.base_score = obj_->ProbToMargin(mparam.base_score);
+    gbm_->InitModel();
+  }
+  /*!
+   * \brief load model from stream
+   * \param fi input stream
+   */
+  inline void LoadModel(utils::IStream &fi) {
+    utils::Check(fi.Read(&mparam, sizeof(ModelParam)) != 0,
+                 "BoostLearner: wrong model format");
+    utils::Check(fi.Read(&name_obj_), "BoostLearner: wrong model format");
+    utils::Check(fi.Read(&name_gbm_), "BoostLearner: wrong model format");
+    // delete existing gbm if any
+    if (obj_ != NULL) delete obj_;
+    if (gbm_ != NULL) delete gbm_;
+    // reset the pointers: InitObjGBM returns early when obj_ is non-NULL,
+    // which would leave both pointing at the objects freed above
+    obj_ = NULL;
+    gbm_ = NULL;
+    this->InitObjGBM();
+    gbm_->LoadModel(fi);
+  }
+  /*!
+   * \brief load model from file
+   * \param fname file name
+   */
+  inline void LoadModel(const char *fname) {
+    utils::FileStream fi(utils::FopenCheck(fname, "rb"));
+    this->LoadModel(fi);
+    fi.Close();
+  }
+  /*!
+   * \brief save model into stream, in the layout LoadModel reads back:
+   *  model parameter, objective name, booster name, booster content
+   * \param fo output stream
+   */
+  inline void SaveModel(utils::IStream &fo) const {
+    fo.Write(&mparam, sizeof(ModelParam));
+    // write the strings themselves (not their addresses) so that they
+    // pair with fi.Read(&name_obj_) / fi.Read(&name_gbm_) in LoadModel
+    fo.Write(name_obj_);
+    fo.Write(name_gbm_);
+    gbm_->SaveModel(fo);
+  }
+  /*!
+   * \brief save model into file
+   * \param fname file name
+   */
+  inline void SaveModel(const char *fname) const {
+    utils::FileStream fo(utils::FopenCheck(fname, "wb"));
+    this->SaveModel(fo);
+    fo.Close();
+  }
+  /*!
+   * \brief update the model for one iteration
+   * \param iter current iteration number
+   * \param p_train pointer to the data matrix
+   */
+  inline void UpdateOneIter(int iter, DMatrix<FMatrix> *p_train) {
+    // PredictRaw takes (data, output pointer)
+    this->PredictRaw(*p_train, &preds_);
+    obj_->GetGradient(preds_, p_train->info, iter, &gpair_);
+    gbm_->DoBoost(gpair_, p_train->fmat, p_train->info.root_index);
+  }
+  /*!
+   * \brief evaluate the model for specific iteration
+   * \param iter iteration number
+   * \param evals datas i want to evaluate
+   * \param evname name of each dataset
+   * \return a string corresponding to the evaluation result
+   */
+  inline std::string EvalOneIter(int iter,
+                                 const std::vector<const DMatrix<FMatrix>*> &evals,
+                                 const std::vector<std::string> &evname) {
+    std::string res;
+    char tmp[256];
+    snprintf(tmp, sizeof(tmp), "[%d]", iter);
+    res = tmp;
+    for (size_t i = 0; i < evals.size(); ++i) {
+      this->PredictRaw(*evals[i], &preds_);
+      obj_->EvalTransform(&preds_);
+      res += evaluator_.Eval(evname[i].c_str(), preds_, evals[i]->info);
+    }
+    return res;
+  }
+  /*!
+   * \brief simple evaluation function, with a specified metric
+   * \param data input data
+   * \param metric name of metric, "auto" selects the default metric of the objective
+   * \return a pair of <evaluation name, result>
+   */
+  std::pair<std::string, float> Evaluate(const DMatrix<FMatrix> &data, std::string metric) {
+    if (metric == "auto") metric = obj_->DefaultEvalMetric();
+    IEvaluator *ev = CreateEvaluator(metric.c_str());
+    this->PredictRaw(data, &preds_);
+    obj_->EvalTransform(&preds_);
+    float res = ev->Eval(preds_, data.info);
+    delete ev;
+    return std::make_pair(metric, res);
+  }
+  /*!
+   * \brief get prediction
+   * \param data input data
+   * \param out_preds output vector that stores the prediction
+   */
+  inline void Predict(const DMatrix<FMatrix> &data,
+                      std::vector<float> *out_preds) const {
+    this->PredictRaw(data, out_preds);
+    obj_->PredTransform(out_preds);
+  }
+
+ protected:
+  /*! 
+   * \brief initialize the objective function and GBM, 
+   * if not yet done
+   */
+  inline void InitObjGBM(void) {
+    if (obj_ != NULL) return;
+    utils::Assert(gbm_ == NULL, "GBM and obj should be NULL");
+    obj_ = CreateObjFunction(name_obj_.c_str());
+    gbm_ = gbm::CreateGradBooster<FMatrix>(name_gbm_.c_str());
+    // replay every parameter recorded by SetParam
+    for (size_t i = 0; i < cfg_.size(); ++i) {
+      obj_->SetParam(cfg_[i].first.c_str(), cfg_[i].second.c_str());
+      gbm_->SetParam(cfg_[i].first.c_str(), cfg_[i].second.c_str());
+    }
+    evaluator_.AddEval(obj_->DefaultEvalMetric());
+  }
+  /*! 
+   * \brief get un-transformed prediction
+   *  const so that the const Predict member can call it
+   * \param data training data matrix
+   * \param out_preds output vector that stores the prediction
+   */
+  inline void PredictRaw(const DMatrix<FMatrix> &data,
+                         std::vector<float> *out_preds) const {
+    gbm_->Predict(data.fmat, this->FindBufferOffset(data),
+                  data.info, out_preds);
+  }
+
+  /*! \brief training parameter for regression */
+  struct ModelParam{
+    /* \brief global bias */
+    float base_score;
+    /* \brief number of features  */
+    unsigned num_feature;
+    /* \brief number of class, if it is multi-class classification  */
+    int num_class;
+    /*! \brief reserved field */
+    int reserved[32];
+    /*! \brief constructor */
+    ModelParam(void) {
+      base_score = 0.5f;
+      num_feature = 0;
+      num_class = 0;
+      memset(reserved, 0, sizeof(reserved));
+    }
+    /*!
+     * \brief set parameters from outside
+     * \param name name of the parameter
+     * \param val value of the parameter
+     */
+    inline void SetParam(const char *name, const char *val) {
+      if (!strcmp("base_score", name)) base_score = static_cast<float>(atof(val));
+      if (!strcmp("num_class", name)) num_class = atoi(val);
+      if (!strcmp("bst:num_feature", name)) num_feature = atoi(val);
+    }
+  };
+  // data fields
+  // silent during training
+  int silent;
+  // evaluation set
+  EvalSet evaluator_;
+  // model parameter
+  ModelParam   mparam;
+  // gbm model that back everything
+  gbm::IGradBooster<FMatrix> *gbm_;
+  // name of gbm model used for training
+  std::string name_gbm_;
+  // objective function
+  IObjFunction *obj_;
+  // name of objective function
+  std::string name_obj_;
+  // configurations
+  std::vector< std::pair<std::string, std::string> > cfg_;
+  // temporal storages for prediction
+  std::vector<float> preds_;
+  // gradient pairs
+  std::vector<bst_gpair> gpair_;
+
+ private:
+  // cache entry object that helps handle feature caching
+  struct CacheEntry {
+    const DMatrix<FMatrix> *mat_;
+    size_t buffer_offset_;
+    size_t num_row_;
+    CacheEntry(const DMatrix<FMatrix> *mat, size_t buffer_offset, size_t num_row)
+        :mat_(mat), buffer_offset_(buffer_offset), num_row_(num_row) {}
+  };
+  // find internal buffer offset for certain matrix, if not exist, return -1
+  // a matrix only matches when it is the cached object, was cached by this
+  // learner, and still has the row count recorded at caching time
+  inline int64_t FindBufferOffset(const DMatrix<FMatrix> &mat) const {
+    for (size_t i = 0; i < cache_.size(); ++i) {
+      if (cache_[i].mat_ == &mat && mat.cache_learner_ptr_ == this) {
+        if (cache_[i].num_row_ == mat.num_row) {
+          return cache_[i].buffer_offset_;
+        }
+      }
+    }
+    return -1;
+  }
+  // data structure field
+  /*! \brief the entries indicates that we have internal prediction cache */
+  std::vector<CacheEntry> cache_;
+};
+}  // namespace learner
+}  // namespace xgboost
+#endif  // XGBOOST_LEARNER_LEARNER_INL_HPP_
--- a/learner/objective-inl.hpp
+++ b/learner/objective-inl.hpp
@@ -0,0 +1,137 @@
+#ifndef XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
+#define XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
+/*!
+ * \file objective-inl.hpp
+ * \brief objective function implementations
+ * \author Tianqi Chen, Kailong Chen
+ */
+#include <vector>
+#include "./objective.h"
+
+namespace xgboost {
+namespace learner {
+/*! \brief defines functions to calculate some commonly used functions */
+struct LossType {
+  /*! \brief indicate which type we are using */
+  int loss_type;
+  // list of constants
+  static const int kLinearSquare = 0;
+  static const int kLogisticNeglik = 1;
+  static const int kLogisticClassify = 2;
+  static const int kLogisticRaw = 3;
+  /*!
+   * \brief transform the linear sum to prediction
+   *  identity for kLinearSquare and kLogisticRaw, sigmoid for the other two
+   * \param x linear sum of boosting ensemble
+   * \return transformed prediction
+   */
+  inline float PredTransform(float x) const {
+    switch (loss_type) {
+      case kLogisticRaw:
+      case kLinearSquare: return x;
+      case kLogisticClassify:
+      case kLogisticNeglik: return 1.0f / (1.0f + expf(-x));
+      default: utils::Error("unknown loss_type"); return 0.0f;
+    }
+  }
+  /*!
+   * \brief calculate first order gradient of loss, given transformed prediction
+   * \param predt transformed prediction
+   * \param label true label
+   * \return first order gradient
+   */
+  inline float FirstOrderGradient(float predt, float label) const {
+    switch (loss_type) {
+      case kLinearSquare: return predt - label;
+      // PredTransform leaves kLogisticRaw untouched, so the sigmoid is
+      // applied here before sharing the logistic gradient below
+      case kLogisticRaw: predt = 1.0f / (1.0f + expf(-predt));  // fall through
+      case kLogisticClassify:
+      case kLogisticNeglik: return predt - label;
+      default: utils::Error("unknown loss_type"); return 0.0f;
+    }
+  }
+  /*!
+   * \brief calculate second order gradient of loss, given transformed prediction
+   * \param predt transformed prediction
+   * \param label true label
+   * \return second order gradient; for the logistic losses it is floored
+   *  at a small positive epsilon
+   */
+  inline float SecondOrderGradient(float predt, float label) const {
+    // once the sigmoid saturates to exactly 0 or 1 in float, p * (1 - p)
+    // is exactly 0; keep the hessian strictly positive instead
+    const float eps = 1e-16f;
+    switch (loss_type) {
+      case kLinearSquare: return 1.0f;
+      // same sigmoid step as in FirstOrderGradient
+      case kLogisticRaw: predt = 1.0f / (1.0f + expf(-predt));  // fall through
+      case kLogisticClassify:
+      case kLogisticNeglik: {
+        const float hess = predt * (1 - predt);
+        return hess < eps ? eps : hess;
+      }
+      default: utils::Error("unknown loss_type"); return 0.0f;
+    }
+  }
+  /*!
+   * \brief transform probability value back to margin
+   * \param base_score probability, must lie in (0,1) for the logistic losses
+   * \return logit of base_score for the logistic losses, base_score otherwise
+   */
+  inline float ProbToMargin(float base_score) const {
+    if (loss_type == kLogisticRaw ||
+        loss_type == kLogisticClassify ||
+        loss_type == kLogisticNeglik ) {
+      utils::Check(base_score > 0.0f && base_score < 1.0f,
+                   "base_score must be in (0,1) for logistic loss");
+      base_score = -logf(1.0f / base_score - 1.0f);
+    }
+    return base_score;
+  }
+  /*! \brief get default evaluation metric for the objective */
+  inline const char *DefaultEvalMetric(void) const {
+    if (loss_type == kLogisticClassify) return "error";
+    if (loss_type == kLogisticRaw) return "auc";
+    return "rmse";
+  }
+};
+
+/*! \brief objective function whose loss is fully described by a LossType */
+class RegLossObj : public IObjFunction{
+ public:
+  /*!
+   * \brief constructor
+   * \param loss_type one of the LossType constants
+   */
+  explicit RegLossObj(int loss_type) {
+    scale_pos_weight = 1.0f;
+    loss.loss_type = loss_type;
+  }
+  virtual ~RegLossObj(void) {}
+  /*!
+   * \brief set parameters from outside, only "scale_pos_weight" is recognized
+   * \param name name of the parameter
+   * \param val value of the parameter
+   */
+  virtual void SetParam(const char *name, const char *val) {
+    if (strcmp("scale_pos_weight", name) != 0) return;
+    scale_pos_weight = static_cast<float>(atof(val));
+  }
+  /*!
+   * \brief compute weighted first and second order gradients
+   * \param preds prediction of current round
+   * \param info information about labels and weights
+   * \param iter current iteration number, not used here
+   * \param out_gpair output, resized to one gradient pair per prediction
+   */
+  virtual void GetGradient(const std::vector<float>& preds,
+                           const MetaInfo &info,
+                           int iter,
+                           std::vector<bst_gpair> *out_gpair) {
+    utils::Check(preds.size() == info.labels.size(),
+                 "labels are not correctly provided");
+    std::vector<bst_gpair> &gpair = *out_gpair;
+    gpair.resize(preds.size());
+    const unsigned ndata = static_cast<unsigned>(preds.size());
+    #pragma omp parallel for schedule(static)
+    for (unsigned i = 0; i < ndata; ++i) {
+      const float label = info.labels[i];
+      const float pred = loss.PredTransform(preds[i]);
+      float weight = info.GetWeight(i);
+      // instances labelled exactly 1 are rescaled by scale_pos_weight
+      if (label == 1.0f) weight *= scale_pos_weight;
+      const float grad = loss.FirstOrderGradient(pred, label) * weight;
+      const float hess = loss.SecondOrderGradient(pred, label) * weight;
+      gpair[i] = bst_gpair(grad, hess);
+    }
+  }
+  /*! \return the default evaluation metric of the underlying loss */
+  virtual const char* DefaultEvalMetric(void) {
+    return loss.DefaultEvalMetric();
+  }
+  /*!
+   * \brief apply the loss transform to every prediction in place
+   * \param io_preds prediction values, overwritten with the transformed values
+   */
+  virtual void PredTransform(std::vector<float> *io_preds) {
+    std::vector<float> &values = *io_preds;
+    const unsigned ndata = static_cast<unsigned>(values.size());
+    #pragma omp parallel for schedule(static)
+    for (unsigned i = 0; i < ndata; ++i) {
+      values[i] = loss.PredTransform(values[i]);
+    }
+  }
+
+ protected:
+  float scale_pos_weight;
+  LossType loss;
+};
+}  // namespace learner
+}  // namespace xgboost
+#endif  // XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
--- a/learner/objective.h
+++ b/learner/objective.h
@@ -0,0 +1,80 @@
+#ifndef XGBOOST_LEARNER_OBJECTIVE_H_
+#define XGBOOST_LEARNER_OBJECTIVE_H_
+/*!
+ * \file objective.h
+ * \brief interface of objective function used for gradient boosting
+ * \author Tianqi Chen, Kailong Chen
+ */
+#include "dmatrix.h"
+
+namespace xgboost {
+namespace learner {
+/*!
+ * \brief interface of objective function;
+ *  SetParam, GetGradient and DefaultEvalMetric must be implemented,
+ *  the remaining functions come with default implementations
+ */
+class IObjFunction {
+ public:
+  /*! \brief virtual destructor */
+  virtual ~IObjFunction(void) {}
+  /*!
+   * \brief set parameters from outside
+   * \param name name of the parameter
+   * \param val value of the parameter
+   */
+  virtual void SetParam(const char *name, const char *val) = 0;
+  /*!
+   * \brief get gradient over each of predictions, given existing information
+   * \param preds prediction of current round
+   * \param info information about labels, weights, groups in rank
+   * \param iter current iteration number
+   * \param out_gpair output vector that receives the gradient and
+   *        second order gradient
+   */
+  virtual void GetGradient(const std::vector<float>& preds,
+                           const MetaInfo &info,
+                           int iter,
+                           std::vector<bst_gpair> *out_gpair) = 0;
+  /*! \return the default evaluation metric for the objective */
+  virtual const char* DefaultEvalMetric(void) = 0;
+  // the functions below are optional to override,
+  // most of the time the default implementation is good enough
+  /*!
+   * \brief transform prediction values, this is only called when Prediction is called;
+   *  the default implementation leaves the values untouched
+   * \param io_preds prediction values, the result is saved to this vector as well
+   */
+  virtual void PredTransform(std::vector<float> *io_preds) {
+  }
+  /*!
+   * \brief transform prediction values, this is only called when Eval is called;
+   *  the default implementation redirects to PredTransform
+   * \param io_preds prediction values, the result is saved to this vector as well
+   */
+  virtual void EvalTransform(std::vector<float> *io_preds) {
+    PredTransform(io_preds);
+  }
+  /*!
+   * \brief transform probability value back to margin,
+   *  this is used to transform user-set base_score back to the margin
+   *  used by gradient boosting; the default implementation is the identity
+   * \param base_score the base score to be converted
+   * \return transformed value
+   */
+  virtual float ProbToMargin(float base_score) {
+    return base_score;
+  }
+};
+
+}  // namespace learner
+}  // namespace xgboost
+
+// these are the implementations of the objective functions
+#include "objective-inl.hpp"
+// factory function
+namespace xgboost {
+namespace learner {
+/*!
+ * \brief factory function to create objective function by name
+ * \param name name of the objective, one of "reg:linear", "reg:logistic",
+ *        "binary:logistic", "binary:logitraw"
+ * \return newly allocated objective function; for an unrecognized name
+ *         utils::Error is called and, if it returns, NULL is returned
+ */
+inline IObjFunction* CreateObjFunction(const char *name) {
+  // pairs every recognized objective name with the loss it selects
+  struct Entry {
+    const char *obj_name;
+    int loss_type;
+  };
+  const Entry known[] = {
+    { "reg:linear", LossType::kLinearSquare },
+    { "reg:logistic", LossType::kLogisticNeglik },
+    { "binary:logistic", LossType::kLogisticClassify },
+    { "binary:logitraw", LossType::kLogisticRaw }
+  };
+  const size_t num_known = sizeof(known) / sizeof(known[0]);
+  for (size_t i = 0; i < num_known; ++i) {
+    if (strcmp(known[i].obj_name, name) == 0) {
+      return new RegLossObj(known[i].loss_type);
+    }
+  }
+  utils::Error("unknown objective function type: %s", name);
+  return NULL;
+}
+}  // namespace learner
+}  // namespace xgboost
+#endif  // XGBOOST_LEARNER_OBJECTIVE_H_