From dedd87662bf6bda6021496b09efcfae28a33943a Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 30 Dec 2015 18:22:15 -0800 Subject: [PATCH] [OBJ] Add basic objective function and registry --- dmlc-core | 2 +- include/xgboost/base.h | 14 + include/xgboost/data.h | 5 +- include/xgboost/objective.h | 96 +++++ old_src/learner/helper_utils.h | 80 ---- old_src/learner/objective-inl.hpp | 642 ------------------------------ old_src/learner/objective.h | 89 ----- src/c_api.cc | 2 - src/common/math.h | 107 +++++ src/common/random.h | 26 ++ src/data/data.cc | 31 +- src/data/simple_csr_source.h | 11 +- src/objective/multi_class.cc | 134 +++++++ src/objective/objective.cc | 17 + src/objective/rank.cc | 329 +++++++++++++++ src/objective/regression.cc | 216 ++++++++++ 16 files changed, 965 insertions(+), 836 deletions(-) create mode 100644 include/xgboost/objective.h delete mode 100644 old_src/learner/helper_utils.h delete mode 100644 old_src/learner/objective-inl.hpp delete mode 100644 old_src/learner/objective.h delete mode 100644 src/c_api.cc create mode 100644 src/common/math.h create mode 100644 src/common/random.h create mode 100644 src/objective/multi_class.cc create mode 100644 src/objective/objective.cc create mode 100644 src/objective/rank.cc create mode 100644 src/objective/regression.cc diff --git a/dmlc-core b/dmlc-core index 4b951c037..98879773f 160000 --- a/dmlc-core +++ b/dmlc-core @@ -1 +1 @@ -Subproject commit 4b951c0378386b7f4d9eae72be2ecd3b9c816afe +Subproject commit 98879773f062e1c5b8a380e002c25672f2b48b13 diff --git a/include/xgboost/base.h b/include/xgboost/base.h index b1d0e453a..61667928f 100644 --- a/include/xgboost/base.h +++ b/include/xgboost/base.h @@ -7,6 +7,7 @@ #define XGBOOST_BASE_H_ #include +#include namespace xgboost { /*! @@ -17,10 +18,23 @@ typedef uint32_t bst_uint; /*! \brief float type, used for storing statistics */ typedef float bst_float; +/*! \brief gradient statistics pair usually needed in gradient boosting */ +struct bst_gpair { + /*! \brief gradient statistics */ + bst_float grad; + /*! \brief second order gradient statistics */ + bst_float hess; + bst_gpair() {} + bst_gpair(bst_float grad, bst_float hess) : grad(grad), hess(hess) {} +}; + const float rt_eps = 1e-5f; // min gap between feature values to allow a split happen const float rt_2eps = rt_eps * 2.0f; +typedef dmlc::omp_ulong omp_ulong; +typedef dmlc::omp_uint bst_omp_uint; + /*! * \brief define compatible keywords in g++ * Used to support g++-4.6 and g++4.7 diff --git a/include/xgboost/data.h b/include/xgboost/data.h index 160f5bad6..e85537347 100644 --- a/include/xgboost/data.h +++ b/include/xgboost/data.h @@ -10,6 +10,7 @@ #include #include #include +#include #include "./base.h" namespace xgboost { @@ -261,7 +262,7 @@ class DMatrix { * \return a Created DMatrix. */ static DMatrix* Create(std::unique_ptr&& source, - const char* cache_prefix=nullptr); + const char* cache_prefix = nullptr); /*! * \brief Create a DMatrix by loaidng data from parser. * Parser can later be deleted after the DMatrix i created. @@ -275,7 +276,7 @@ class DMatrix { * \return A created DMatrix. */ static DMatrix* Create(dmlc::Parser* parser, - const char* cache_prefix=nullptr); + const char* cache_prefix = nullptr); }; } // namespace xgboost diff --git a/include/xgboost/objective.h b/include/xgboost/objective.h new file mode 100644 index 000000000..0b1d895d2 --- /dev/null +++ b/include/xgboost/objective.h @@ -0,0 +1,96 @@ +/*! 
+ * Copyright 2014 by Contributors + * \file objective.h + * \brief interface of objective function used by xgboost. + * \author Tianqi Chen, Kailong Chen + */ +#ifndef XGBOOST_OBJECTIVE_H_ +#define XGBOOST_OBJECTIVE_H_ + +#include +#include +#include +#include +#include +#include "./data.h" +#include "./base.h" + +namespace xgboost { +/*! \brief interface of objective function */ +class ObjFunction { + public: + /*! \brief virtual destructor */ + virtual ~ObjFunction() {} + /*! + * \brief Initialize the objective with the specified parameters. + * \param args arguments to the objective function. + */ + virtual void Init(const std::vector >& args) = 0; + /*! + * \brief Get gradient over each of predictions, given existing information. + * \param preds prediction of current round + * \param info information about labels, weights, groups in rank + * \param iteration current iteration number. + * \param out_gpair output of get gradient, saves gradient and second order gradient in + */ + virtual void GetGradient(const std::vector& preds, + const MetaInfo& info, + int iteration, + std::vector* out_gpair) = 0; + /*! \return the default evaluation metric for the objective */ + virtual const char* DefaultEvalMetric() const = 0; + // the following functions are optional, most of time default implementation is good enough + /*! + * \brief transform prediction values, this is only called when Prediction is called + * \param io_preds prediction values, saves to this vector as well + */ + virtual void PredTransform(std::vector *io_preds) {} + /*! + * \brief transform prediction values, this is only called when Eval is called, + * usually it redirect to PredTransform + * \param io_preds prediction values, saves to this vector as well + */ + virtual void EvalTransform(std::vector *io_preds) { + this->PredTransform(io_preds); + } + /*! + * \brief transform probability value back to margin + * this is used to transform user-set base_score back to margin + * used by gradient boosting + * \return transformed value + */ + virtual float ProbToMargin(float base_score) const { + return base_score; + } + /*! + * \brief Create an objective function according to name. + * \param name Name of the objective. + */ + static ObjFunction* Create(const char* name); +}; + +/*! + * \brief Registry entry for DataIterator factory functions. + */ +struct ObjFunctionReg + : public dmlc::FunctionRegEntryBase > { +}; + +/*! + * \brief Macro to register objective + * + * \code + * // example of registering a objective + * XGBOOST_REGISTER_OBJECTIVE(LinearRegression, "reg:linear") + * .describe("Linear regression objective") + * .set_body([]() { + * return new RegLossObj(LossType::kLinearSquare); + * }); + * \endcode + */ +#define XGBOOST_REGISTER_OBJECTIVE(UniqueId, Name) \ + static ::xgboost::ObjFunctionReg & __make_ ## ObjFunctionReg ## _ ## UniqueId ## __ = \ + ::dmlc::Registry< ::xgboost::ObjFunctionReg>::Get()->__REGISTER__(#Name) +} // namespace xgboost +#endif // XGBOOST_OBJECTIVE_H_ diff --git a/old_src/learner/helper_utils.h b/old_src/learner/helper_utils.h deleted file mode 100644 index 0db1b46f3..000000000 --- a/old_src/learner/helper_utils.h +++ /dev/null @@ -1,80 +0,0 @@ -/*! 
- * Copyright 2014 by Contributors - * \file helper_utils.h - * \brief useful helper functions - * \author Tianqi Chen, Kailong Chen - */ -#ifndef XGBOOST_LEARNER_HELPER_UTILS_H_ -#define XGBOOST_LEARNER_HELPER_UTILS_H_ - -#include -#include -#include -#include -namespace xgboost { -namespace learner { -// simple helper function to do softmax -inline static void Softmax(std::vector* p_rec) { - std::vector &rec = *p_rec; - float wmax = rec[0]; - for (size_t i = 1; i < rec.size(); ++i) { - wmax = std::max(rec[i], wmax); - } - double wsum = 0.0f; - for (size_t i = 0; i < rec.size(); ++i) { - rec[i] = std::exp(rec[i]-wmax); - wsum += rec[i]; - } - for (size_t i = 0; i < rec.size(); ++i) { - rec[i] /= static_cast(wsum); - } -} - -inline static int FindMaxIndex(const float *rec, size_t size) { - size_t mxid = 0; - for (size_t i = 1; i < size; ++i) { - if (rec[i] > rec[mxid]) { - mxid = i; - } - } - return static_cast(mxid); -} - -// simple helper function to do softmax -inline static int FindMaxIndex(const std::vector& rec) { - return FindMaxIndex(BeginPtr(rec), rec.size()); -} - -// perform numerically safe logsum -inline float LogSum(float x, float y) { - if (x < y) { - return y + std::log(std::exp(x - y) + 1.0f); - } else { - return x + std::log(std::exp(y - x) + 1.0f); - } -} -// numerically safe logsum -inline float LogSum(const float *rec, size_t size) { - float mx = rec[0]; - for (size_t i = 1; i < size; ++i) { - mx = std::max(mx, rec[i]); - } - float sum = 0.0f; - for (size_t i = 0; i < size; ++i) { - sum += std::exp(rec[i] - mx); - } - return mx + std::log(sum); -} - -// comparator functions for sorting pairs in descending order -inline static bool CmpFirst(const std::pair &a, - const std::pair &b) { - return a.first > b.first; -} -inline static bool CmpSecond(const std::pair &a, - const std::pair &b) { - return a.second > b.second; -} -} // namespace learner -} // namespace xgboost -#endif // XGBOOST_LEARNER_HELPER_UTILS_H_ diff --git a/old_src/learner/objective-inl.hpp b/old_src/learner/objective-inl.hpp deleted file mode 100644 index ce23b02fb..000000000 --- a/old_src/learner/objective-inl.hpp +++ /dev/null @@ -1,642 +0,0 @@ -/*! - * Copyright 2014 by Contributors - * \file objective-inl.hpp - * \brief objective function implementations - * \author Tianqi Chen, Kailong Chen - */ -#ifndef XGBOOST_LEARNER_OBJECTIVE_INL_HPP_ -#define XGBOOST_LEARNER_OBJECTIVE_INL_HPP_ - -#include -#include -#include -#include -#include -#include "../data.h" -#include "./objective.h" -#include "./helper_utils.h" -#include "../utils/random.h" -#include "../utils/omp.h" - -namespace xgboost { -namespace learner { -/*! \brief defines functions to calculate some commonly used functions */ -struct LossType { - /*! \brief indicate which type we are using */ - int loss_type; - // list of constants - static const int kLinearSquare = 0; - static const int kLogisticNeglik = 1; - static const int kLogisticClassify = 2; - static const int kLogisticRaw = 3; - /*! - * \brief transform the linear sum to prediction - * \param x linear sum of boosting ensemble - * \return transformed prediction - */ - inline float PredTransform(float x) const { - switch (loss_type) { - case kLogisticRaw: - case kLinearSquare: return x; - case kLogisticClassify: - case kLogisticNeglik: return 1.0f / (1.0f + std::exp(-x)); - default: utils::Error("unknown loss_type"); return 0.0f; - } - } - /*! 
- * \brief check if label range is valid - */ - inline bool CheckLabel(float x) const { - if (loss_type != kLinearSquare) { - return x >= 0.0f && x <= 1.0f; - } - return true; - } - /*! - * \brief error message displayed when check label fail - */ - inline const char * CheckLabelErrorMsg(void) const { - if (loss_type != kLinearSquare) { - return "label must be in [0,1] for logistic regression"; - } else { - return ""; - } - } - /*! - * \brief calculate first order gradient of loss, given transformed prediction - * \param predt transformed prediction - * \param label true label - * \return first order gradient - */ - inline float FirstOrderGradient(float predt, float label) const { - switch (loss_type) { - case kLinearSquare: return predt - label; - case kLogisticRaw: predt = 1.0f / (1.0f + std::exp(-predt)); - case kLogisticClassify: - case kLogisticNeglik: return predt - label; - default: utils::Error("unknown loss_type"); return 0.0f; - } - } - /*! - * \brief calculate second order gradient of loss, given transformed prediction - * \param predt transformed prediction - * \param label true label - * \return second order gradient - */ - inline float SecondOrderGradient(float predt, float label) const { - // cap second order gradient to positive value - const float eps = 1e-16f; - switch (loss_type) { - case kLinearSquare: return 1.0f; - case kLogisticRaw: predt = 1.0f / (1.0f + std::exp(-predt)); - case kLogisticClassify: - case kLogisticNeglik: return std::max(predt * (1.0f - predt), eps); - default: utils::Error("unknown loss_type"); return 0.0f; - } - } - /*! - * \brief transform probability value back to margin - */ - inline float ProbToMargin(float base_score) const { - if (loss_type == kLogisticRaw || - loss_type == kLogisticClassify || - loss_type == kLogisticNeglik ) { - utils::Check(base_score > 0.0f && base_score < 1.0f, - "base_score must be in (0,1) for logistic loss"); - base_score = -std::log(1.0f / base_score - 1.0f); - } - return base_score; - } - /*! \brief get default evaluation metric for the objective */ - inline const char *DefaultEvalMetric(void) const { - if (loss_type == kLogisticClassify) return "error"; - if (loss_type == kLogisticRaw) return "auc"; - return "rmse"; - } -}; - -/*! 
\brief objective function that only need to */ -class RegLossObj : public IObjFunction { - public: - explicit RegLossObj(int loss_type) { - loss.loss_type = loss_type; - scale_pos_weight = 1.0f; - } - virtual ~RegLossObj(void) {} - virtual void SetParam(const char *name, const char *val) { - using namespace std; - if (!strcmp("scale_pos_weight", name)) { - scale_pos_weight = static_cast(atof(val)); - } - } - virtual void GetGradient(const std::vector &preds, - const MetaInfo &info, - int iter, - std::vector *out_gpair) { - utils::Check(info.labels.size() != 0, "label set cannot be empty"); - utils::Check(preds.size() % info.labels.size() == 0, - "labels are not correctly provided"); - std::vector &gpair = *out_gpair; - gpair.resize(preds.size()); - // check if label in range - bool label_correct = true; - // start calculating gradient - const unsigned nstep = static_cast(info.labels.size()); - const bst_omp_uint ndata = static_cast(preds.size()); - #pragma omp parallel for schedule(static) - for (bst_omp_uint i = 0; i < ndata; ++i) { - const unsigned j = i % nstep; - float p = loss.PredTransform(preds[i]); - float w = info.GetWeight(j); - if (info.labels[j] == 1.0f) w *= scale_pos_weight; - if (!loss.CheckLabel(info.labels[j])) label_correct = false; - gpair[i] = bst_gpair(loss.FirstOrderGradient(p, info.labels[j]) * w, - loss.SecondOrderGradient(p, info.labels[j]) * w); - } - utils::Check(label_correct, loss.CheckLabelErrorMsg()); - } - virtual const char* DefaultEvalMetric(void) const { - return loss.DefaultEvalMetric(); - } - virtual void PredTransform(std::vector *io_preds) { - std::vector &preds = *io_preds; - const bst_omp_uint ndata = static_cast(preds.size()); - #pragma omp parallel for schedule(static) - for (bst_omp_uint j = 0; j < ndata; ++j) { - preds[j] = loss.PredTransform(preds[j]); - } - } - virtual float ProbToMargin(float base_score) const { - return loss.ProbToMargin(base_score); - } - - protected: - float scale_pos_weight; - LossType loss; -}; - -// poisson regression for count -class PoissonRegression : public IObjFunction { - public: - PoissonRegression(void) { - max_delta_step = 0.0f; - } - virtual ~PoissonRegression(void) {} - - virtual void SetParam(const char *name, const char *val) { - using namespace std; - if (!strcmp("max_delta_step", name)) { - max_delta_step = static_cast(atof(val)); - } - } - virtual void GetGradient(const std::vector &preds, - const MetaInfo &info, - int iter, - std::vector *out_gpair) { - utils::Check(max_delta_step != 0.0f, - "PoissonRegression: need to set max_delta_step"); - utils::Check(info.labels.size() != 0, "label set cannot be empty"); - utils::Check(preds.size() == info.labels.size(), - "labels are not correctly provided"); - std::vector &gpair = *out_gpair; - gpair.resize(preds.size()); - // check if label in range - bool label_correct = true; - // start calculating gradient - const long ndata = static_cast(preds.size()); // NOLINT(*) - #pragma omp parallel for schedule(static) - for (long i = 0; i < ndata; ++i) { // NOLINT(*) - float p = preds[i]; - float w = info.GetWeight(i); - float y = info.labels[i]; - if (y >= 0.0f) { - gpair[i] = bst_gpair((std::exp(p) - y) * w, - std::exp(p + max_delta_step) * w); - } else { - label_correct = false; - } - } - utils::Check(label_correct, - "PoissonRegression: label must be nonnegative"); - } - virtual void PredTransform(std::vector *io_preds) { - std::vector &preds = *io_preds; - const long ndata = static_cast(preds.size()); // NOLINT(*) - #pragma omp parallel for schedule(static) - for 
(long j = 0; j < ndata; ++j) { // NOLINT(*) - preds[j] = std::exp(preds[j]); - } - } - virtual void EvalTransform(std::vector *io_preds) { - PredTransform(io_preds); - } - virtual float ProbToMargin(float base_score) const { - return std::log(base_score); - } - virtual const char* DefaultEvalMetric(void) const { - return "poisson-nloglik"; - } - - private: - float max_delta_step; -}; - -// softmax multi-class classification -class SoftmaxMultiClassObj : public IObjFunction { - public: - explicit SoftmaxMultiClassObj(int output_prob) - : output_prob(output_prob) { - nclass = 0; - } - virtual ~SoftmaxMultiClassObj(void) {} - virtual void SetParam(const char *name, const char *val) { - using namespace std; - if (!strcmp( "num_class", name )) nclass = atoi(val); - } - virtual void GetGradient(const std::vector &preds, - const MetaInfo &info, - int iter, - std::vector *out_gpair) { - utils::Check(nclass != 0, "must set num_class to use softmax"); - utils::Check(info.labels.size() != 0, "label set cannot be empty"); - utils::Check(preds.size() % (static_cast(nclass) * info.labels.size()) == 0, - "SoftmaxMultiClassObj: label size and pred size does not match"); - std::vector &gpair = *out_gpair; - gpair.resize(preds.size()); - const unsigned nstep = static_cast(info.labels.size() * nclass); - const bst_omp_uint ndata = static_cast(preds.size() / nclass); - int label_error = 0; - #pragma omp parallel - { - std::vector rec(nclass); - #pragma omp for schedule(static) - for (bst_omp_uint i = 0; i < ndata; ++i) { - for (int k = 0; k < nclass; ++k) { - rec[k] = preds[i * nclass + k]; - } - Softmax(&rec); - const unsigned j = i % nstep; - int label = static_cast(info.labels[j]); - if (label < 0 || label >= nclass) { - label_error = label; label = 0; - } - const float wt = info.GetWeight(j); - for (int k = 0; k < nclass; ++k) { - float p = rec[k]; - const float h = 2.0f * p * (1.0f - p) * wt; - if (label == k) { - gpair[i * nclass + k] = bst_gpair((p - 1.0f) * wt, h); - } else { - gpair[i * nclass + k] = bst_gpair(p* wt, h); - } - } - } - } - utils::Check(label_error >= 0 && label_error < nclass, - "SoftmaxMultiClassObj: label must be in [0, num_class),"\ - " num_class=%d but found %d in label", nclass, label_error); - } - virtual void PredTransform(std::vector *io_preds) { - this->Transform(io_preds, output_prob); - } - virtual void EvalTransform(std::vector *io_preds) { - this->Transform(io_preds, 1); - } - virtual const char* DefaultEvalMetric(void) const { - return "merror"; - } - - private: - inline void Transform(std::vector *io_preds, int prob) { - utils::Check(nclass != 0, "must set num_class to use softmax"); - std::vector &preds = *io_preds; - std::vector tmp; - const bst_omp_uint ndata = static_cast(preds.size()/nclass); - if (prob == 0) tmp.resize(ndata); - #pragma omp parallel - { - std::vector rec(nclass); - #pragma omp for schedule(static) - for (bst_omp_uint j = 0; j < ndata; ++j) { - for (int k = 0; k < nclass; ++k) { - rec[k] = preds[j * nclass + k]; - } - if (prob == 0) { - tmp[j] = static_cast(FindMaxIndex(rec)); - } else { - Softmax(&rec); - for (int k = 0; k < nclass; ++k) { - preds[j * nclass + k] = rec[k]; - } - } - } - } - if (prob == 0) preds = tmp; - } - // data field - int nclass; - int output_prob; -}; - -/*! 
\brief objective for lambda rank */ -class LambdaRankObj : public IObjFunction { - public: - LambdaRankObj(void) { - loss.loss_type = LossType::kLogisticRaw; - fix_list_weight = 0.0f; - num_pairsample = 1; - } - virtual ~LambdaRankObj(void) {} - virtual void SetParam(const char *name, const char *val) { - using namespace std; - if (!strcmp( "loss_type", name )) loss.loss_type = atoi(val); - if (!strcmp( "fix_list_weight", name)) fix_list_weight = static_cast(atof(val)); - if (!strcmp( "num_pairsample", name)) num_pairsample = atoi(val); - } - virtual void GetGradient(const std::vector &preds, - const MetaInfo &info, - int iter, - std::vector *out_gpair) { - utils::Check(preds.size() == info.labels.size(), "label size predict size not match"); - std::vector &gpair = *out_gpair; - gpair.resize(preds.size()); - // quick consistency when group is not available - std::vector tgptr(2, 0); tgptr[1] = static_cast(info.labels.size()); - const std::vector &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr; - utils::Check(gptr.size() != 0 && gptr.back() == info.labels.size(), - "group structure not consistent with #rows"); - const bst_omp_uint ngroup = static_cast(gptr.size() - 1); - #pragma omp parallel - { - // parall construct, declare random number generator here, so that each - // thread use its own random number generator, seed by thread id and current iteration - random::Random rnd; rnd.Seed(iter* 1111 + omp_get_thread_num()); - std::vector pairs; - std::vector lst; - std::vector< std::pair > rec; - #pragma omp for schedule(static) - for (bst_omp_uint k = 0; k < ngroup; ++k) { - lst.clear(); pairs.clear(); - for (unsigned j = gptr[k]; j < gptr[k+1]; ++j) { - lst.push_back(ListEntry(preds[j], info.labels[j], j)); - gpair[j] = bst_gpair(0.0f, 0.0f); - } - std::sort(lst.begin(), lst.end(), ListEntry::CmpPred); - rec.resize(lst.size()); - for (unsigned i = 0; i < lst.size(); ++i) { - rec[i] = std::make_pair(lst[i].label, i); - } - std::sort(rec.begin(), rec.end(), CmpFirst); - // enumerate buckets with same label, for each item in the lst, grab another sample randomly - for (unsigned i = 0; i < rec.size(); ) { - unsigned j = i + 1; - while (j < rec.size() && rec[j].first == rec[i].first) ++j; - // bucket in [i,j), get a sample outside bucket - unsigned nleft = i, nright = static_cast(rec.size() - j); - if (nleft + nright != 0) { - int nsample = num_pairsample; - while (nsample --) { - for (unsigned pid = i; pid < j; ++pid) { - unsigned ridx = static_cast(rnd.RandDouble() * (nleft+nright)); - if (ridx < nleft) { - pairs.push_back(LambdaPair(rec[ridx].second, rec[pid].second)); - } else { - pairs.push_back(LambdaPair(rec[pid].second, rec[ridx+j-i].second)); - } - } - } - } - i = j; - } - // get lambda weight for the pairs - this->GetLambdaWeight(lst, &pairs); - // rescale each gradient and hessian so that the lst have constant weighted - float scale = 1.0f / num_pairsample; - if (fix_list_weight != 0.0f) { - scale *= fix_list_weight / (gptr[k+1] - gptr[k]); - } - for (size_t i = 0; i < pairs.size(); ++i) { - const ListEntry &pos = lst[pairs[i].pos_index]; - const ListEntry &neg = lst[pairs[i].neg_index]; - const float w = pairs[i].weight * scale; - float p = loss.PredTransform(pos.pred - neg.pred); - float g = loss.FirstOrderGradient(p, 1.0f); - float h = loss.SecondOrderGradient(p, 1.0f); - // accumulate gradient and hessian in both pid, and nid - gpair[pos.rindex].grad += g * w; - gpair[pos.rindex].hess += 2.0f * w * h; - gpair[neg.rindex].grad -= g * w; - gpair[neg.rindex].hess += 2.0f * 
w * h; - } - } - } - } - virtual const char* DefaultEvalMetric(void) const { - return "map"; - } - - protected: - /*! \brief helper information in a list */ - struct ListEntry { - /*! \brief the predict score we in the data */ - float pred; - /*! \brief the actual label of the entry */ - float label; - /*! \brief row index in the data matrix */ - unsigned rindex; - // constructor - ListEntry(float pred, float label, unsigned rindex) - : pred(pred), label(label), rindex(rindex) {} - // comparator by prediction - inline static bool CmpPred(const ListEntry &a, const ListEntry &b) { - return a.pred > b.pred; - } - // comparator by label - inline static bool CmpLabel(const ListEntry &a, const ListEntry &b) { - return a.label > b.label; - } - }; - /*! \brief a pair in the lambda rank */ - struct LambdaPair { - /*! \brief positive index: this is a position in the list */ - unsigned pos_index; - /*! \brief negative index: this is a position in the list */ - unsigned neg_index; - /*! \brief weight to be filled in */ - float weight; - // constructor - LambdaPair(unsigned pos_index, unsigned neg_index) - : pos_index(pos_index), neg_index(neg_index), weight(1.0f) {} - }; - /*! - * \brief get lambda weight for existing pairs - * \param list a list that is sorted by pred score - * \param io_pairs record of pairs, containing the pairs to fill in weights - */ - virtual void GetLambdaWeight(const std::vector &sorted_list, - std::vector *io_pairs) = 0; - - private: - // loss function - LossType loss; - // number of samples peformed for each instance - int num_pairsample; - // fix weight of each elements in list - float fix_list_weight; -}; - -class PairwiseRankObj: public LambdaRankObj{ - public: - virtual ~PairwiseRankObj(void) {} - - protected: - virtual void GetLambdaWeight(const std::vector &sorted_list, - std::vector *io_pairs) {} -}; - -// beta version: NDCG lambda rank -class LambdaRankObjNDCG : public LambdaRankObj { - public: - virtual ~LambdaRankObjNDCG(void) {} - - protected: - virtual void GetLambdaWeight(const std::vector &sorted_list, - std::vector *io_pairs) { - std::vector &pairs = *io_pairs; - float IDCG; - { - std::vector labels(sorted_list.size()); - for (size_t i = 0; i < sorted_list.size(); ++i) { - labels[i] = sorted_list[i].label; - } - std::sort(labels.begin(), labels.end(), std::greater()); - IDCG = CalcDCG(labels); - } - if (IDCG == 0.0) { - for (size_t i = 0; i < pairs.size(); ++i) { - pairs[i].weight = 0.0f; - } - } else { - IDCG = 1.0f / IDCG; - for (size_t i = 0; i < pairs.size(); ++i) { - unsigned pos_idx = pairs[i].pos_index; - unsigned neg_idx = pairs[i].neg_index; - float pos_loginv = 1.0f / std::log(pos_idx + 2.0f); - float neg_loginv = 1.0f / std::log(neg_idx + 2.0f); - int pos_label = static_cast(sorted_list[pos_idx].label); - int neg_label = static_cast(sorted_list[neg_idx].label); - float original = - ((1 << pos_label) - 1) * pos_loginv + ((1 << neg_label) - 1) * neg_loginv; - float changed = - ((1 << neg_label) - 1) * pos_loginv + ((1 << pos_label) - 1) * neg_loginv; - float delta = (original - changed) * IDCG; - if (delta < 0.0f) delta = - delta; - pairs[i].weight = delta; - } - } - } - inline static float CalcDCG(const std::vector &labels) { - double sumdcg = 0.0; - for (size_t i = 0; i < labels.size(); ++i) { - const unsigned rel = static_cast(labels[i]); - if (rel != 0) { - sumdcg += ((1 << rel) - 1) / std::log(static_cast(i + 2)); - } - } - return static_cast(sumdcg); - } -}; - -class LambdaRankObjMAP : public LambdaRankObj { - public: - virtual 
~LambdaRankObjMAP(void) {} - - protected: - struct MAPStats { - /*! \brief the accumulated precision */ - float ap_acc; - /*! - * \brief the accumulated precision, - * assuming a positive instance is missing - */ - float ap_acc_miss; - /*! - * \brief the accumulated precision, - * assuming that one more positive instance is inserted ahead - */ - float ap_acc_add; - /* \brief the accumulated positive instance count */ - float hits; - MAPStats(void) {} - MAPStats(float ap_acc, float ap_acc_miss, float ap_acc_add, float hits) - : ap_acc(ap_acc), ap_acc_miss(ap_acc_miss), ap_acc_add(ap_acc_add), hits(hits) {} - }; - /*! - * \brief Obtain the delta MAP if trying to switch the positions of instances in index1 or index2 - * in sorted triples - * \param sorted_list the list containing entry information - * \param index1,index2 the instances switched - * \param map_stats a vector containing the accumulated precisions for each position in a list - */ - inline float GetLambdaMAP(const std::vector &sorted_list, - int index1, int index2, - std::vector *p_map_stats) { - std::vector &map_stats = *p_map_stats; - if (index1 == index2 || map_stats[map_stats.size() - 1].hits == 0) { - return 0.0f; - } - if (index1 > index2) std::swap(index1, index2); - float original = map_stats[index2].ap_acc; - if (index1 != 0) original -= map_stats[index1 - 1].ap_acc; - float changed = 0; - float label1 = sorted_list[index1].label > 0.0f ? 1.0f : 0.0f; - float label2 = sorted_list[index2].label > 0.0f ? 1.0f : 0.0f; - if (label1 == label2) { - return 0.0; - } else if (label1 < label2) { - changed += map_stats[index2 - 1].ap_acc_add - map_stats[index1].ap_acc_add; - changed += (map_stats[index1].hits + 1.0f) / (index1 + 1); - } else { - changed += map_stats[index2 - 1].ap_acc_miss - map_stats[index1].ap_acc_miss; - changed += map_stats[index2].hits / (index2 + 1); - } - float ans = (changed - original) / (map_stats[map_stats.size() - 1].hits); - if (ans < 0) ans = -ans; - return ans; - } - /* - * \brief obtain preprocessing results for calculating delta MAP - * \param sorted_list the list containing entry information - * \param map_stats a vector containing the accumulated precisions for each position in a list - */ - inline void GetMAPStats(const std::vector &sorted_list, - std::vector *p_map_acc) { - std::vector &map_acc = *p_map_acc; - map_acc.resize(sorted_list.size()); - float hit = 0, acc1 = 0, acc2 = 0, acc3 = 0; - for (size_t i = 1; i <= sorted_list.size(); ++i) { - if (sorted_list[i - 1].label > 0.0f) { - hit++; - acc1 += hit / i; - acc2 += (hit - 1) / i; - acc3 += (hit + 1) / i; - } - map_acc[i - 1] = MAPStats(acc1, acc2, acc3, hit); - } - } - virtual void GetLambdaWeight(const std::vector &sorted_list, - std::vector *io_pairs) { - std::vector &pairs = *io_pairs; - std::vector map_stats; - GetMAPStats(sorted_list, &map_stats); - for (size_t i = 0; i < pairs.size(); ++i) { - pairs[i].weight = - GetLambdaMAP(sorted_list, pairs[i].pos_index, - pairs[i].neg_index, &map_stats); - } - } -}; - -} // namespace learner -} // namespace xgboost -#endif // XGBOOST_LEARNER_OBJECTIVE_INL_HPP_ diff --git a/old_src/learner/objective.h b/old_src/learner/objective.h deleted file mode 100644 index 774286854..000000000 --- a/old_src/learner/objective.h +++ /dev/null @@ -1,89 +0,0 @@ -/*! 
- * Copyright 2014 by Contributors - * \file objective.h - * \brief interface of objective function used for gradient boosting - * \author Tianqi Chen, Kailong Chen - */ -#ifndef XGBOOST_LEARNER_OBJECTIVE_H_ -#define XGBOOST_LEARNER_OBJECTIVE_H_ - -#include -#include "./dmatrix.h" - -namespace xgboost { -namespace learner { -/*! \brief interface of objective function */ -class IObjFunction{ - public: - /*! \brief virtual destructor */ - virtual ~IObjFunction(void) {} - /*! - * \brief set parameters from outside - * \param name name of the parameter - * \param val value of the parameter - */ - virtual void SetParam(const char *name, const char *val) = 0; - /*! - * \brief get gradient over each of predictions, given existing information - * \param preds prediction of current round - * \param info information about labels, weights, groups in rank - * \param iter current iteration number - * \param out_gpair output of get gradient, saves gradient and second order gradient in - */ - virtual void GetGradient(const std::vector &preds, - const MetaInfo &info, - int iter, - std::vector *out_gpair) = 0; - /*! \return the default evaluation metric for the objective */ - virtual const char* DefaultEvalMetric(void) const = 0; - // the following functions are optional, most of time default implementation is good enough - /*! - * \brief transform prediction values, this is only called when Prediction is called - * \param io_preds prediction values, saves to this vector as well - */ - virtual void PredTransform(std::vector *io_preds) {} - /*! - * \brief transform prediction values, this is only called when Eval is called, - * usually it redirect to PredTransform - * \param io_preds prediction values, saves to this vector as well - */ - virtual void EvalTransform(std::vector *io_preds) { - this->PredTransform(io_preds); - } - /*! - * \brief transform probability value back to margin - * this is used to transform user-set base_score back to margin - * used by gradient boosting - * \return transformed value - */ - virtual float ProbToMargin(float base_score) const { - return base_score; - } -}; -} // namespace learner -} // namespace xgboost - -// this are implementations of objective functions -#include "objective-inl.hpp" -// factory function -namespace xgboost { -namespace learner { -/*! 
\brief factory function to create objective function by name */ -inline IObjFunction* CreateObjFunction(const char *name) { - using namespace std; - if (!strcmp("reg:linear", name)) return new RegLossObj(LossType::kLinearSquare); - if (!strcmp("reg:logistic", name)) return new RegLossObj(LossType::kLogisticNeglik); - if (!strcmp("binary:logistic", name)) return new RegLossObj(LossType::kLogisticClassify); - if (!strcmp("binary:logitraw", name)) return new RegLossObj(LossType::kLogisticRaw); - if (!strcmp("count:poisson", name)) return new PoissonRegression(); - if (!strcmp("multi:softmax", name)) return new SoftmaxMultiClassObj(0); - if (!strcmp("multi:softprob", name)) return new SoftmaxMultiClassObj(1); - if (!strcmp("rank:pairwise", name )) return new PairwiseRankObj(); - if (!strcmp("rank:ndcg", name)) return new LambdaRankObjNDCG(); - if (!strcmp("rank:map", name)) return new LambdaRankObjMAP(); - utils::Error("unknown objective function type: %s", name); - return NULL; -} -} // namespace learner -} // namespace xgboost -#endif // XGBOOST_LEARNER_OBJECTIVE_H_ diff --git a/src/c_api.cc b/src/c_api.cc deleted file mode 100644 index cf1b06d3c..000000000 --- a/src/c_api.cc +++ /dev/null @@ -1,2 +0,0 @@ -#include - diff --git a/src/common/math.h b/src/common/math.h new file mode 100644 index 000000000..fd2f9d5c3 --- /dev/null +++ b/src/common/math.h @@ -0,0 +1,107 @@ +/*! + * Copyright 2015 by Contributors + * \file math.h + * \brief additional math utils + * \author Tianqi Chen + */ +#ifndef XGBOOST_COMMON_MATH_H_ +#define XGBOOST_COMMON_MATH_H_ + +#include +#include +#include +#include + +namespace xgboost { +namespace common { +/*! + * \brief calculate the sigmoid of the input. + * \param x input parameter + * \return the transformed value. + */ +inline float Sigmoid(float x) { + return 1.0f / (1.0f + std::exp(-x)); +} + +/*! + * \brief do inplace softmax transformaton on p_rec + * \param p_rec the input/output vector of the values. + */ +inline void Softmax(std::vector* p_rec) { + std::vector &rec = *p_rec; + float wmax = rec[0]; + for (size_t i = 1; i < rec.size(); ++i) { + wmax = std::max(rec[i], wmax); + } + double wsum = 0.0f; + for (size_t i = 0; i < rec.size(); ++i) { + rec[i] = std::exp(rec[i] - wmax); + wsum += rec[i]; + } + for (size_t i = 0; i < rec.size(); ++i) { + rec[i] /= static_cast(wsum); + } +} + +/*! + * \brief Find the maximum iterator within the iterators + * \param begin The begining iterator. + * \param end The end iterator. + * \return the iterator point to the maximum value. + * \tparam Iterator The type of the iterator. + */ +template +inline Iterator FindMaxIndex(Iterator begin, Iterator end) { + Iterator maxit = begin; + for (Iterator it = begin; it != end; ++it) { + if (*it > *maxit) maxit = it; + } + return maxit; +} + +/*! + * \brief perform numerically safe logsum + * \param x left input operand + * \param y right input operand + * \return log(exp(x) + exp(y)) + */ +inline float LogSum(float x, float y) { + if (x < y) { + return y + std::log(std::exp(x - y) + 1.0f); + } else { + return x + std::log(std::exp(y - x) + 1.0f); + } +} + +/*! + * \brief perform numerically safe logsum + * \param begin The begining iterator. + * \param end The end iterator. + * \return the iterator point to the maximum value. + * \tparam Iterator The type of the iterator. 
+ */ +template +inline float LogSum(Iterator begin, Iterator end) { + float mx = *begin; + for (Iterator it = begin; it != end; ++it) { + mx = std::max(mx, *it); + } + float sum = 0.0f; + for (Iterator it = begin; it != end; ++it) { + sum += std::exp(*it - mx); + } + return mx + std::log(sum); +} + +// comparator functions for sorting pairs in descending order +inline static bool CmpFirst(const std::pair &a, + const std::pair &b) { + return a.first > b.first; +} +inline static bool CmpSecond(const std::pair &a, + const std::pair &b) { + return a.second > b.second; +} +} // namespace common +} // namespace xgboost +#endif // XGBOOST_COMMON_MATH_H_ diff --git a/src/common/random.h b/src/common/random.h new file mode 100644 index 000000000..16441a9c9 --- /dev/null +++ b/src/common/random.h @@ -0,0 +1,26 @@ +/*! + * Copyright 2015 by Contributors + * \file random.h + * \brief Utility related to random. + * \author Tianqi Chen + */ +#ifndef XGBOOST_COMMON_RANDOM_H_ +#define XGBOOST_COMMON_RANDOM_H_ + +#include + +namespace xgboost { +namespace common { +/*! + * \brief Random Engine + */ +typedef std::mt19937 RandomEngine; +/*! + * \brief global singleton of a random engine. + * Only use this engine when necessary, not thread-safe. + */ +static RandomEngine* GlobalRandom(); + +} // namespace common +} // namespace xgboost +#endif // XGBOOST_COMMON_RANDOM_H_ diff --git a/src/data/data.cc b/src/data/data.cc index d020ff0de..11bef8a9b 100644 --- a/src/data/data.cc +++ b/src/data/data.cc @@ -2,8 +2,8 @@ * Copyright 2015 by Contributors * \file data.cc */ -#include #include +#include namespace xgboost { // implementation of inline functions @@ -35,7 +35,8 @@ void MetaInfo::LoadBinary(dmlc::Stream *fi) { CHECK_EQ(version, kVersion) << "MetaInfo: invalid format"; CHECK(fi->Read(&num_row, sizeof(num_row)) == sizeof(num_row)) << "MetaInfo: invalid format"; CHECK(fi->Read(&num_col, sizeof(num_col)) == sizeof(num_col)) << "MetaInfo: invalid format"; - CHECK(fi->Read(&num_nonzero, sizeof(num_nonzero)) == sizeof(num_nonzero)) << "MetaInfo: invalid format"; + CHECK(fi->Read(&num_nonzero, sizeof(num_nonzero)) == sizeof(num_nonzero)) + << "MetaInfo: invalid format"; CHECK(fi->Read(&labels)) << "MetaInfo: invalid format"; CHECK(fi->Read(&group_ptr)) << "MetaInfo: invalid format"; CHECK(fi->Read(&weights)) << "MetaInfo: invalid format"; @@ -45,19 +46,19 @@ void MetaInfo::LoadBinary(dmlc::Stream *fi) { // macro to dispatch according to specified pointer types #define DISPATCH_CONST_PTR(dtype, old_ptr, cast_ptr, proc) \ - switch(dtype) { \ - case kFloat32: { \ - const float* cast_ptr = reinterpret_cast(old_ptr); proc; break; \ - } \ - case kDouble: { \ - const double* cast_ptr = reinterpret_cast(old_ptr); proc; break; \ - } \ - case kUInt32: { \ - const uint32_t* cast_ptr = reinterpret_cast(old_ptr); proc; break; \ - } \ - case kUInt64: { \ - const uint64_t* cast_ptr = reinterpret_cast(old_ptr); proc; break; \ - } \ + switch (dtype) { \ + case kFloat32: { \ + const float* cast_ptr = reinterpret_cast(old_ptr); proc; break; \ + } \ + case kDouble: { \ + const double* cast_ptr = reinterpret_cast(old_ptr); proc; break; \ + } \ + case kUInt32: { \ + const uint32_t* cast_ptr = reinterpret_cast(old_ptr); proc; break; \ + } \ + case kUInt64: { \ + const uint64_t* cast_ptr = reinterpret_cast(old_ptr); proc; break; \ + } \ default: LOG(FATAL) << "Unknown data type" << dtype; \ } \ diff --git a/src/data/simple_csr_source.h b/src/data/simple_csr_source.h index 3832ba852..a9faa98e3 100644 --- a/src/data/simple_csr_source.h +++ 
b/src/data/simple_csr_source.h @@ -5,13 +5,14 @@ * This is an in-memory data structure that holds the data in row oriented format. * \author Tianqi Chen */ -#ifndef XGBOOST_DATA_SIMPLE_CSR_ROW_ITER_H_ -#define XGBOOST_DATA_SIMPLE_CSR_ROW_ITER_H_ +#ifndef XGBOOST_DATA_SIMPLE_CSR_SOURCE_H_ +#define XGBOOST_DATA_SIMPLE_CSR_SOURCE_H_ -#include -#include #include #include +#include +#include + namespace xgboost { /*! \brief namespace of internal data structures*/ @@ -78,4 +79,4 @@ class SimpleCSRSource : public DataSource { }; } // namespace data } // namespace xgboost -#endif // XGBOOST_DATA_SIMPLE_CSR_ROW_ITER_H_ +#endif // XGBOOST_DATA_SIMPLE_CSR_SOURCE_H_ diff --git a/src/objective/multi_class.cc b/src/objective/multi_class.cc new file mode 100644 index 000000000..119a0f377 --- /dev/null +++ b/src/objective/multi_class.cc @@ -0,0 +1,134 @@ +/*! + * Copyright 2015 by Contributors + * \file multi_class.cc + * \brief Definition of multi-class classification objectives. + * \author Tianqi Chen + */ +#include +#include +#include +#include +#include +#include +#include "../common/math.h" + +namespace xgboost { +namespace obj { + +struct SoftmaxMultiClassParam : public dmlc::Parameter { + int num_class; + // declare parameters + DMLC_DECLARE_PARAMETER(SoftmaxMultiClassParam) { + DMLC_DECLARE_FIELD(num_class).set_lower_bound(1) + .describe("Number of output class in the multi-class classification."); + } +}; + +class SoftmaxMultiClassObj : public ObjFunction { + public: + explicit SoftmaxMultiClassObj(bool output_prob) + : output_prob_(output_prob) { + } + void Init(const std::vector >& args) override { + param_.Init(args); + } + void GetGradient(const std::vector& preds, + const MetaInfo& info, + int iter, + std::vector* out_gpair) override { + CHECK_NE(info.labels.size(), 0) << "label set cannot be empty"; + CHECK(preds.size() == (static_cast(param_.num_class) * info.labels.size())) + << "SoftmaxMultiClassObj: label size and pred size does not match"; + out_gpair->resize(preds.size()); + const int nclass = param_.num_class; + const omp_ulong ndata = static_cast(preds.size() / nclass); + + int label_error = 0; + #pragma omp parallel + { + std::vector rec(nclass); + #pragma omp for schedule(static) + for (omp_ulong i = 0; i < ndata; ++i) { + for (int k = 0; k < nclass; ++k) { + rec[k] = preds[i * nclass + k]; + } + common::Softmax(&rec); + int label = static_cast(info.labels[i]); + if (label < 0 || label >= nclass) { + label_error = label; label = 0; + } + const float wt = info.GetWeight(i); + for (int k = 0; k < nclass; ++k) { + float p = rec[k]; + const float h = 2.0f * p * (1.0f - p) * wt; + if (label == k) { + out_gpair->at(i * nclass + k) = bst_gpair((p - 1.0f) * wt, h); + } else { + out_gpair->at(i * nclass + k) = bst_gpair(p* wt, h); + } + } + } + } + CHECK(label_error >= 0 && label_error < nclass) + << "SoftmaxMultiClassObj: label must be in [0, num_class)," + << " num_class=" << nclass + << " but found " << label_error << " in label."; + } + void PredTransform(std::vector* io_preds) override { + this->Transform(io_preds, output_prob_); + } + void EvalTransform(std::vector* io_preds) override { + this->Transform(io_preds, true); + } + const char* DefaultEvalMetric() const override { + return "merror"; + } + + private: + inline void Transform(std::vector *io_preds, bool prob) { + std::vector &preds = *io_preds; + std::vector tmp; + const int nclass = param_.num_class; + const omp_ulong ndata = static_cast(preds.size() / nclass); + if (!prob) tmp.resize(ndata); + + #pragma omp parallel + { + 
std::vector rec(nclass); + #pragma omp for schedule(static) + for (omp_ulong j = 0; j < ndata; ++j) { + for (int k = 0; k < nclass; ++k) { + rec[k] = preds[j * nclass + k]; + } + if (!prob) { + tmp[j] = static_cast( + common::FindMaxIndex(rec.begin(), rec.end()) - rec.begin()); + } else { + common::Softmax(&rec); + for (int k = 0; k < nclass; ++k) { + preds[j * nclass + k] = rec[k]; + } + } + } + } + if (!prob) preds = tmp; + } + // output probability + bool output_prob_; + // parameter + SoftmaxMultiClassParam param_; +}; + +// register the ojective functions +DMLC_REGISTER_PARAMETER(SoftmaxMultiClassParam); + +XGBOOST_REGISTER_OBJECTIVE(SoftmaxMultiClass, "multi:softmax") +.describe("Softmax for multi-class classification, output class index.") +.set_body([]() { return new SoftmaxMultiClassObj(false); }); + +XGBOOST_REGISTER_OBJECTIVE(SoftprobMultiClass, "multi:softprob") +.describe("Softmax for multi-class classification, output probability distribution.") +.set_body([]() { return new SoftmaxMultiClassObj(true); }); + +} // namespace obj +} // namespace xgboost diff --git a/src/objective/objective.cc b/src/objective/objective.cc new file mode 100644 index 000000000..acba980c2 --- /dev/null +++ b/src/objective/objective.cc @@ -0,0 +1,17 @@ +/*! + * Copyright 2015 by Contributors + * \file objective.cc + * \brief global objective function definition. + */ +#include + +namespace dmlc { +DMLC_REGISTRY_ENABLE(::xgboost::ObjFunctionReg); +} // namespace dmlc + +namespace xgboost { +/*! \brief namespace of objective function */ +namespace obj { +} // namespace obj +} // namespace xgboost + diff --git a/src/objective/rank.cc b/src/objective/rank.cc new file mode 100644 index 000000000..649661ced --- /dev/null +++ b/src/objective/rank.cc @@ -0,0 +1,329 @@ +/*! + * Copyright 2015 by Contributors + * \file rank.cc + * \brief Definition of rank loss. + * \author Tianqi Chen, Kailong Chen + */ +#include +#include +#include +#include +#include +#include +#include "../common/math.h" +#include "../common/random.h" + +namespace xgboost { +namespace obj { + +struct LambdaRankParam : public dmlc::Parameter { + int num_pairsample; + float fix_list_weight; + // declare parameters + DMLC_DECLARE_PARAMETER(LambdaRankParam) { + DMLC_DECLARE_FIELD(num_pairsample).set_lower_bound(1).set_default(1) + .describe("Number of pair generated for each instance."); + DMLC_DECLARE_FIELD(fix_list_weight).set_lower_bound(0.0f).set_default(0.0f) + .describe("Normalize the weight of each list by this value," + " if equals 0, no effect will happen"); + + } +}; + +// objective for lambda rank +class LambdaRankObj : public ObjFunction { + public: + void Init(const std::vector >& args) override { + param_.Init(args); + } + void GetGradient(const std::vector& preds, + const MetaInfo& info, + int iter, + std::vector* out_gpair) override { + CHECK_EQ(preds.size(),info.labels.size()) << "label size predict size not match"; + std::vector& gpair = *out_gpair; + + gpair.resize(preds.size()); + // quick consistency when group is not available + std::vector tgptr(2, 0); tgptr[1] = static_cast(info.labels.size()); + const std::vector &gptr = info.group_ptr.size() == 0 ? 
tgptr : info.group_ptr; + CHECK(gptr.size() != 0 && gptr.back() == info.labels.size()) + << "group structure not consistent with #rows"; + const bst_omp_uint ngroup = static_cast(gptr.size() - 1); + #pragma omp parallel + { + // parall construct, declare random number generator here, so that each + // thread use its own random number generator, seed by thread id and current iteration + common::RandomEngine rnd(iter * 1111 + omp_get_thread_num()); + + std::vector pairs; + std::vector lst; + std::vector< std::pair > rec; + #pragma omp for schedule(static) + for (bst_omp_uint k = 0; k < ngroup; ++k) { + lst.clear(); pairs.clear(); + for (unsigned j = gptr[k]; j < gptr[k+1]; ++j) { + lst.push_back(ListEntry(preds[j], info.labels[j], j)); + gpair[j] = bst_gpair(0.0f, 0.0f); + } + std::sort(lst.begin(), lst.end(), ListEntry::CmpPred); + rec.resize(lst.size()); + for (unsigned i = 0; i < lst.size(); ++i) { + rec[i] = std::make_pair(lst[i].label, i); + } + std::sort(rec.begin(), rec.end(), common::CmpFirst); + // enumerate buckets with same label, for each item in the lst, grab another sample randomly + for (unsigned i = 0; i < rec.size(); ) { + unsigned j = i + 1; + while (j < rec.size() && rec[j].first == rec[i].first) ++j; + // bucket in [i,j), get a sample outside bucket + unsigned nleft = i, nright = static_cast(rec.size() - j); + if (nleft + nright != 0) { + int nsample = param_.num_pairsample; + while (nsample --) { + for (unsigned pid = i; pid < j; ++pid) { + unsigned ridx = std::uniform_int_distribution(0, nleft + nright - 1)(rnd); + if (ridx < nleft) { + pairs.push_back(LambdaPair(rec[ridx].second, rec[pid].second)); + } else { + pairs.push_back(LambdaPair(rec[pid].second, rec[ridx+j-i].second)); + } + } + } + } + i = j; + } + // get lambda weight for the pairs + this->GetLambdaWeight(lst, &pairs); + // rescale each gradient and hessian so that the lst have constant weighted + float scale = 1.0f / param_.num_pairsample; + if (param_.fix_list_weight != 0.0f) { + scale *= param_.fix_list_weight / (gptr[k + 1] - gptr[k]); + } + for (size_t i = 0; i < pairs.size(); ++i) { + const ListEntry &pos = lst[pairs[i].pos_index]; + const ListEntry &neg = lst[pairs[i].neg_index]; + const float w = pairs[i].weight * scale; + const float eps = 1e-16f; + float p = common::Sigmoid(pos.pred - neg.pred); + float g = p - 1.0f; + float h = std::max(p * (1.0f - p), eps); + // accumulate gradient and hessian in both pid, and nid + gpair[pos.rindex].grad += g * w; + gpair[pos.rindex].hess += 2.0f * w * h; + gpair[neg.rindex].grad -= g * w; + gpair[neg.rindex].hess += 2.0f * w * h; + } + } + } + } + const char* DefaultEvalMetric(void) const override { + return "map"; + } + + protected: + /*! \brief helper information in a list */ + struct ListEntry { + /*! \brief the predict score we in the data */ + float pred; + /*! \brief the actual label of the entry */ + float label; + /*! \brief row index in the data matrix */ + unsigned rindex; + // constructor + ListEntry(float pred, float label, unsigned rindex) + : pred(pred), label(label), rindex(rindex) {} + // comparator by prediction + inline static bool CmpPred(const ListEntry &a, const ListEntry &b) { + return a.pred > b.pred; + } + // comparator by label + inline static bool CmpLabel(const ListEntry &a, const ListEntry &b) { + return a.label > b.label; + } + }; + /*! \brief a pair in the lambda rank */ + struct LambdaPair { + /*! \brief positive index: this is a position in the list */ + unsigned pos_index; + /*! 
\brief negative index: this is a position in the list */ + unsigned neg_index; + /*! \brief weight to be filled in */ + float weight; + // constructor + LambdaPair(unsigned pos_index, unsigned neg_index) + : pos_index(pos_index), neg_index(neg_index), weight(1.0f) {} + }; + /*! + * \brief get lambda weight for existing pairs + * \param list a list that is sorted by pred score + * \param io_pairs record of pairs, containing the pairs to fill in weights + */ + virtual void GetLambdaWeight(const std::vector &sorted_list, + std::vector *io_pairs) = 0; + + private: + LambdaRankParam param_; +}; + +class PairwiseRankObj: public LambdaRankObj{ + protected: + void GetLambdaWeight(const std::vector &sorted_list, + std::vector *io_pairs) override {} +}; + +// beta version: NDCG lambda rank +class LambdaRankObjNDCG : public LambdaRankObj { + protected: + void GetLambdaWeight(const std::vector &sorted_list, + std::vector *io_pairs) override { + std::vector &pairs = *io_pairs; + float IDCG; + { + std::vector labels(sorted_list.size()); + for (size_t i = 0; i < sorted_list.size(); ++i) { + labels[i] = sorted_list[i].label; + } + std::sort(labels.begin(), labels.end(), std::greater()); + IDCG = CalcDCG(labels); + } + if (IDCG == 0.0) { + for (size_t i = 0; i < pairs.size(); ++i) { + pairs[i].weight = 0.0f; + } + } else { + IDCG = 1.0f / IDCG; + for (size_t i = 0; i < pairs.size(); ++i) { + unsigned pos_idx = pairs[i].pos_index; + unsigned neg_idx = pairs[i].neg_index; + float pos_loginv = 1.0f / std::log(pos_idx + 2.0f); + float neg_loginv = 1.0f / std::log(neg_idx + 2.0f); + int pos_label = static_cast(sorted_list[pos_idx].label); + int neg_label = static_cast(sorted_list[neg_idx].label); + float original = + ((1 << pos_label) - 1) * pos_loginv + ((1 << neg_label) - 1) * neg_loginv; + float changed = + ((1 << neg_label) - 1) * pos_loginv + ((1 << pos_label) - 1) * neg_loginv; + float delta = (original - changed) * IDCG; + if (delta < 0.0f) delta = - delta; + pairs[i].weight = delta; + } + } + } + inline static float CalcDCG(const std::vector &labels) { + double sumdcg = 0.0; + for (size_t i = 0; i < labels.size(); ++i) { + const unsigned rel = static_cast(labels[i]); + if (rel != 0) { + sumdcg += ((1 << rel) - 1) / std::log(static_cast(i + 2)); + } + } + return static_cast(sumdcg); + } +}; + +class LambdaRankObjMAP : public LambdaRankObj { + protected: + struct MAPStats { + /*! \brief the accumulated precision */ + float ap_acc; + /*! + * \brief the accumulated precision, + * assuming a positive instance is missing + */ + float ap_acc_miss; + /*! + * \brief the accumulated precision, + * assuming that one more positive instance is inserted ahead + */ + float ap_acc_add; + /* \brief the accumulated positive instance count */ + float hits; + MAPStats(void) {} + MAPStats(float ap_acc, float ap_acc_miss, float ap_acc_add, float hits) + : ap_acc(ap_acc), ap_acc_miss(ap_acc_miss), ap_acc_add(ap_acc_add), hits(hits) {} + }; + /*! 
+ * \brief Obtain the delta MAP if trying to switch the positions of instances in index1 or index2 + * in sorted triples + * \param sorted_list the list containing entry information + * \param index1,index2 the instances switched + * \param map_stats a vector containing the accumulated precisions for each position in a list + */ + inline float GetLambdaMAP(const std::vector &sorted_list, + int index1, int index2, + std::vector *p_map_stats) { + std::vector &map_stats = *p_map_stats; + if (index1 == index2 || map_stats[map_stats.size() - 1].hits == 0) { + return 0.0f; + } + if (index1 > index2) std::swap(index1, index2); + float original = map_stats[index2].ap_acc; + if (index1 != 0) original -= map_stats[index1 - 1].ap_acc; + float changed = 0; + float label1 = sorted_list[index1].label > 0.0f ? 1.0f : 0.0f; + float label2 = sorted_list[index2].label > 0.0f ? 1.0f : 0.0f; + if (label1 == label2) { + return 0.0; + } else if (label1 < label2) { + changed += map_stats[index2 - 1].ap_acc_add - map_stats[index1].ap_acc_add; + changed += (map_stats[index1].hits + 1.0f) / (index1 + 1); + } else { + changed += map_stats[index2 - 1].ap_acc_miss - map_stats[index1].ap_acc_miss; + changed += map_stats[index2].hits / (index2 + 1); + } + float ans = (changed - original) / (map_stats[map_stats.size() - 1].hits); + if (ans < 0) ans = -ans; + return ans; + } + /* + * \brief obtain preprocessing results for calculating delta MAP + * \param sorted_list the list containing entry information + * \param map_stats a vector containing the accumulated precisions for each position in a list + */ + inline void GetMAPStats(const std::vector &sorted_list, + std::vector *p_map_acc) { + std::vector &map_acc = *p_map_acc; + map_acc.resize(sorted_list.size()); + float hit = 0, acc1 = 0, acc2 = 0, acc3 = 0; + for (size_t i = 1; i <= sorted_list.size(); ++i) { + if (sorted_list[i - 1].label > 0.0f) { + hit++; + acc1 += hit / i; + acc2 += (hit - 1) / i; + acc3 += (hit + 1) / i; + } + map_acc[i - 1] = MAPStats(acc1, acc2, acc3, hit); + } + } + void GetLambdaWeight(const std::vector &sorted_list, + std::vector *io_pairs) override { + std::vector &pairs = *io_pairs; + std::vector map_stats; + GetMAPStats(sorted_list, &map_stats); + for (size_t i = 0; i < pairs.size(); ++i) { + pairs[i].weight = + GetLambdaMAP(sorted_list, pairs[i].pos_index, + pairs[i].neg_index, &map_stats); + } + } +}; + +// register the ojective functions +DMLC_REGISTER_PARAMETER(LambdaRankParam); + +XGBOOST_REGISTER_OBJECTIVE(PairwieRankObj, "rank:pairwise") +.describe("Pairwise rank objective.") +.set_body([]() { return new PairwiseRankObj(); }); + +XGBOOST_REGISTER_OBJECTIVE(LambdaRankNDCG, "rank:ndcg") +.describe("LambdaRank with NDCG as objective.") +.set_body([]() { return new LambdaRankObjNDCG(); }); + +XGBOOST_REGISTER_OBJECTIVE(LambdaRankObjMAP, "rank:map") +.describe("LambdaRank with MAP as objective.") +.set_body([]() { return new LambdaRankObjMAP(); }); + +} // namespace obj +} // namespace xgboost + diff --git a/src/objective/regression.cc b/src/objective/regression.cc new file mode 100644 index 000000000..2c0db1e02 --- /dev/null +++ b/src/objective/regression.cc @@ -0,0 +1,216 @@ +/*! + * Copyright 2015 by Contributors + * \file regression.cc + * \brief Definition of single-value regression and classification objectives. 
+ * \author Tianqi Chen, Kailong Chen + */ +#include +#include +#include +#include +#include +#include +#include "../common/math.h" + +namespace xgboost { +namespace obj { +// common regressions +// linear regression +struct LinearSquareLoss { + static float PredTransform(float x) { return x; } + static bool CheckLabel(float x) { return true; } + static float FirstOrderGradient(float predt, float label) { return predt - label; } + static float SecondOrderGradient(float predt, float label) { return 1.0f; } + static float ProbToMargin(float base_score) { return base_score; } + static const char* LabelErrorMsg() { return ""; } + static const char* DefaultEvalMetric() { return "rmse"; } +}; +// logistic loss for probability regression task +struct LogisticRegression { + static float PredTransform(float x) { return common::Sigmoid(x); } + static bool CheckLabel(float x) { return x >= 0.0f && x <= 1.0f; } + static float FirstOrderGradient(float predt, float label) { return predt - label; } + static float SecondOrderGradient(float predt, float label) { + const float eps = 1e-16f; + return std::max(predt * (1.0f - predt), eps); + } + static float ProbToMargin(float base_score) { + CHECK(base_score > 0.0f && base_score < 1.0f) + << "base_score must be in (0,1) for logistic loss"; + return -std::log(1.0f / base_score - 1.0f); + } + static const char* LabelErrorMsg() { + return "label must be in [0,1] for logistic regression"; + } + static const char* DefaultEvalMetric() { return "rmse"; } +}; +// logistic loss for binary classification task. +struct LogisticClassification : public LogisticRegression { + static const char* DefaultEvalMetric() { return "error"; } +}; +// logistic loss, but predict un-transformed margin +struct LogisticRaw : public LogisticRegression { + static float PredTransform(float x) { return x; } + static float FirstOrderGradient(float predt, float label) { + predt = common::Sigmoid(predt); + return predt - label; + } + static float SecondOrderGradient(float predt, float label) { + const float eps = 1e-16f; + predt = common::Sigmoid(predt); + return std::max(predt * (1.0f - predt), eps); + } + static const char* DefaultEvalMetric() { return "auc"; } +}; + +struct RegLossParam : public dmlc::Parameter { + float scale_pos_weight; + // declare parameters + DMLC_DECLARE_PARAMETER(RegLossParam) { + DMLC_DECLARE_FIELD(scale_pos_weight).set_default(1.0f).set_lower_bound(0.0f) + .describe("Scale the weight of positive examples by this factor"); + } +}; + +// regression los function +template +class RegLossObj : public ObjFunction { + public: + void Init(const std::vector >& args) override { + param_.Init(args); + } + void GetGradient(const std::vector &preds, + const MetaInfo &info, + int iter, + std::vector *out_gpair) override { + CHECK_NE(info.labels.size(), 0) << "label set cannot be empty"; + CHECK_EQ(preds.size(), info.labels.size()) << "labels are not correctly provided"; + out_gpair->resize(preds.size()); + // check if label in range + bool label_correct = true; + // start calculating gradient + const omp_ulong ndata = static_cast(preds.size()); + #pragma omp parallel for schedule(static) + for (omp_ulong i = 0; i < ndata; ++i) { + float p = Loss::PredTransform(preds[i]); + float w = info.GetWeight(i); + if (info.labels[i] == 1.0f) w *= param_.scale_pos_weight; + if (Loss::CheckLabel(info.labels[i])) label_correct = false; + out_gpair->at(i) = bst_gpair(Loss::FirstOrderGradient(p, info.labels[i]) * w, + Loss::SecondOrderGradient(p, info.labels[i]) * w); + } + if (!label_correct) 
{ + LOG(FATAL) << Loss::LabelErrorMsg(); + } + } + virtual const char* DefaultEvalMetric() const { + return Loss::DefaultEvalMetric(); + } + virtual void PredTransform(std::vector *io_preds) { + std::vector &preds = *io_preds; + const bst_omp_uint ndata = static_cast(preds.size()); + #pragma omp parallel for schedule(static) + for (bst_omp_uint j = 0; j < ndata; ++j) { + preds[j] = Loss::PredTransform(preds[j]); + } + } + virtual float ProbToMargin(float base_score) const { + return Loss::ProbToMargin(base_score); + } + + protected: + RegLossParam param_; +}; + +// register the ojective functions +DMLC_REGISTER_PARAMETER(RegLossParam); + +XGBOOST_REGISTER_OBJECTIVE(LinearRegression, "reg:linear") +.describe("Linear regression.") +.set_body([]() { return new RegLossObj(); }); + +XGBOOST_REGISTER_OBJECTIVE(LogisticRegression, "reg:logistic") +.describe("Logistic regression for probability regression task.") +.set_body([]() { return new RegLossObj(); }); + +XGBOOST_REGISTER_OBJECTIVE(LogisticClassification, "binary:logistic") +.describe("Logistic regression for binary classification task.") +.set_body([]() { return new RegLossObj(); }); + +XGBOOST_REGISTER_OBJECTIVE(LogisticRaw, "binary:logitraw") +.describe("Logistic regression for classification, output score before logistic transformation") +.set_body([]() { return new RegLossObj(); }); + +// declare parameter +struct PoissonRegressionParam : public dmlc::Parameter { + float max_delta_step; + DMLC_DECLARE_PARAMETER(PoissonRegressionParam) { + DMLC_DECLARE_FIELD(max_delta_step).set_lower_bound(0.0f) + .describe("Maximum delta step we allow each weight estimation to be." \ + " This parameter is required for possion regression."); + } +}; + +// poisson regression for count +class PoissonRegression : public ObjFunction { + public: + // declare functions + void Init(const std::vector >& args) override { + param_.Init(args); + } + + void GetGradient(const std::vector &preds, + const MetaInfo &info, + int iter, + std::vector *out_gpair) override { + CHECK_NE(info.labels.size(), 0) << "label set cannot be empty"; + CHECK_EQ(preds.size(), info.labels.size()) << "labels are not correctly provided"; + out_gpair->resize(preds.size()); + // check if label in range + bool label_correct = true; + // start calculating gradient + const omp_ulong ndata = static_cast(preds.size()); // NOLINT(*) + #pragma omp parallel for schedule(static) + for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*) + float p = preds[i]; + float w = info.GetWeight(i); + float y = info.labels[i]; + if (y >= 0.0f) { + out_gpair->at(i) = bst_gpair((std::exp(p) - y) * w, + std::exp(p + param_.max_delta_step) * w); + } else { + label_correct = false; + } + } + CHECK(label_correct) << "PoissonRegression: label must be nonnegative"; + } + void PredTransform(std::vector *io_preds) override { + std::vector &preds = *io_preds; + const long ndata = static_cast(preds.size()); // NOLINT(*) + #pragma omp parallel for schedule(static) + for (long j = 0; j < ndata; ++j) { // NOLINT(*) + preds[j] = std::exp(preds[j]); + } + } + void EvalTransform(std::vector *io_preds) override { + PredTransform(io_preds); + } + float ProbToMargin(float base_score) const override { + return std::log(base_score); + } + const char* DefaultEvalMetric(void) const override { + return "poisson-nloglik"; + } + + private: + PoissonRegressionParam param_; +}; + +// register the ojective functions +DMLC_REGISTER_PARAMETER(PoissonRegressionParam); + +XGBOOST_REGISTER_OBJECTIVE(PoissonRegression, "count:poisson") 
+.describe("Possion regression for count data.") +.set_body([]() { return new PoissonRegression(); }); +} // namespace obj +} // namespace xgboost