add cvgrad stats, simplify data

commit 6daa1c365d (parent c640485f1d)

Makefile (4 changed lines)
@@ -5,9 +5,9 @@ export LDFLAGS= -pthread -lm
 # add include path to Rinternals.h here

 ifeq ($(no_omp),1)
-	export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -DDISABLE_OPENMP
+	export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -DDISABLE_OPENMP -funroll-loops
 else
-	export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp
+	export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fopenmp -funroll-loops
 endif

 # expose these flags to R CMD SHLIB
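Note: both build flavors now append -funroll-loops to CFLAGS; the OpenMP-free flavor is the one selected by passing no_omp=1 on the make command line, which the ifeq above tests.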
src/tree/param.h (134 changed lines)
@@ -22,10 +22,10 @@ struct TrainParam{
   //----- the rest parameters are less important ----
   // minimum amount of hessian(weight) allowed in a child
   float min_child_weight;
-  // weight decay parameter used to control leaf fitting
+  // L2 regularization factor
   float reg_lambda;
-  // reg method
-  int reg_method;
+  // L1 regularization factor
+  float reg_alpha;
   // default direction choice
   int default_direction;
   // whether we want to do subsample
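This hunk replaces the old integer reg_method mode switch, which picked one of several penalty formulas, with an elastic-net style pair of independent knobs: reg_lambda for the L2 term and the new reg_alpha for the L1 term, applied together in the gain and weight formulas further down.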
@@ -36,6 +36,8 @@ struct TrainParam{
   float colsample_bytree;
   // speed optimization for dense column
   float opt_dense_col;
+  // leaf vector size
+  int size_leaf_vector;
   // number of threads to be used for tree construction,
   // if OpenMP is enabled, if equals 0, use system default
   int nthread;
@ -45,13 +47,14 @@ struct TrainParam{
|
|||||||
min_child_weight = 1.0f;
|
min_child_weight = 1.0f;
|
||||||
max_depth = 6;
|
max_depth = 6;
|
||||||
reg_lambda = 1.0f;
|
reg_lambda = 1.0f;
|
||||||
reg_method = 2;
|
reg_alpha = 0.0f;
|
||||||
default_direction = 0;
|
default_direction = 0;
|
||||||
subsample = 1.0f;
|
subsample = 1.0f;
|
||||||
colsample_bytree = 1.0f;
|
colsample_bytree = 1.0f;
|
||||||
colsample_bylevel = 1.0f;
|
colsample_bylevel = 1.0f;
|
||||||
opt_dense_col = 1.0f;
|
opt_dense_col = 1.0f;
|
||||||
nthread = 0;
|
nthread = 0;
|
||||||
|
size_leaf_vector = 0;
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief set parameters from outside
|
* \brief set parameters from outside
|
||||||
@ -63,15 +66,17 @@ struct TrainParam{
|
|||||||
if (!strcmp(name, "gamma")) min_split_loss = static_cast<float>(atof(val));
|
if (!strcmp(name, "gamma")) min_split_loss = static_cast<float>(atof(val));
|
||||||
if (!strcmp(name, "eta")) learning_rate = static_cast<float>(atof(val));
|
if (!strcmp(name, "eta")) learning_rate = static_cast<float>(atof(val));
|
||||||
if (!strcmp(name, "lambda")) reg_lambda = static_cast<float>(atof(val));
|
if (!strcmp(name, "lambda")) reg_lambda = static_cast<float>(atof(val));
|
||||||
|
if (!strcmp(name, "alpha")) reg_alpha = static_cast<float>(atof(val));
|
||||||
if (!strcmp(name, "learning_rate")) learning_rate = static_cast<float>(atof(val));
|
if (!strcmp(name, "learning_rate")) learning_rate = static_cast<float>(atof(val));
|
||||||
if (!strcmp(name, "min_child_weight")) min_child_weight = static_cast<float>(atof(val));
|
if (!strcmp(name, "min_child_weight")) min_child_weight = static_cast<float>(atof(val));
|
||||||
if (!strcmp(name, "min_split_loss")) min_split_loss = static_cast<float>(atof(val));
|
if (!strcmp(name, "min_split_loss")) min_split_loss = static_cast<float>(atof(val));
|
||||||
if (!strcmp(name, "reg_lambda")) reg_lambda = static_cast<float>(atof(val));
|
if (!strcmp(name, "reg_lambda")) reg_lambda = static_cast<float>(atof(val));
|
||||||
if (!strcmp(name, "reg_method")) reg_method = static_cast<float>(atof(val));
|
if (!strcmp(name, "reg_alpha")) reg_alpha = static_cast<float>(atof(val));
|
||||||
if (!strcmp(name, "subsample")) subsample = static_cast<float>(atof(val));
|
if (!strcmp(name, "subsample")) subsample = static_cast<float>(atof(val));
|
||||||
if (!strcmp(name, "colsample_bylevel")) colsample_bylevel = static_cast<float>(atof(val));
|
if (!strcmp(name, "colsample_bylevel")) colsample_bylevel = static_cast<float>(atof(val));
|
||||||
if (!strcmp(name, "colsample_bytree")) colsample_bytree = static_cast<float>(atof(val));
|
if (!strcmp(name, "colsample_bytree")) colsample_bytree = static_cast<float>(atof(val));
|
||||||
if (!strcmp(name, "opt_dense_col")) opt_dense_col = static_cast<float>(atof(val));
|
if (!strcmp(name, "opt_dense_col")) opt_dense_col = static_cast<float>(atof(val));
|
||||||
|
if (!strcmp(name, "size_leaf_vector")) size_leaf_vector = atoi(val);
|
||||||
if (!strcmp(name, "max_depth")) max_depth = atoi(val);
|
if (!strcmp(name, "max_depth")) max_depth = atoi(val);
|
||||||
if (!strcmp(name, "nthread")) nthread = atoi(val);
|
if (!strcmp(name, "nthread")) nthread = atoi(val);
|
||||||
if (!strcmp(name, "default_direction")) {
|
if (!strcmp(name, "default_direction")) {
|
||||||
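For reference, a minimal sketch of driving the new knobs through this setter; it assumes the enclosing function is TrainParam's SetParam(const char *name, const char *val), which this hunk's header does not show:

  TrainParam param;
  param.SetParam("lambda", "1.0");          // L2 strength; "reg_lambda" is the long alias
  param.SetParam("alpha", "0.1");           // L1 strength; "reg_alpha" is the long alias (new)
  param.SetParam("size_leaf_vector", "2");  // must match the CVGradStats<2> updater below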
@@ -82,31 +87,31 @@ struct TrainParam{
   }
   // calculate the cost of loss function
   inline double CalcGain(double sum_grad, double sum_hess) const {
-    if (sum_hess < min_child_weight) {
-      return 0.0;
-    }
-    switch (reg_method) {
-      case 1 : return Sqr(ThresholdL1(sum_grad, reg_lambda)) / sum_hess;
-      case 2 : return Sqr(sum_grad) / (sum_hess + reg_lambda);
-      case 3 : return
-          Sqr(ThresholdL1(sum_grad, 0.5 * reg_lambda)) /
-          (sum_hess + 0.5 * reg_lambda);
-      default: return Sqr(sum_grad) / sum_hess;
-    }
+    if (sum_hess < min_child_weight) return 0.0;
+    if (reg_alpha == 0.0f) {
+      return Sqr(sum_grad) / (sum_hess + reg_lambda);
+    } else {
+      return Sqr(ThresholdL1(sum_grad, reg_alpha)) / (sum_hess + reg_lambda);
+    }
+  }
+  // calculate cost of loss function with four statistics
+  inline double CalcGain(double sum_grad, double sum_hess,
+                         double test_grad, double test_hess) const {
+    double w = CalcWeight(sum_grad, sum_hess);
+    double ret = test_grad * w + 0.5 * (test_hess + reg_lambda) * Sqr(w);
+    if (reg_alpha == 0.0f) {
+      return -2.0 * ret;
+    } else {
+      return -2.0 * (ret + reg_alpha * std::abs(w));
+    }
   }
   // calculate weight given the statistics
   inline double CalcWeight(double sum_grad, double sum_hess) const {
-    if (sum_hess < min_child_weight) {
-      return 0.0;
-    } else {
-      switch (reg_method) {
-        case 1: return - ThresholdL1(sum_grad, reg_lambda) / sum_hess;
-        case 2: return - sum_grad / (sum_hess + reg_lambda);
-        case 3: return
-            - ThresholdL1(sum_grad, 0.5 * reg_lambda) /
-            (sum_hess + 0.5 * reg_lambda);
-        default: return - sum_grad / sum_hess;
-      }
-    }
+    if (sum_hess < min_child_weight) return 0.0;
+    if (reg_alpha == 0.0f) {
+      return -sum_grad / (sum_hess + reg_lambda);
+    } else {
+      return -ThresholdL1(sum_grad, reg_alpha) / (sum_hess + reg_lambda);
+    }
   }
   /*! \brief whether need forward small to big search: default right */
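As a sketch of the math the new branches implement, assuming ThresholdL1 is the usual soft-thresholding operator, ThresholdL1(g, a) = sign(g) * max(|g| - a, 0): the per-leaf objective is L(w) = G*w + 0.5*(H + reg_lambda)*Sqr(w) + reg_alpha*|w|, whose minimizer and optimal value are

  w* = -ThresholdL1(G, reg_alpha) / (H + reg_lambda)              // CalcWeight
  -2 * L(w*) = Sqr(ThresholdL1(G, reg_alpha)) / (H + reg_lambda)  // CalcGain

The four-statistic overload instead evaluates the weight fitted on (sum_grad, sum_hess) against held-out statistics (test_grad, test_hess) and returns -2 times the resulting regularized loss, so a split that reduces held-out loss scores a positive gain.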
@@ -153,6 +158,9 @@ struct GradStats {
   inline void Clear(void) {
     sum_grad = sum_hess = 0.0f;
   }
+  /*! \brief check if necessary information is ready */
+  inline static void CheckInfo(const BoosterInfo &info) {
+  }
   /*!
    * \brief accumulate statistics,
    * \param gpair the vector storing the gradient statistics
@@ -188,14 +196,88 @@ struct GradStats {
   }
   /*! \brief set leaf vector value based on statistics */
   inline void SetLeafVec(const TrainParam &param, bst_float *vec) const {
   }
- protected:
+  // constructor to allow inheritance
+  GradStats(void) {}
   /*! \brief add statistics to the data */
   inline void Add(double grad, double hess) {
     sum_grad += grad; sum_hess += hess;
   }
 };
+
+/*! \brief vectorized cv statistics */
+template<int vsize>
+struct CVGradStats : public GradStats {
+  // additional statistics
+  GradStats train[vsize], valid[vsize];
+  // constructor
+  explicit CVGradStats(const TrainParam &param)
+      : GradStats(param) {
+    utils::Check(param.size_leaf_vector == vsize,
+                 "CVGradStats: vsize must match size_leaf_vector");
+  }
+  /*! \brief check if necessary information is ready */
+  inline static void CheckInfo(const BoosterInfo &info) {
+    utils::Check(info.fold_index.size() != 0,
+                 "CVGradStats: require fold_index");
+  }
+  /*! \brief clear the statistics */
+  inline void Clear(void) {
+    GradStats::Clear();
+    for (int i = 0; i < vsize; ++i) {
+      train[i].Clear(); valid[i].Clear();
+    }
+  }
+  inline void Add(const std::vector<bst_gpair> &gpair,
+                  const BoosterInfo &info,
+                  bst_uint ridx) {
+    GradStats::Add(gpair[ridx].grad, gpair[ridx].hess);
+    const size_t step = info.fold_index.size();
+    for (int i = 0; i < vsize; ++i) {
+      const bst_gpair &b = gpair[(i + 1) * step + ridx];
+      if (info.fold_index[ridx] == i) {
+        valid[i].Add(b.grad, b.hess);
+      } else {
+        train[i].Add(b.grad, b.hess);
+      }
+    }
+  }
+  /*! \brief calculate gain of the solution */
+  inline double CalcGain(const TrainParam &param) const {
+    double ret = 0.0;
+    for (int i = 0; i < vsize; ++i) {
+      ret += param.CalcGain(train[i].sum_grad,
+                            train[i].sum_hess,
+                            vsize * valid[i].sum_grad,
+                            vsize * valid[i].sum_hess);
+    }
+    return ret;
+  }
+  /*! \brief add statistics to the data */
+  inline void Add(const CVGradStats &b) {
+    GradStats::Add(b);
+    for (int i = 0; i < vsize; ++i) {
+      train[i].Add(b.train[i]);
+      valid[i].Add(b.valid[i]);
+    }
+  }
+  /*! \brief set current value to a - b */
+  inline void SetSubstract(const CVGradStats &a, const CVGradStats &b) {
+    GradStats::SetSubstract(a, b);
+    for (int i = 0; i < vsize; ++i) {
+      train[i].SetSubstract(a.train[i], b.train[i]);
+      valid[i].SetSubstract(a.valid[i], b.valid[i]);
+    }
+  }
+  /*! \brief set leaf vector value based on statistics */
+  inline void SetLeafVec(const TrainParam &param, bst_float *vec) const {
+    for (int i = 0; i < vsize; ++i) {
+      vec[i] = param.learning_rate *
+          param.CalcWeight(train[i].sum_grad, train[i].sum_hess);
+    }
+  }
+};
+
 /*!
  * \brief statistics that is helpful to store
  * and represent a split solution for the tree
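A note on the data layout that CVGradStats<vsize>::Add assumes, reconstructed from its indexing (a reading of the diff, not text from the commit): gpair holds vsize + 1 consecutive blocks of n = info.fold_index.size() entries each; block 0 feeds the base GradStats, and block i + 1 carries the gradient pairs of fold-model i. A row with fold_index == i is accumulated into valid[i], since it is held out of model i, and into train[j] for every other fold j. CalcGain then scales the held-out sums by vsize to bring statistics covering roughly 1/vsize of the rows back to full-data magnitude. A hypothetical sanity check of that layout:

  // hypothetical check, not part of the commit: one block per fold plus the base block
  utils::Check(gpair.size() == (vsize + 1) * info.fold_index.size(),
               "CVGradStats: gpair size does not match fold layout");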
@@ -62,6 +62,8 @@ inline IUpdater<FMatrix>* CreateUpdater(const char *name) {
   if (!strcmp(name, "prune")) return new TreePruner<FMatrix>();
   if (!strcmp(name, "refresh")) return new TreeRefresher<FMatrix, GradStats>();
   if (!strcmp(name, "grow_colmaker")) return new ColMaker<FMatrix, GradStats>();
+  if (!strcmp(name, "grow_colmaker2")) return new ColMaker<FMatrix, CVGradStats<2> >();
+  if (!strcmp(name, "grow_colmaker5")) return new ColMaker<FMatrix, CVGradStats<5> >();
   utils::Error("unknown updater:%s", name);
   return NULL;
 }
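Given the utils::Check in the CVGradStats constructor, grow_colmaker2 and grow_colmaker5 presumably require size_leaf_vector to be set to 2 and 5 respectively; a mismatched value fails that check when the statistics objects are constructed.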
@ -27,6 +27,7 @@ class ColMaker: public IUpdater<FMatrix> {
|
|||||||
const FMatrix &fmat,
|
const FMatrix &fmat,
|
||||||
const BoosterInfo &info,
|
const BoosterInfo &info,
|
||||||
const std::vector<RegTree*> &trees) {
|
const std::vector<RegTree*> &trees) {
|
||||||
|
TStats::CheckInfo(info);
|
||||||
// rescale learning rate according to size of trees
|
// rescale learning rate according to size of trees
|
||||||
float lr = param.learning_rate;
|
float lr = param.learning_rate;
|
||||||
param.learning_rate = lr / trees.size();
|
param.learning_rate = lr / trees.size();
|
||||||
|
|||||||
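The inserted TStats::CheckInfo(info) resolves statically through the updater's statistics template parameter: for plain GradStats it is the no-op added earlier in param.h, while for CVGradStats<k> it rejects a BoosterInfo without fold_index before any tree construction begins.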