check unity back

2014-08-29 18:35:26 -07:00 · 2014-08-29 18:35:26 -07:00 · ce2d34ecd4
commit ce2d34ecd4
parent 551b3b70f1
4 changed files with 177 additions and 26 deletions
--- a/src/io/page_dmatrix-inl.hpp
+++ b/src/io/page_dmatrix-inl.hpp
@ -18,7 +18,7 @@ struct RowBatchPage {
    utils::Assert(data_ != NULL, "fail to allocate row batch page");
    this->Clear();
  }
-  ~RowBatchPage(void) {
+  ~BinaryPage(void) {
    if (data_ != NULL) delete [] data_;
  }
  /*! 
--- a/src/tree/param.h
+++ b/src/tree/param.h
@ -38,6 +38,8 @@ struct TrainParam{
  float opt_dense_col;
  // leaf vector size
  int size_leaf_vector;  
  // option for parallelization
  int parallel_option;
  // number of threads to be used for tree construction,
  // if OpenMP is enabled, if equals 0, use system default
  int nthread;
@ -55,6 +57,7 @@ struct TrainParam{
    opt_dense_col = 1.0f;
    nthread = 0;
    size_leaf_vector = 0;
    parallel_option = 0;
  }
  /*! 
   * \brief set parameters from outside 
@ -79,6 +82,7 @@ struct TrainParam{
    if (!strcmp(name, "size_leaf_vector")) size_leaf_vector = atoi(val);
    if (!strcmp(name, "max_depth")) max_depth = atoi(val);
    if (!strcmp(name, "nthread")) nthread = atoi(val);
    if (!strcmp(name, "parallel_option")) parallel_option = atoi(val);
    if (!strcmp(name, "default_direction")) {
      if (!strcmp(val, "learn")) default_direction = 0;
      if (!strcmp(val, "left")) default_direction = 1;
--- a/src/tree/updater_colmaker-inl.hpp
+++ b/src/tree/updater_colmaker-inl.hpp
@ -45,15 +45,19 @@ class ColMaker: public IUpdater {
  // data structure
  /*! \brief per thread x per node entry to store tmp data */
  struct ThreadEntry {
-    /*! \brief statistics of data*/
+    /*! \brief statistics of data */
    TStats stats;
    /*! \brief extra statistics of data */
    TStats stats_extra;
    /*! \brief last feature value scanned */
    float  last_fvalue;
    /*! \brief first feature value scanned */
    float  first_fvalue;
    /*! \brief current best solution */
    SplitEntry best;
    // constructor
    explicit ThreadEntry(const TrainParam &param)
-        : stats(param) {
+        : stats(param), stats_extra(param) {
    }
  };
  struct NodeEntry {
@ -220,6 +224,136 @@ class ColMaker: public IUpdater {
      // use new nodes for qexpand
      qexpand = newnodes;
    }    
    // parallel find the best split of current fid
    // this function does not support nested functions
    inline void ParallelFindSplit(const ColBatch::Inst &col,
                                  bst_uint fid,
                                  const IFMatrix &fmat,
                                  const std::vector<bst_gpair> &gpair,
                                  const BoosterInfo &info) {
      bool need_forward = param.need_forward_search(fmat.GetColDensity(fid));
      bool need_backward = param.need_backward_search(fmat.GetColDensity(fid));
      int nthread;
      #pragma omp parallel
      {
        const int tid = omp_get_thread_num();
        std::vector<ThreadEntry> &temp = stemp[tid];
        // cleanup temp statistics
        for (size_t j = 0; j < qexpand.size(); ++j) {
          temp[qexpand[j]].stats.Clear();
        }
        nthread = omp_get_num_threads();
        bst_uint step = (col.length + nthread - 1) / nthread;
        bst_uint end = std::min(col.length, step * (tid + 1));
        for (bst_uint i = tid * step; i < end; ++i) {
          const bst_uint ridx = col[i].index;
          const int nid = position[ridx];
          if (nid < 0) continue;
          const float fvalue = col[i].fvalue;
          if (temp[nid].stats.Empty()) {
            temp[nid].first_fvalue = fvalue;
          }
          temp[nid].stats.Add(gpair, info, ridx);
          temp[nid].last_fvalue = fvalue;
        }
      }
      // start collecting the partial sum statistics
      bst_omp_uint nnode = static_cast<bst_omp_uint>(qexpand.size());
      #pragma omp parallel for schedule(static)
      for (bst_omp_uint j = 0; j < nnode; ++j) {
        const int nid = qexpand[j];
        TStats sum(param), tmp(param), c(param);
        for (int tid = 0; tid < nthread; ++tid) {
          tmp = stemp[tid][nid].stats;
          stemp[tid][nid].stats = sum;
          sum.Add(tmp);
          if (tid != 0) {
            std::swap(stemp[tid - 1][nid].last_fvalue, stemp[tid][nid].first_fvalue);
          }
        }
        for (int tid = 0; tid < nthread; ++tid) {
          stemp[tid][nid].stats_extra = sum;
          ThreadEntry &e = stemp[tid][nid];
          float fsplit;
          if (tid != 0) {
            if(fabsf(stemp[tid - 1][nid].last_fvalue - e.first_fvalue) > rt_2eps) {
              fsplit = (stemp[tid - 1][nid].last_fvalue - e.first_fvalue) * 0.5f;
            } else {
              continue;
            }
          } else {
            fsplit = e.first_fvalue - rt_eps;
          }                        
          if (need_forward && tid != 0) {
            c.SetSubstract(snode[nid].stats, e.stats);
            if (c.sum_hess >= param.min_child_weight && e.stats.sum_hess >= param.min_child_weight) {
              bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
              e.best.Update(loss_chg, fid, fsplit, false);
            }
          }
          if (need_backward) {
            tmp.SetSubstract(sum, e.stats);
            c.SetSubstract(snode[nid].stats, tmp);
            if (c.sum_hess >= param.min_child_weight && tmp.sum_hess >= param.min_child_weight) {
              bst_float loss_chg = static_cast<bst_float>(tmp.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
              e.best.Update(loss_chg, fid, fsplit, true);
            }
          }
        }
        if (need_backward) {
          tmp = sum;
          ThreadEntry &e = stemp[nthread-1][nid];
          c.SetSubstract(snode[nid].stats, tmp);
          if (c.sum_hess >= param.min_child_weight && tmp.sum_hess >= param.min_child_weight) {
            bst_float loss_chg = static_cast<bst_float>(tmp.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
            e.best.Update(loss_chg, fid, e.last_fvalue + rt_eps, true);
          }
        }
      }
      // rescan, generate candidate split
      #pragma omp parallel
      {
        TStats c(param), cright(param);
        const int tid = omp_get_thread_num();
        std::vector<ThreadEntry> &temp = stemp[tid];
        nthread = static_cast<bst_uint>(omp_get_num_threads());
        bst_uint step = (col.length + nthread - 1) / nthread;
        bst_uint end = std::min(col.length, step * (tid + 1));
        for (bst_uint i = tid * step; i < end; ++i) {
          const bst_uint ridx = col[i].index;
          const int nid = position[ridx];
          if (nid < 0) continue;
          const float fvalue = col[i].fvalue;
          // get the statistics of nid
          ThreadEntry &e = temp[nid];
          if (e.stats.Empty()) {
            e.stats.Add(gpair, info, ridx);
            e.first_fvalue = fvalue;
          } else {
            // forward default right
            if (fabsf(fvalue - e.first_fvalue) > rt_2eps){
              if (need_forward) { 
                c.SetSubstract(snode[nid].stats, e.stats);
                if (c.sum_hess >= param.min_child_weight && e.stats.sum_hess >= param.min_child_weight) {
                  bst_float loss_chg = static_cast<bst_float>(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
                  e.best.Update(loss_chg, fid, (fvalue + e.first_fvalue) * 0.5f, false);
                }
              }
              if (need_backward) {
                cright.SetSubstract(e.stats_extra, e.stats);
                c.SetSubstract(snode[nid].stats, cright);
                if (c.sum_hess >= param.min_child_weight && cright.sum_hess >= param.min_child_weight) {
                  bst_float loss_chg = static_cast<bst_float>(cright.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain);
                  e.best.Update(loss_chg, fid, (fvalue + e.first_fvalue) * 0.5f, true);
                }
              }
            }          
            e.stats.Add(gpair, info, ridx);
            e.first_fvalue = fvalue;            
          }
        }
      }
    }    
    // enumerate the split values of specific feature
    inline void EnumerateSplit(const ColBatch::Entry *begin,
                               const ColBatch::Entry *end,
@ -272,6 +406,38 @@ class ColMaker: public IUpdater {
        }
      }
    }
    // update the solution candidate 
    virtual void UpdateSolution(const ColBatch &batch,                                
                                const std::vector<bst_gpair> &gpair,
                                const IFMatrix &fmat,
                                const BoosterInfo &info) {
      // start enumeration
      const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
      #if defined(_OPENMP)                                                                
      const int batch_size = std::max(static_cast<int>(nsize / this->nthread / 32), 1);
      #endif
      if (param.parallel_option == 0) {
        #pragma omp parallel for schedule(dynamic, batch_size)
        for (bst_omp_uint i = 0; i < nsize; ++i) {
          const bst_uint fid = batch.col_index[i];
          const int tid = omp_get_thread_num();
          const ColBatch::Inst c = batch[i];
          if (param.need_forward_search(fmat.GetColDensity(fid))) {
            this->EnumerateSplit(c.data, c.data + c.length, +1, 
                                 fid, gpair, info, stemp[tid]);
          }
          if (param.need_backward_search(fmat.GetColDensity(fid))) {
            this->EnumerateSplit(c.data + c.length - 1, c.data - 1, -1, 
                                 fid, gpair, info, stemp[tid]);
          }
        }
      } else {
        for (bst_omp_uint i = 0; i < nsize; ++i) {
          this->ParallelFindSplit(batch[i], batch.col_index[i],
                                  fmat, gpair, info);
        }
      }      
    }
    // find splits at current level, do split per level
    inline void FindSplit(int depth,
                          const std::vector<int> &qexpand,
@ -288,26 +454,7 @@ class ColMaker: public IUpdater {
      }
      utils::IIterator<ColBatch> *iter = p_fmat->ColIterator(feat_set);
      while (iter->Next()) {
-        const ColBatch &batch = iter->Value();
+        this->UpdateSolution(iter->Value(), gpair, *p_fmat, info);
        // start enumeration
        const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
        #if defined(_OPENMP)                                                                
        const int batch_size = std::max(static_cast<int>(nsize / this->nthread / 32), 1);
        #endif
        #pragma omp parallel for schedule(dynamic, batch_size)
        for (bst_omp_uint i = 0; i < nsize; ++i) {
          const bst_uint fid = batch.col_index[i];
          const int tid = omp_get_thread_num();
          const ColBatch::Inst c = batch[i];
          if (param.need_forward_search(p_fmat->GetColDensity(fid))) {            
            this->EnumerateSplit(c.data, c.data + c.length, +1, 
                                 fid, gpair, info, stemp[tid]);
          }
          if (param.need_backward_search(p_fmat->GetColDensity(fid))) {
            this->EnumerateSplit(c.data + c.length - 1, c.data - 1, -1, 
                                 fid, gpair, info, stemp[tid]);
          }
        }
      }
      // after this each thread's stemp will get the best candidates, aggregate results
      for (size_t i = 0; i < qexpand.size(); ++i) {
@ -325,6 +472,7 @@ class ColMaker: public IUpdater {
        }
      }
    }
    // reset position of each data points after split is created in the tree
    inline void ResetPosition(const std::vector<int> &qexpand, IFMatrix *p_fmat, const RegTree &tree) {
      const std::vector<bst_uint> &rowset = p_fmat->buffered_rowset();
--- a/src/utils/io.h
+++ b/src/utils/io.h
@ -100,7 +100,6 @@ class ISeekStream: public IStream {
 /*! \brief implementation of file i/o stream */
 class FileStream : public ISeekStream {
 public:
  explicit FileStream(void) {}
  explicit FileStream(FILE *fp) {
    this->fp = fp;
  }