initial correction for vec tree

2014-08-24 18:48:19 -07:00 · 2014-08-24 18:48:19 -07:00 · c640485f1d
commit c640485f1d
parent 7874c2559b
2 changed files with 40 additions and 23 deletions
--- a/src/gbm/gbtree-inl.hpp
+++ b/src/gbm/gbtree-inl.hpp
@ -117,17 +117,13 @@ class GBTree : public IGradBooster<FMatrix> {
    }

    std::vector<float> &preds = *out_preds;
-    preds.resize(0);
+    const size_t stride = info.num_row * mparam.num_output_group;
+    preds.resize(stride * (mparam.size_leaf_vector+1));
    // start collecting the prediction
    utils::IIterator<SparseBatch> *iter = fmat.RowIterator();
    iter->BeforeFirst();
    while (iter->Next()) {
      const SparseBatch &batch = iter->Value();
-      utils::Assert(batch.base_rowid * mparam.num_output_group == preds.size(),
-                    "base_rowid is not set correctly");
-      // output convention: nrow * k, where nrow is number of rows
-      // k is number of group
-      preds.resize(preds.size() + batch.size * mparam.num_output_group);
      // parallel over local batch
      const unsigned nsize = static_cast<unsigned>(batch.size);
      #pragma omp parallel for schedule(static)
@ -135,13 +131,13 @@ class GBTree : public IGradBooster<FMatrix> {
        const int tid = omp_get_thread_num();
        tree::RegTree::FVec &feats = thread_temp[tid];
        const size_t ridx = batch.base_rowid + i;
-        const unsigned root_idx = info.GetRoot(ridx);
+        utils::Assert(ridx < info.num_row, "data row index exceed bound");
        // loop over output groups
        for (int gid = 0; gid < mparam.num_output_group; ++gid) {
-          preds[ridx * mparam.num_output_group + gid] =
-              this->Pred(batch[i],
-                         buffer_offset < 0 ? -1 : buffer_offset+ridx,
-                         gid, root_idx, &feats);
+          this->Pred(batch[i],
+                     buffer_offset < 0 ? -1 : buffer_offset + ridx,
+                     gid, info.GetRoot(ridx), &feats,
+                     &preds[ridx * mparam.num_output_group + gid], stride);
        }
      }
    }
@ -211,24 +207,34 @@ class GBTree : public IGradBooster<FMatrix> {
    mparam.num_trees += tparam.num_parallel_tree;
  }
  // make a prediction for a single instance
-  inline float Pred(const SparseBatch::Inst &inst,
-                    int64_t buffer_index,
-                    int bst_group,
-                    unsigned root_index,
-                    tree::RegTree::FVec *p_feats) {
+  inline void Pred(const SparseBatch::Inst &inst,
+                   int64_t buffer_index,
+                   int bst_group,
+                   unsigned root_index,
+                   tree::RegTree::FVec *p_feats,
+                   float *out_pred, size_t stride) {
    size_t itop = 0;
    float  psum = 0.0f;
+    // sum of leaf vector 
+    std::vector<float> vec_psum(mparam.size_leaf_vector, 0.0f);
    const int bid = mparam.BufferOffset(buffer_index, bst_group);
    // load buffered results if any
    if (bid >= 0) {
      itop = pred_counter[bid];
      psum = pred_buffer[bid];
+      for (int i = 0; i < mparam.size_leaf_vector; ++i) {
+        vec_psum[i] = pred_buffer[bid + i + 1];
+      }
    }
    if (itop != trees.size()) {
      p_feats->Fill(inst);
      for (size_t i = itop; i < trees.size(); ++i) {
        if (tree_info[i] == bst_group) {
-          psum += trees[i]->Predict(*p_feats, root_index);
+          int tid = trees[i]->GetLeafIndex(*p_feats, root_index);
+          psum += (*trees[i])[tid].leaf_value();
+          for (int j = 0; j < mparam.size_leaf_vector; ++j) {
+            vec_psum[j] += trees[i]->leafvec(tid)[j];
+          }
        }
      }
      p_feats->Drop(inst);
@ -237,8 +243,14 @@ class GBTree : public IGradBooster<FMatrix> {
    if (bid >= 0) {
      pred_counter[bid] = static_cast<unsigned>(trees.size());
      pred_buffer[bid] = psum;
+      for (int i = 0; i < mparam.size_leaf_vector; ++i) {
+        pred_buffer[bid + i + 1] = vec_psum[i];
+      }
+    }
+    out_pred[0] = psum;
+    for (int i = 0; i < mparam.size_leaf_vector; ++i) {
+      out_pred[stride * (i + 1)] = vec_psum[i];
    }
-    return psum;
  }
  // --- data structure ---
  /*! \brief training parameters */
@ -291,14 +303,17 @@ class GBTree : public IGradBooster<FMatrix> {
     *    suppose we have n instance and k group, output will be k*n 
     */
    int num_output_group;
+    /*! \brief size of leaf vector needed in tree */
+    int size_leaf_vector;
    /*! \brief reserved parameters */
-    int reserved[32];
+    int reserved[31];
    /*! \brief constructor */
    ModelParam(void) {
      num_trees = 0;
      num_roots = num_feature = 0;
      num_pbuffer = 0;
      num_output_group = 1;
+      size_leaf_vector = 0;
      memset(reserved, 0, sizeof(reserved));
    }
    /*!
@ -311,10 +326,11 @@ class GBTree : public IGradBooster<FMatrix> {
      if (!strcmp("num_output_group", name)) num_output_group = atol(val);
      if (!strcmp("bst:num_roots", name)) num_roots = atoi(val);
      if (!strcmp("bst:num_feature", name)) num_feature = atoi(val);
+      if (!strcmp("bst:size_leaf_vector", name)) size_leaf_vector = atoi(val);
    }
    /*! \return size of prediction buffer actually needed */
    inline size_t PredBufferSize(void) const {
-      return num_output_group * num_pbuffer;
+      return num_output_group * num_pbuffer * (size_leaf_vector + 1);
    }
    /*! 
     * \brief get the buffer offset given a buffer index and group id  
@ -323,7 +339,7 @@ class GBTree : public IGradBooster<FMatrix> {
    inline size_t BufferOffset(int64_t buffer_index, int bst_group) const {
      if (buffer_index < 0) return -1;
      utils::Check(buffer_index < num_pbuffer, "buffer_index exceed num_pbuffer");
-      return buffer_index + num_pbuffer * bst_group;
+      return (buffer_index + num_pbuffer * bst_group) * (size_leaf_vector + 1);
    }
  };
  // training parameter
--- a/src/learner/evaluation-inl.hpp
+++ b/src/learner/evaluation-inl.hpp
@ -24,9 +24,10 @@ template<typename Derived>
 struct EvalEWiseBase : public IEvaluator {
  virtual float Eval(const std::vector<float> &preds,
                     const MetaInfo &info) const {
-    utils::Check(preds.size() == info.labels.size(),
+    utils::Check(info.labels.size() != 0, "label set cannot be empty");
+    utils::Check(preds.size() % info.labels.size() == 0,
                 "label and prediction size not match");
-    const unsigned ndata = static_cast<unsigned>(preds.size());
+    const unsigned ndata = static_cast<unsigned>(info.labels.size());
    float sum = 0.0, wsum = 0.0;
    #pragma omp parallel for reduction(+: sum, wsum) schedule(static)
    for (unsigned i = 0; i < ndata; ++i) {