no exception throwing within omp parallel; set nthread in Learner (#1421)
parent 89c4f67f59
commit 75f401481f
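The core of this change: a C++ exception (which a failed CHECK raises) cannot propagate out of an OpenMP parallel region, so a failing assertion inside a parallel loop typically terminates the process instead of unwinding cleanly. Each parallel loop touched by this commit therefore records failures in a plain int flag and defers the CHECK until after the region. A minimal standalone sketch of the pattern, with illustrative names that are not part of the diff:

#include <stdexcept>

// Sketch of the error-flag pattern this commit applies: never throw
// inside an OpenMP parallel region; record the failure, let the loop
// finish, and raise the error afterwards in serial code.
void process(const int* data, int n) {
  int error = 0;
  #pragma omp parallel for schedule(static)
  for (int i = 0; i < n; ++i) {
    if (data[i] < 0) {  // invalid element: flag it and skip this iteration
      error = 1;
      continue;
    }
    // ... real per-element work ...
  }
  if (error) {  // outside the parallel region, throwing is safe again
    throw std::runtime_error("negative input encountered");
  }
}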
@@ -26,8 +26,6 @@ DMLC_REGISTRY_FILE_TAG(gbtree);
 /*! \brief training parameters */
 struct GBTreeTrainParam : public dmlc::Parameter<GBTreeTrainParam> {
-  /*! \brief number of threads */
-  int nthread;
   /*!
    * \brief number of parallel trees constructed each iteration
    *  use this option to support boosted random forest
@@ -37,8 +35,6 @@ struct GBTreeTrainParam : public dmlc::Parameter<GBTreeTrainParam> {
   std::string updater_seq;
   // declare parameters
   DMLC_DECLARE_PARAMETER(GBTreeTrainParam) {
-    DMLC_DECLARE_FIELD(nthread).set_lower_bound(0).set_default(0)
-        .describe("Number of threads used for training.");
     DMLC_DECLARE_FIELD(num_parallel_tree).set_lower_bound(1).set_default(1)
         .describe("Number of parallel trees constructed during each iteration."\
                   " This option is used to support boosted random forest");
@@ -145,9 +141,6 @@ class GBTree : public GradientBooster {
     for (const auto& up : updaters) {
       up->Init(cfg);
     }
-    if (tparam.nthread != 0) {
-      omp_set_num_threads(tparam.nthread);
-    }
   }

   void Load(dmlc::Stream* fi) override {
@@ -247,12 +240,16 @@ class GBTree : public GradientBooster {
       const RowBatch &batch = iter->Value();
       // parallel over local batch
       const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
+      int ridx_error = 0;
       #pragma omp parallel for schedule(static)
       for (bst_omp_uint i = 0; i < nsize; ++i) {
         const int tid = omp_get_thread_num();
         RegTree::FVec &feats = thread_temp[tid];
         int64_t ridx = static_cast<int64_t>(batch.base_rowid + i);
-        CHECK_LT(static_cast<size_t>(ridx), info.num_row);
+        if (static_cast<size_t>(ridx) >= info.num_row) {
+          ridx_error = 1;
+          continue;
+        }
         // loop over output groups
         for (int gid = 0; gid < mparam.num_output_group; ++gid) {
           this->Pred(batch[i],
@@ -262,6 +259,7 @@ class GBTree : public GradientBooster {
                      ntree_limit);
         }
       }
+      CHECK(!ridx_error) << "ridx out of bounds";
     }
   }

@@ -368,19 +366,28 @@ class GBTree : public GradientBooster {
                             const int* leaf_position) {
     const RowSet& rowset = p_fmat->buffered_rowset();
     const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
+    int pred_counter_error = 0, tid_error = 0;
     #pragma omp parallel for schedule(static)
     for (bst_omp_uint i = 0; i < ndata; ++i) {
       const bst_uint ridx = rowset[i];
       const int64_t bid = this->BufferOffset(buffer_offset + ridx, bst_group);
       const int tid = leaf_position[ridx];
-      CHECK_EQ(pred_counter[bid], trees.size());
-      CHECK_GE(tid, 0);
+      if (pred_counter[bid] != trees.size()) {
+        pred_counter_error = 1;
+        continue;
+      }
+      if (tid < 0) {
+        tid_error = 1;
+        continue;
+      }
       pred_buffer[bid] += new_tree[tid].leaf_value();
       for (int i = 0; i < mparam.size_leaf_vector; ++i) {
         pred_buffer[bid + i + 1] += new_tree.leafvec(tid)[i];
       }
       pred_counter[bid] += tparam.num_parallel_tree;
     }
+    CHECK(!pred_counter_error) << "incorrect pred_counter[bid]";
+    CHECK(!tid_error) << "tid cannot be negative";
   }
   // make a prediction for a single instance
   inline void Pred(const RowBatch::Inst &inst,

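The prediction loop above gives each OpenMP thread its own entry in thread_temp, indexed by omp_get_thread_num(), so the scratch space needs no locking. A minimal sketch of that per-thread-buffer pattern, with illustrative names that are not xgboost's:

#include <omp.h>
#include <vector>

// One scratch vector per possible thread, allocated before the
// parallel region; each iteration touches only its own thread's slot.
void transform(const std::vector<double>& in, std::vector<double>* out) {
  std::vector<std::vector<double>> thread_temp(omp_get_max_threads());
  out->resize(in.size());
  #pragma omp parallel for schedule(static)
  for (int i = 0; i < static_cast<int>(in.size()); ++i) {
    std::vector<double>& scratch = thread_temp[omp_get_thread_num()];
    scratch.assign(1, in[i] * 2.0);  // stand-in for real per-row work
    (*out)[i] = scratch[0];
  }
}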
@@ -78,6 +78,9 @@ struct LearnerTrainParam
   float prob_buffer_row;
   // maximum row per batch.
   size_t max_row_perbatch;
+  // number of threads to use if OpenMP is enabled
+  // if equals 0, use system default
+  int nthread;
   // declare parameters
   DMLC_DECLARE_PARAMETER(LearnerTrainParam) {
     DMLC_DECLARE_FIELD(seed).set_default(0)
@@ -101,6 +104,8 @@ struct LearnerTrainParam
         .describe("Maximum buffered row portion");
     DMLC_DECLARE_FIELD(max_row_perbatch).set_default(std::numeric_limits<size_t>::max())
         .describe("maximum row per batch.");
+    DMLC_DECLARE_FIELD(nthread).set_default(0)
+        .describe("Number of threads to use.");
   }
 };

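With nthread now declared on LearnerTrainParam, the thread count is controlled through the Learner's ordinary key/value configuration rather than through per-booster or per-updater parameters. An illustrative (not verbatim) usage, assuming an existing Learner instance and the key/value Configure interface the next hunk iterates over:

// Hypothetical usage sketch: learner is an already-created Learner.
std::vector<std::pair<std::string, std::string>> args = {
  {"nthread", "4"},  // 0 (the default) keeps the system thread count
  {"eta", "0.1"}
};
learner->Configure(args);  // Configure() then calls omp_set_num_threads(4)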
@@ -149,7 +154,11 @@ class LearnerImpl : public Learner {
         cfg_[kv.first] = kv.second;
       }
     }
+    // add additional parameter
+    if (tparam.nthread != 0) {
+      omp_set_num_threads(tparam.nthread);
+    }

     // add additional parameters
     // These are constraints that need to be satisfied.
     if (tparam.dsplit == 0 && rabit::IsDistributed()) {
       tparam.dsplit = 2;

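The nthread != 0 guard above is load-bearing: the OpenMP specification requires the argument of omp_set_num_threads to be positive, so the 0 default ("use the system setting") must never be passed through. A small self-contained sketch of that guard:

#include <omp.h>
#include <cstdio>

int main() {
  int nthread = 0;  // 0 is the sentinel: leave the OpenMP default alone
  if (nthread != 0) {
    // omp_set_num_threads requires a positive argument, so the
    // sentinel must be filtered out before reaching it.
    omp_set_num_threads(nthread);
  }
  #pragma omp parallel
  {
    #pragma omp master
    std::printf("running with %d threads\n", omp_get_num_threads());
  }
  return 0;
}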
@@ -97,6 +97,7 @@ struct EvalAuc : public Metric {
     const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
     // sum statistics
     double sum_auc = 0.0f;
+    int auc_error = 0;
     #pragma omp parallel reduction(+:sum_auc)
     {
       // each thread takes a local rec
@@ -128,12 +129,16 @@ struct EvalAuc : public Metric {
         sum_npos += buf_pos;
         sum_nneg += buf_neg;
         // check weird conditions
-        CHECK(sum_npos > 0.0 && sum_nneg > 0.0)
-            << "AUC: the dataset only contains pos or neg samples";
+        if (sum_npos <= 0.0 || sum_nneg <= 0.0) {
+          auc_error = 1;
+          continue;
+        }
         // this is the AUC
         sum_auc += sum_pospair / (sum_npos*sum_nneg);
       }
     }
+    CHECK(!auc_error)
+        << "AUC: the dataset only contains pos or neg samples";
     if (distributed) {
       float dat[2];
       dat[0] = static_cast<float>(sum_auc);

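The AUC change combines two mechanisms: reduction(+:sum_auc) gives each thread a private accumulator that OpenMP sums when the region ends, while auc_error is a shared flag that any thread may set to 1. Every writer stores the same value and the flag is read only after the region, so the commit treats the concurrent writes as benign, and the CHECK happens where throwing is safe. A compact sketch of that combination, illustrative rather than xgboost code:

#include <cstddef>

// Accumulate via an OpenMP reduction while recording invalid input in
// a shared flag; the flag is inspected only after the parallel region.
double checked_sum(const double* vals, std::size_t n, int* error_out) {
  double total = 0.0;
  int error = 0;
  #pragma omp parallel for reduction(+:total) schedule(static)
  for (long i = 0; i < static_cast<long>(n); ++i) {
    if (vals[i] < 0.0) {  // bad value: flag and skip, never throw here
      error = 1;
      continue;
    }
    total += vals[i];
  }
  *error_out = error;  // caller may now CHECK or throw safely
  return total;
}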
@@ -53,9 +53,6 @@ struct TrainParam : public dmlc::Parameter<TrainParam> {
   int parallel_option;
   // option to open cacheline optimization
   bool cache_opt;
-  // number of threads to be used for tree construction,
-  // if OpenMP is enabled, if equals 0, use system default
-  int nthread;
   // whether to not print info during training.
   bool silent;
   // declare the parameters
@@ -98,10 +95,8 @@ struct TrainParam : public dmlc::Parameter<TrainParam> {
         .describe("Different types of parallelization algorithm.");
     DMLC_DECLARE_FIELD(cache_opt).set_default(true)
         .describe("EXP Param: Cache aware optimization.");
-    DMLC_DECLARE_FIELD(nthread).set_default(0)
-        .describe("Number of threads used for training.");
     DMLC_DECLARE_FIELD(silent).set_default(false)
-        .describe("Not print information during training.");
+        .describe("Do not print information during training.");
     // add alias of parameters
     DMLC_DECLARE_ALIAS(reg_lambda, lambda);
     DMLC_DECLARE_ALIAS(reg_alpha, alpha);