diff --git a/src/gbm/gbtree.cc b/src/gbm/gbtree.cc index 4a6f0b8c6..728ca46df 100644 --- a/src/gbm/gbtree.cc +++ b/src/gbm/gbtree.cc @@ -26,8 +26,6 @@ DMLC_REGISTRY_FILE_TAG(gbtree); /*! \brief training parameters */ struct GBTreeTrainParam : public dmlc::Parameter { - /*! \brief number of threads */ - int nthread; /*! * \brief number of parallel trees constructed each iteration * use this option to support boosted random forest @@ -37,8 +35,6 @@ struct GBTreeTrainParam : public dmlc::Parameter { std::string updater_seq; // declare parameters DMLC_DECLARE_PARAMETER(GBTreeTrainParam) { - DMLC_DECLARE_FIELD(nthread).set_lower_bound(0).set_default(0) - .describe("Number of threads used for training."); DMLC_DECLARE_FIELD(num_parallel_tree).set_lower_bound(1).set_default(1) .describe("Number of parallel trees constructed during each iteration."\ " This option is used to support boosted random forest"); @@ -145,9 +141,6 @@ class GBTree : public GradientBooster { for (const auto& up : updaters) { up->Init(cfg); } - if (tparam.nthread != 0) { - omp_set_num_threads(tparam.nthread); - } } void Load(dmlc::Stream* fi) override { @@ -247,12 +240,16 @@ class GBTree : public GradientBooster { const RowBatch &batch = iter->Value(); // parallel over local batch const bst_omp_uint nsize = static_cast(batch.size); + int ridx_error = 0; #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < nsize; ++i) { const int tid = omp_get_thread_num(); RegTree::FVec &feats = thread_temp[tid]; int64_t ridx = static_cast(batch.base_rowid + i); - CHECK_LT(static_cast(ridx), info.num_row); + if (static_cast(ridx) >= info.num_row) { + ridx_error = 1; + continue; + } // loop over output groups for (int gid = 0; gid < mparam.num_output_group; ++gid) { this->Pred(batch[i], @@ -262,6 +259,7 @@ class GBTree : public GradientBooster { ntree_limit); } } + CHECK(!ridx_error) << "ridx out of bounds"; } } @@ -368,19 +366,28 @@ class GBTree : public GradientBooster { const int* leaf_position) { 
const RowSet& rowset = p_fmat->buffered_rowset(); const bst_omp_uint ndata = static_cast(rowset.size()); + int pred_counter_error = 0, tid_error = 0; #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < ndata; ++i) { const bst_uint ridx = rowset[i]; const int64_t bid = this->BufferOffset(buffer_offset + ridx, bst_group); const int tid = leaf_position[ridx]; - CHECK_EQ(pred_counter[bid], trees.size()); - CHECK_GE(tid, 0); + if (pred_counter[bid] != trees.size()) { + pred_counter_error = 1; + continue; + } + if (tid < 0) { + tid_error = 1; + continue; + } pred_buffer[bid] += new_tree[tid].leaf_value(); for (int i = 0; i < mparam.size_leaf_vector; ++i) { pred_buffer[bid + i + 1] += new_tree.leafvec(tid)[i]; } pred_counter[bid] += tparam.num_parallel_tree; } + CHECK(!pred_counter_error) << "incorrect pred_counter[bid]"; + CHECK(!tid_error) << "tid cannot be negative"; } // make a prediction for a single instance inline void Pred(const RowBatch::Inst &inst, diff --git a/src/learner.cc b/src/learner.cc index a7391b018..da1e87b96 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -78,6 +78,9 @@ struct LearnerTrainParam float prob_buffer_row; // maximum row per batch. 
size_t max_row_perbatch; + // number of threads to use if OpenMP is enabled + // if equals 0, use system default + int nthread; // declare parameters DMLC_DECLARE_PARAMETER(LearnerTrainParam) { DMLC_DECLARE_FIELD(seed).set_default(0) @@ -101,6 +104,8 @@ struct LearnerTrainParam .describe("Maximum buffered row portion"); DMLC_DECLARE_FIELD(max_row_perbatch).set_default(std::numeric_limits::max()) .describe("maximum row per batch."); + DMLC_DECLARE_FIELD(nthread).set_default(0) + .describe("Number of threads to use."); } }; @@ -149,7 +154,11 @@ class LearnerImpl : public Learner { cfg_[kv.first] = kv.second; } } - // add additional parameter + if (tparam.nthread != 0) { + omp_set_num_threads(tparam.nthread); + } + + // add additional parameters // These are cosntraints that need to be satisfied. if (tparam.dsplit == 0 && rabit::IsDistributed()) { tparam.dsplit = 2; diff --git a/src/metric/rank_metric.cc b/src/metric/rank_metric.cc index feb0f37ff..a412f24a1 100644 --- a/src/metric/rank_metric.cc +++ b/src/metric/rank_metric.cc @@ -97,6 +97,7 @@ struct EvalAuc : public Metric { const bst_omp_uint ngroup = static_cast(gptr.size() - 1); // sum statistics double sum_auc = 0.0f; + int auc_error = 0; #pragma omp parallel reduction(+:sum_auc) { // each thread takes a local rec @@ -128,12 +129,16 @@ struct EvalAuc : public Metric { sum_npos += buf_pos; sum_nneg += buf_neg; // check weird conditions - CHECK(sum_npos > 0.0 && sum_nneg > 0.0) - << "AUC: the dataset only contains pos or neg samples"; + if (sum_npos <= 0.0 || sum_nneg <= 0.0) { + auc_error = 1; + continue; + } // this is the AUC sum_auc += sum_pospair / (sum_npos*sum_nneg); } } + CHECK(!auc_error) + << "AUC: the dataset only contains pos or neg samples"; if (distributed) { float dat[2]; dat[0] = static_cast(sum_auc); diff --git a/src/tree/param.h b/src/tree/param.h index 23d0c5b81..61ddffe33 100644 --- a/src/tree/param.h +++ b/src/tree/param.h @@ -53,9 +53,6 @@ struct TrainParam : public dmlc::Parameter { int 
parallel_option; // option to open cacheline optimization bool cache_opt; - // number of threads to be used for tree construction, - // if OpenMP is enabled, if equals 0, use system default - int nthread; // whether to not print info during training. bool silent; // declare the parameters @@ -98,10 +95,8 @@ struct TrainParam : public dmlc::Parameter { .describe("Different types of parallelization algorithm."); DMLC_DECLARE_FIELD(cache_opt).set_default(true) .describe("EXP Param: Cache aware optimization."); - DMLC_DECLARE_FIELD(nthread).set_default(0) - .describe("Number of threads used for training."); DMLC_DECLARE_FIELD(silent).set_default(false) - .describe("Not print information during trainig.") + .describe("Do not print information during training."); // add alias of parameters DMLC_DECLARE_ALIAS(reg_lambda, lambda); DMLC_DECLARE_ALIAS(reg_alpha, alpha);