diff --git a/src/gbm/gbtree.cc b/src/gbm/gbtree.cc index 4a6f0b8c6..728ca46df 100644 --- a/src/gbm/gbtree.cc +++ b/src/gbm/gbtree.cc @@ -26,8 +26,6 @@ DMLC_REGISTRY_FILE_TAG(gbtree); /*! \brief training parameters */ struct GBTreeTrainParam : public dmlc::Parameter { - /*! \brief number of threads */ - int nthread; /*! * \brief number of parallel trees constructed each iteration * use this option to support boosted random forest @@ -37,8 +35,6 @@ struct GBTreeTrainParam : public dmlc::Parameter { std::string updater_seq; // declare parameters DMLC_DECLARE_PARAMETER(GBTreeTrainParam) { - DMLC_DECLARE_FIELD(nthread).set_lower_bound(0).set_default(0) - .describe("Number of threads used for training."); DMLC_DECLARE_FIELD(num_parallel_tree).set_lower_bound(1).set_default(1) .describe("Number of parallel trees constructed during each iteration."\ " This option is used to support boosted random forest"); @@ -145,9 +141,6 @@ class GBTree : public GradientBooster { for (const auto& up : updaters) { up->Init(cfg); } - if (tparam.nthread != 0) { - omp_set_num_threads(tparam.nthread); - } } void Load(dmlc::Stream* fi) override { @@ -247,12 +240,16 @@ class GBTree : public GradientBooster { const RowBatch &batch = iter->Value(); // parallel over local batch const bst_omp_uint nsize = static_cast(batch.size); + int ridx_error = 0; #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < nsize; ++i) { const int tid = omp_get_thread_num(); RegTree::FVec &feats = thread_temp[tid]; int64_t ridx = static_cast(batch.base_rowid + i); - CHECK_LT(static_cast(ridx), info.num_row); + if (static_cast(ridx) >= info.num_row) { + ridx_error = 1; + continue; + } // loop over output groups for (int gid = 0; gid < mparam.num_output_group; ++gid) { this->Pred(batch[i], @@ -262,6 +259,7 @@ class GBTree : public GradientBooster { ntree_limit); } } + CHECK(!ridx_error) << "ridx out of bounds"; } } @@ -368,19 +366,28 @@ class GBTree : public GradientBooster { const int* leaf_position) { 
const RowSet& rowset = p_fmat->buffered_rowset(); const bst_omp_uint ndata = static_cast(rowset.size()); + int pred_counter_error = 0, tid_error = 0; #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < ndata; ++i) { const bst_uint ridx = rowset[i]; const int64_t bid = this->BufferOffset(buffer_offset + ridx, bst_group); const int tid = leaf_position[ridx]; - CHECK_EQ(pred_counter[bid], trees.size()); - CHECK_GE(tid, 0); + if (pred_counter[bid] != trees.size()) { + pred_counter_error = 1; + continue; + } + if (tid < 0) { + tid_error = 1; + continue; + } pred_buffer[bid] += new_tree[tid].leaf_value(); for (int i = 0; i < mparam.size_leaf_vector; ++i) { pred_buffer[bid + i + 1] += new_tree.leafvec(tid)[i]; } pred_counter[bid] += tparam.num_parallel_tree; } + CHECK(!pred_counter_error) << "incorrect pred_counter[bid]"; + CHECK(!tid_error) << "tid cannot be negative"; } // make a prediction for a single instance inline void Pred(const RowBatch::Inst &inst, diff --git a/src/learner.cc b/src/learner.cc index a7391b018..da1e87b96 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -78,6 +78,9 @@ struct LearnerTrainParam float prob_buffer_row; // maximum row per batch. 
size_t max_row_perbatch; + // number of threads to use if OpenMP is enabled + // if equals 0, use system default + int nthread; // declare parameters DMLC_DECLARE_PARAMETER(LearnerTrainParam) { DMLC_DECLARE_FIELD(seed).set_default(0) @@ -101,6 +104,8 @@ struct LearnerTrainParam .describe("Maximum buffered row portion"); DMLC_DECLARE_FIELD(max_row_perbatch).set_default(std::numeric_limits::max()) .describe("maximum row per batch."); + DMLC_DECLARE_FIELD(nthread).set_default(0) + .describe("Number of threads to use."); } }; @@ -149,7 +154,11 @@ class LearnerImpl : public Learner { cfg_[kv.first] = kv.second; } } - // add additional parameter + if (tparam.nthread != 0) { + omp_set_num_threads(tparam.nthread); + } + + // add additional parameters // These are cosntraints that need to be satisfied. if (tparam.dsplit == 0 && rabit::IsDistributed()) { tparam.dsplit = 2; diff --git a/src/metric/rank_metric.cc b/src/metric/rank_metric.cc index feb0f37ff..a412f24a1 100644 --- a/src/metric/rank_metric.cc +++ b/src/metric/rank_metric.cc @@ -97,6 +97,7 @@ struct EvalAuc : public Metric { const bst_omp_uint ngroup = static_cast(gptr.size() - 1); // sum statistics double sum_auc = 0.0f; + int auc_error = 0; #pragma omp parallel reduction(+:sum_auc) { // each thread takes a local rec @@ -128,12 +129,16 @@ struct EvalAuc : public Metric { sum_npos += buf_pos; sum_nneg += buf_neg; // check weird conditions - CHECK(sum_npos > 0.0 && sum_nneg > 0.0) - << "AUC: the dataset only contains pos or neg samples"; + if (sum_npos <= 0.0 || sum_nneg <= 0.0) { + auc_error = 1; + continue; + } // this is the AUC sum_auc += sum_pospair / (sum_npos*sum_nneg); } } + CHECK(!auc_error) + << "AUC: the dataset only contains pos or neg samples"; if (distributed) { float dat[2]; dat[0] = static_cast(sum_auc); diff --git a/src/tree/param.h b/src/tree/param.h index 23d0c5b81..61ddffe33 100644 --- a/src/tree/param.h +++ b/src/tree/param.h @@ -53,9 +53,6 @@ struct TrainParam : public dmlc::Parameter { int 
parallel_option; // option to open cacheline optimization bool cache_opt; - // number of threads to be used for tree construction, - // if OpenMP is enabled, if equals 0, use system default - int nthread; // whether to not print info during training. bool silent; // declare the parameters @@ -98,10 +95,8 @@ struct TrainParam : public dmlc::Parameter { .describe("Different types of parallelization algorithm."); DMLC_DECLARE_FIELD(cache_opt).set_default(true) .describe("EXP Param: Cache aware optimization."); - DMLC_DECLARE_FIELD(nthread).set_default(0) - .describe("Number of threads used for training."); DMLC_DECLARE_FIELD(silent).set_default(false) - .describe("Not print information during trainig.") + .describe("Do not print information during training."); // add alias of parameters DMLC_DECLARE_ALIAS(reg_lambda, lambda); DMLC_DECLARE_ALIAS(reg_alpha, alpha);