Changing omp_get_num_threads to omp_get_max_threads (#1831)
* Updating dmlc-core * Changing omp_get_num_threads to omp_get_max_threads
This commit is contained in:
parent
47ba2de7d4
commit
7078c41dad
@ -1 +1 @@
|
||||
Subproject commit f35f14f30835af238257b979cc1fac3e41ff3291
|
||||
Subproject commit 78b78be34ac27d30f2193f3d51848c62887669c4
|
||||
@ -282,11 +282,7 @@ XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t* col_ptr,
|
||||
std::unique_ptr<data::SimpleCSRSource> source(new data::SimpleCSRSource());
|
||||
|
||||
API_BEGIN();
|
||||
int nthread;
|
||||
#pragma omp parallel
|
||||
{
|
||||
nthread = omp_get_num_threads();
|
||||
}
|
||||
const int nthread = omp_get_max_threads();
|
||||
data::SimpleCSRSource& mat = *source;
|
||||
common::ParallelGroupBuilder<RowBatch::Entry> builder(&mat.row_ptr_, &mat.row_data_);
|
||||
builder.InitBudget(0, nthread);
|
||||
|
||||
@ -83,13 +83,8 @@ void SimpleDMatrix::MakeOneBatch(const std::vector<bool>& enabled,
|
||||
// clear rowset
|
||||
buffered_rowset_.clear();
|
||||
// bit map
|
||||
int nthread;
|
||||
const int nthread = omp_get_max_threads();
|
||||
std::vector<bool> bmap;
|
||||
#pragma omp parallel
|
||||
{
|
||||
nthread = omp_get_num_threads();
|
||||
}
|
||||
|
||||
pcol->Clear();
|
||||
common::ParallelGroupBuilder<SparseBatch::Entry>
|
||||
builder(&pcol->offset, &pcol->data);
|
||||
@ -204,15 +199,7 @@ void SimpleDMatrix::MakeColPage(const RowBatch& batch,
|
||||
size_t buffer_begin,
|
||||
const std::vector<bool>& enabled,
|
||||
SparsePage* pcol) {
|
||||
int nthread;
|
||||
#pragma omp parallel
|
||||
{
|
||||
nthread = omp_get_num_threads();
|
||||
int max_nthread = std::max(omp_get_num_procs() / 2 - 2, 1);
|
||||
if (nthread > max_nthread) {
|
||||
nthread = max_nthread;
|
||||
}
|
||||
}
|
||||
const int nthread = std::min(omp_get_max_threads(), std::max(omp_get_num_procs() / 2 - 2, 1));
|
||||
pcol->Clear();
|
||||
common::ParallelGroupBuilder<SparseBatch::Entry>
|
||||
builder(&pcol->offset, &pcol->data);
|
||||
|
||||
@ -169,12 +169,7 @@ void SparsePageDMatrix::InitColAccess(const std::vector<bool>& enabled,
|
||||
SparsePage *pcol) {
|
||||
pcol->Clear();
|
||||
pcol->min_index = buffered_rowset_[begin];
|
||||
int nthread;
|
||||
#pragma omp parallel
|
||||
{
|
||||
nthread = omp_get_num_threads();
|
||||
nthread = std::max(nthread, std::max(omp_get_num_procs() / 2 - 1, 1));
|
||||
}
|
||||
const int nthread = std::max(omp_get_max_threads(), std::max(omp_get_num_procs() / 2 - 1, 1));
|
||||
common::ParallelGroupBuilder<SparseBatch::Entry>
|
||||
builder(&pcol->offset, &pcol->data);
|
||||
builder.InitBudget(info.num_col, nthread);
|
||||
|
||||
@ -301,11 +301,7 @@ class GBTree : public GradientBooster {
|
||||
void PredictLeaf(DMatrix* p_fmat,
|
||||
std::vector<bst_float>* out_preds,
|
||||
unsigned ntree_limit) override {
|
||||
int nthread;
|
||||
#pragma omp parallel
|
||||
{
|
||||
nthread = omp_get_num_threads();
|
||||
}
|
||||
const int nthread = omp_get_max_threads();
|
||||
InitThreadTemp(nthread);
|
||||
this->PredPath(p_fmat, out_preds, ntree_limit);
|
||||
}
|
||||
@ -365,11 +361,7 @@ class GBTree : public GradientBooster {
|
||||
unsigned tree_begin,
|
||||
unsigned tree_end) {
|
||||
const MetaInfo& info = p_fmat->info();
|
||||
int nthread;
|
||||
#pragma omp parallel
|
||||
{
|
||||
nthread = omp_get_num_threads();
|
||||
}
|
||||
const int nthread = omp_get_max_threads();
|
||||
CHECK_EQ(num_group, mparam.num_output_group);
|
||||
InitThreadTemp(nthread);
|
||||
std::vector<bst_float> &preds = *out_preds;
|
||||
|
||||
@ -118,15 +118,6 @@ class BaseMaker: public TreeUpdater {
|
||||
}
|
||||
return n.cdefault();
|
||||
}
|
||||
/*! \brief get number of omp thread in current context */
|
||||
inline static int get_nthread() {
|
||||
int nthread;
|
||||
#pragma omp parallel
|
||||
{
|
||||
nthread = omp_get_num_threads();
|
||||
}
|
||||
return nthread;
|
||||
}
|
||||
// ------class member helpers---------
|
||||
/*! \brief initialize temp data structure */
|
||||
inline void InitData(const std::vector<bst_gpair> &gpair,
|
||||
@ -350,7 +341,7 @@ class BaseMaker: public TreeUpdater {
|
||||
std::vector<TStats> *p_node_stats) {
|
||||
std::vector< std::vector<TStats> > &thread_temp = *p_thread_temp;
|
||||
const MetaInfo &info = fmat.info();
|
||||
thread_temp.resize(this->get_nthread());
|
||||
thread_temp.resize(omp_get_max_threads());
|
||||
p_node_stats->resize(tree.param.num_nodes);
|
||||
#pragma omp parallel
|
||||
{
|
||||
|
||||
@ -81,7 +81,7 @@ class ColMaker: public TreeUpdater {
|
||||
struct Builder {
|
||||
public:
|
||||
// constructor
|
||||
explicit Builder(const TrainParam& param) : param(param) {}
|
||||
explicit Builder(const TrainParam& param) : param(param), nthread(omp_get_max_threads()) {}
|
||||
// update one tree, growing
|
||||
virtual void Update(const std::vector<bst_gpair>& gpair,
|
||||
DMatrix* p_fmat,
|
||||
@ -166,10 +166,6 @@ class ColMaker: public TreeUpdater {
|
||||
}
|
||||
{
|
||||
// setup temp space for each thread
|
||||
#pragma omp parallel
|
||||
{
|
||||
this->nthread = omp_get_num_threads();
|
||||
}
|
||||
// reserve a small space
|
||||
stemp.clear();
|
||||
stemp.resize(this->nthread, std::vector<ThreadEntry>());
|
||||
@ -277,8 +273,7 @@ class ColMaker: public TreeUpdater {
|
||||
for (size_t j = 0; j < qexpand.size(); ++j) {
|
||||
temp[qexpand[j]].stats.Clear();
|
||||
}
|
||||
nthread = omp_get_num_threads();
|
||||
bst_uint step = (col.length + nthread - 1) / nthread;
|
||||
bst_uint step = (col.length + this->nthread - 1) / this->nthread;
|
||||
bst_uint end = std::min(col.length, step * (tid + 1));
|
||||
for (bst_uint i = tid * step; i < end; ++i) {
|
||||
const bst_uint ridx = col[i].index;
|
||||
@ -298,7 +293,7 @@ class ColMaker: public TreeUpdater {
|
||||
for (bst_omp_uint j = 0; j < nnode; ++j) {
|
||||
const int nid = qexpand[j];
|
||||
TStats sum(param), tmp(param), c(param);
|
||||
for (int tid = 0; tid < nthread; ++tid) {
|
||||
for (int tid = 0; tid < this->nthread; ++tid) {
|
||||
tmp = stemp[tid][nid].stats;
|
||||
stemp[tid][nid].stats = sum;
|
||||
sum.Add(tmp);
|
||||
@ -306,7 +301,7 @@ class ColMaker: public TreeUpdater {
|
||||
std::swap(stemp[tid - 1][nid].last_fvalue, stemp[tid][nid].first_fvalue);
|
||||
}
|
||||
}
|
||||
for (int tid = 0; tid < nthread; ++tid) {
|
||||
for (int tid = 0; tid < this->nthread; ++tid) {
|
||||
stemp[tid][nid].stats_extra = sum;
|
||||
ThreadEntry &e = stemp[tid][nid];
|
||||
bst_float fsplit;
|
||||
@ -341,7 +336,7 @@ class ColMaker: public TreeUpdater {
|
||||
}
|
||||
if (need_backward) {
|
||||
tmp = sum;
|
||||
ThreadEntry &e = stemp[nthread-1][nid];
|
||||
ThreadEntry &e = stemp[this->nthread-1][nid];
|
||||
c.SetSubstract(snode[nid].stats, tmp);
|
||||
if (c.sum_hess >= param.min_child_weight &&
|
||||
tmp.sum_hess >= param.min_child_weight) {
|
||||
@ -357,8 +352,7 @@ class ColMaker: public TreeUpdater {
|
||||
TStats c(param), cright(param);
|
||||
const int tid = omp_get_thread_num();
|
||||
std::vector<ThreadEntry> &temp = stemp[tid];
|
||||
nthread = static_cast<bst_uint>(omp_get_num_threads());
|
||||
bst_uint step = (col.length + nthread - 1) / nthread;
|
||||
bst_uint step = (col.length + this->nthread - 1) / this->nthread;
|
||||
bst_uint end = std::min(col.length, step * (tid + 1));
|
||||
for (bst_uint i = tid * step; i < end; ++i) {
|
||||
const bst_uint ridx = col[i].index;
|
||||
@ -599,7 +593,7 @@ class ColMaker: public TreeUpdater {
|
||||
#endif
|
||||
int poption = param.parallel_option;
|
||||
if (poption == 2) {
|
||||
poption = static_cast<int>(nsize) * 2 < nthread ? 1 : 0;
|
||||
poption = static_cast<int>(nsize) * 2 < this->nthread ? 1 : 0;
|
||||
}
|
||||
if (poption == 0) {
|
||||
#pragma omp parallel for schedule(dynamic, batch_size)
|
||||
@ -760,7 +754,7 @@ class ColMaker: public TreeUpdater {
|
||||
// --data fields--
|
||||
const TrainParam& param;
|
||||
// number of omp thread used during training
|
||||
int nthread;
|
||||
const int nthread;
|
||||
// Per feature: shuffle index of each feature index
|
||||
std::vector<bst_uint> feat_index;
|
||||
// Instance Data: current node position in the tree of each instance
|
||||
|
||||
@ -336,7 +336,7 @@ class CQHistMaker: public HistMaker<TStats> {
|
||||
auto lazy_get_hist = [&]()
|
||||
#endif
|
||||
{
|
||||
thread_hist.resize(this->get_nthread());
|
||||
thread_hist.resize(omp_get_max_threads());
|
||||
// start accumulating statistics
|
||||
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(fset);
|
||||
iter->BeforeFirst();
|
||||
@ -410,7 +410,7 @@ class CQHistMaker: public HistMaker<TStats> {
|
||||
}
|
||||
{
|
||||
// get summary
|
||||
thread_sketch.resize(this->get_nthread());
|
||||
thread_sketch.resize(omp_get_max_threads());
|
||||
|
||||
// TWOPASS: use the real set + split set in the column iteration.
|
||||
this->SetDefaultPostion(p_fmat, tree);
|
||||
@ -695,7 +695,7 @@ class GlobalProposalHistMaker: public CQHistMaker<TStats> {
|
||||
this->wspace.Init(this->param, 1);
|
||||
// to gain speedup in recovery
|
||||
{
|
||||
this->thread_hist.resize(this->get_nthread());
|
||||
this->thread_hist.resize(omp_get_max_threads());
|
||||
|
||||
// TWOPASS: use the real set + split set in the column iteration.
|
||||
this->SetDefaultPostion(p_fmat, tree);
|
||||
@ -756,7 +756,7 @@ class QuantileHistMaker: public HistMaker<TStats> {
|
||||
const RegTree &tree) override {
|
||||
const MetaInfo &info = p_fmat->info();
|
||||
// initialize the data structure
|
||||
int nthread = BaseMaker::get_nthread();
|
||||
const int nthread = omp_get_max_threads();
|
||||
sketchs.resize(this->qexpand.size() * tree.param.num_feature);
|
||||
for (size_t i = 0; i < sketchs.size(); ++i) {
|
||||
sketchs[i].Init(info.num_row, this->param.sketch_eps);
|
||||
|
||||
@ -34,11 +34,7 @@ class TreeRefresher: public TreeUpdater {
|
||||
std::vector<std::vector<TStats> > stemp;
|
||||
std::vector<RegTree::FVec> fvec_temp;
|
||||
// setup temp space for each thread
|
||||
int nthread;
|
||||
#pragma omp parallel
|
||||
{
|
||||
nthread = omp_get_num_threads();
|
||||
}
|
||||
const int nthread = omp_get_max_threads();
|
||||
fvec_temp.resize(nthread, RegTree::FVec());
|
||||
stemp.resize(nthread, std::vector<TStats>());
|
||||
#pragma omp parallel
|
||||
|
||||
@ -141,7 +141,7 @@ class SketchMaker: public BaseMaker {
|
||||
for (size_t i = 0; i < sketchs.size(); ++i) {
|
||||
sketchs[i].Init(info.num_row, this->param.sketch_eps);
|
||||
}
|
||||
thread_sketch.resize(this->get_nthread());
|
||||
thread_sketch.resize(omp_get_max_threads());
|
||||
// number of rows in
|
||||
const size_t nrows = p_fmat->buffered_rowset().size();
|
||||
// start accumulating statistics
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user