Changing omp_get_num_threads to omp_get_max_threads (#1831)
* Updating dmlc-core
* Changing omp_get_num_threads to omp_get_max_threads
This commit is contained in:
parent
47ba2de7d4
commit
7078c41dad
@ -1 +1 @@
|
|||||||
Subproject commit f35f14f30835af238257b979cc1fac3e41ff3291
|
Subproject commit 78b78be34ac27d30f2193f3d51848c62887669c4
|
||||||
@ -282,11 +282,7 @@ XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t* col_ptr,
|
|||||||
std::unique_ptr<data::SimpleCSRSource> source(new data::SimpleCSRSource());
|
std::unique_ptr<data::SimpleCSRSource> source(new data::SimpleCSRSource());
|
||||||
|
|
||||||
API_BEGIN();
|
API_BEGIN();
|
||||||
int nthread;
|
const int nthread = omp_get_max_threads();
|
||||||
#pragma omp parallel
|
|
||||||
{
|
|
||||||
nthread = omp_get_num_threads();
|
|
||||||
}
|
|
||||||
data::SimpleCSRSource& mat = *source;
|
data::SimpleCSRSource& mat = *source;
|
||||||
common::ParallelGroupBuilder<RowBatch::Entry> builder(&mat.row_ptr_, &mat.row_data_);
|
common::ParallelGroupBuilder<RowBatch::Entry> builder(&mat.row_ptr_, &mat.row_data_);
|
||||||
builder.InitBudget(0, nthread);
|
builder.InitBudget(0, nthread);
|
||||||
|
|||||||
@ -83,13 +83,8 @@ void SimpleDMatrix::MakeOneBatch(const std::vector<bool>& enabled,
|
|||||||
// clear rowset
|
// clear rowset
|
||||||
buffered_rowset_.clear();
|
buffered_rowset_.clear();
|
||||||
// bit map
|
// bit map
|
||||||
int nthread;
|
const int nthread = omp_get_max_threads();
|
||||||
std::vector<bool> bmap;
|
std::vector<bool> bmap;
|
||||||
#pragma omp parallel
|
|
||||||
{
|
|
||||||
nthread = omp_get_num_threads();
|
|
||||||
}
|
|
||||||
|
|
||||||
pcol->Clear();
|
pcol->Clear();
|
||||||
common::ParallelGroupBuilder<SparseBatch::Entry>
|
common::ParallelGroupBuilder<SparseBatch::Entry>
|
||||||
builder(&pcol->offset, &pcol->data);
|
builder(&pcol->offset, &pcol->data);
|
||||||
@ -204,15 +199,7 @@ void SimpleDMatrix::MakeColPage(const RowBatch& batch,
|
|||||||
size_t buffer_begin,
|
size_t buffer_begin,
|
||||||
const std::vector<bool>& enabled,
|
const std::vector<bool>& enabled,
|
||||||
SparsePage* pcol) {
|
SparsePage* pcol) {
|
||||||
int nthread;
|
const int nthread = std::min(omp_get_max_threads(), std::max(omp_get_num_procs() / 2 - 2, 1));
|
||||||
#pragma omp parallel
|
|
||||||
{
|
|
||||||
nthread = omp_get_num_threads();
|
|
||||||
int max_nthread = std::max(omp_get_num_procs() / 2 - 2, 1);
|
|
||||||
if (nthread > max_nthread) {
|
|
||||||
nthread = max_nthread;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pcol->Clear();
|
pcol->Clear();
|
||||||
common::ParallelGroupBuilder<SparseBatch::Entry>
|
common::ParallelGroupBuilder<SparseBatch::Entry>
|
||||||
builder(&pcol->offset, &pcol->data);
|
builder(&pcol->offset, &pcol->data);
|
||||||
|
|||||||
@ -169,12 +169,7 @@ void SparsePageDMatrix::InitColAccess(const std::vector<bool>& enabled,
|
|||||||
SparsePage *pcol) {
|
SparsePage *pcol) {
|
||||||
pcol->Clear();
|
pcol->Clear();
|
||||||
pcol->min_index = buffered_rowset_[begin];
|
pcol->min_index = buffered_rowset_[begin];
|
||||||
int nthread;
|
const int nthread = std::max(omp_get_max_threads(), std::max(omp_get_num_procs() / 2 - 1, 1));
|
||||||
#pragma omp parallel
|
|
||||||
{
|
|
||||||
nthread = omp_get_num_threads();
|
|
||||||
nthread = std::max(nthread, std::max(omp_get_num_procs() / 2 - 1, 1));
|
|
||||||
}
|
|
||||||
common::ParallelGroupBuilder<SparseBatch::Entry>
|
common::ParallelGroupBuilder<SparseBatch::Entry>
|
||||||
builder(&pcol->offset, &pcol->data);
|
builder(&pcol->offset, &pcol->data);
|
||||||
builder.InitBudget(info.num_col, nthread);
|
builder.InitBudget(info.num_col, nthread);
|
||||||
|
|||||||
@ -301,11 +301,7 @@ class GBTree : public GradientBooster {
|
|||||||
void PredictLeaf(DMatrix* p_fmat,
|
void PredictLeaf(DMatrix* p_fmat,
|
||||||
std::vector<bst_float>* out_preds,
|
std::vector<bst_float>* out_preds,
|
||||||
unsigned ntree_limit) override {
|
unsigned ntree_limit) override {
|
||||||
int nthread;
|
const int nthread = omp_get_max_threads();
|
||||||
#pragma omp parallel
|
|
||||||
{
|
|
||||||
nthread = omp_get_num_threads();
|
|
||||||
}
|
|
||||||
InitThreadTemp(nthread);
|
InitThreadTemp(nthread);
|
||||||
this->PredPath(p_fmat, out_preds, ntree_limit);
|
this->PredPath(p_fmat, out_preds, ntree_limit);
|
||||||
}
|
}
|
||||||
@ -365,11 +361,7 @@ class GBTree : public GradientBooster {
|
|||||||
unsigned tree_begin,
|
unsigned tree_begin,
|
||||||
unsigned tree_end) {
|
unsigned tree_end) {
|
||||||
const MetaInfo& info = p_fmat->info();
|
const MetaInfo& info = p_fmat->info();
|
||||||
int nthread;
|
const int nthread = omp_get_max_threads();
|
||||||
#pragma omp parallel
|
|
||||||
{
|
|
||||||
nthread = omp_get_num_threads();
|
|
||||||
}
|
|
||||||
CHECK_EQ(num_group, mparam.num_output_group);
|
CHECK_EQ(num_group, mparam.num_output_group);
|
||||||
InitThreadTemp(nthread);
|
InitThreadTemp(nthread);
|
||||||
std::vector<bst_float> &preds = *out_preds;
|
std::vector<bst_float> &preds = *out_preds;
|
||||||
|
|||||||
@ -118,15 +118,6 @@ class BaseMaker: public TreeUpdater {
|
|||||||
}
|
}
|
||||||
return n.cdefault();
|
return n.cdefault();
|
||||||
}
|
}
|
||||||
/*! \brief get number of omp thread in current context */
|
|
||||||
inline static int get_nthread() {
|
|
||||||
int nthread;
|
|
||||||
#pragma omp parallel
|
|
||||||
{
|
|
||||||
nthread = omp_get_num_threads();
|
|
||||||
}
|
|
||||||
return nthread;
|
|
||||||
}
|
|
||||||
// ------class member helpers---------
|
// ------class member helpers---------
|
||||||
/*! \brief initialize temp data structure */
|
/*! \brief initialize temp data structure */
|
||||||
inline void InitData(const std::vector<bst_gpair> &gpair,
|
inline void InitData(const std::vector<bst_gpair> &gpair,
|
||||||
@ -350,7 +341,7 @@ class BaseMaker: public TreeUpdater {
|
|||||||
std::vector<TStats> *p_node_stats) {
|
std::vector<TStats> *p_node_stats) {
|
||||||
std::vector< std::vector<TStats> > &thread_temp = *p_thread_temp;
|
std::vector< std::vector<TStats> > &thread_temp = *p_thread_temp;
|
||||||
const MetaInfo &info = fmat.info();
|
const MetaInfo &info = fmat.info();
|
||||||
thread_temp.resize(this->get_nthread());
|
thread_temp.resize(omp_get_max_threads());
|
||||||
p_node_stats->resize(tree.param.num_nodes);
|
p_node_stats->resize(tree.param.num_nodes);
|
||||||
#pragma omp parallel
|
#pragma omp parallel
|
||||||
{
|
{
|
||||||
|
|||||||
@ -81,7 +81,7 @@ class ColMaker: public TreeUpdater {
|
|||||||
struct Builder {
|
struct Builder {
|
||||||
public:
|
public:
|
||||||
// constructor
|
// constructor
|
||||||
explicit Builder(const TrainParam& param) : param(param) {}
|
explicit Builder(const TrainParam& param) : param(param), nthread(omp_get_max_threads()) {}
|
||||||
// update one tree, growing
|
// update one tree, growing
|
||||||
virtual void Update(const std::vector<bst_gpair>& gpair,
|
virtual void Update(const std::vector<bst_gpair>& gpair,
|
||||||
DMatrix* p_fmat,
|
DMatrix* p_fmat,
|
||||||
@ -166,10 +166,6 @@ class ColMaker: public TreeUpdater {
|
|||||||
}
|
}
|
||||||
{
|
{
|
||||||
// setup temp space for each thread
|
// setup temp space for each thread
|
||||||
#pragma omp parallel
|
|
||||||
{
|
|
||||||
this->nthread = omp_get_num_threads();
|
|
||||||
}
|
|
||||||
// reserve a small space
|
// reserve a small space
|
||||||
stemp.clear();
|
stemp.clear();
|
||||||
stemp.resize(this->nthread, std::vector<ThreadEntry>());
|
stemp.resize(this->nthread, std::vector<ThreadEntry>());
|
||||||
@ -277,8 +273,7 @@ class ColMaker: public TreeUpdater {
|
|||||||
for (size_t j = 0; j < qexpand.size(); ++j) {
|
for (size_t j = 0; j < qexpand.size(); ++j) {
|
||||||
temp[qexpand[j]].stats.Clear();
|
temp[qexpand[j]].stats.Clear();
|
||||||
}
|
}
|
||||||
nthread = omp_get_num_threads();
|
bst_uint step = (col.length + this->nthread - 1) / this->nthread;
|
||||||
bst_uint step = (col.length + nthread - 1) / nthread;
|
|
||||||
bst_uint end = std::min(col.length, step * (tid + 1));
|
bst_uint end = std::min(col.length, step * (tid + 1));
|
||||||
for (bst_uint i = tid * step; i < end; ++i) {
|
for (bst_uint i = tid * step; i < end; ++i) {
|
||||||
const bst_uint ridx = col[i].index;
|
const bst_uint ridx = col[i].index;
|
||||||
@ -298,7 +293,7 @@ class ColMaker: public TreeUpdater {
|
|||||||
for (bst_omp_uint j = 0; j < nnode; ++j) {
|
for (bst_omp_uint j = 0; j < nnode; ++j) {
|
||||||
const int nid = qexpand[j];
|
const int nid = qexpand[j];
|
||||||
TStats sum(param), tmp(param), c(param);
|
TStats sum(param), tmp(param), c(param);
|
||||||
for (int tid = 0; tid < nthread; ++tid) {
|
for (int tid = 0; tid < this->nthread; ++tid) {
|
||||||
tmp = stemp[tid][nid].stats;
|
tmp = stemp[tid][nid].stats;
|
||||||
stemp[tid][nid].stats = sum;
|
stemp[tid][nid].stats = sum;
|
||||||
sum.Add(tmp);
|
sum.Add(tmp);
|
||||||
@ -306,7 +301,7 @@ class ColMaker: public TreeUpdater {
|
|||||||
std::swap(stemp[tid - 1][nid].last_fvalue, stemp[tid][nid].first_fvalue);
|
std::swap(stemp[tid - 1][nid].last_fvalue, stemp[tid][nid].first_fvalue);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (int tid = 0; tid < nthread; ++tid) {
|
for (int tid = 0; tid < this->nthread; ++tid) {
|
||||||
stemp[tid][nid].stats_extra = sum;
|
stemp[tid][nid].stats_extra = sum;
|
||||||
ThreadEntry &e = stemp[tid][nid];
|
ThreadEntry &e = stemp[tid][nid];
|
||||||
bst_float fsplit;
|
bst_float fsplit;
|
||||||
@ -341,7 +336,7 @@ class ColMaker: public TreeUpdater {
|
|||||||
}
|
}
|
||||||
if (need_backward) {
|
if (need_backward) {
|
||||||
tmp = sum;
|
tmp = sum;
|
||||||
ThreadEntry &e = stemp[nthread-1][nid];
|
ThreadEntry &e = stemp[this->nthread-1][nid];
|
||||||
c.SetSubstract(snode[nid].stats, tmp);
|
c.SetSubstract(snode[nid].stats, tmp);
|
||||||
if (c.sum_hess >= param.min_child_weight &&
|
if (c.sum_hess >= param.min_child_weight &&
|
||||||
tmp.sum_hess >= param.min_child_weight) {
|
tmp.sum_hess >= param.min_child_weight) {
|
||||||
@ -357,8 +352,7 @@ class ColMaker: public TreeUpdater {
|
|||||||
TStats c(param), cright(param);
|
TStats c(param), cright(param);
|
||||||
const int tid = omp_get_thread_num();
|
const int tid = omp_get_thread_num();
|
||||||
std::vector<ThreadEntry> &temp = stemp[tid];
|
std::vector<ThreadEntry> &temp = stemp[tid];
|
||||||
nthread = static_cast<bst_uint>(omp_get_num_threads());
|
bst_uint step = (col.length + this->nthread - 1) / this->nthread;
|
||||||
bst_uint step = (col.length + nthread - 1) / nthread;
|
|
||||||
bst_uint end = std::min(col.length, step * (tid + 1));
|
bst_uint end = std::min(col.length, step * (tid + 1));
|
||||||
for (bst_uint i = tid * step; i < end; ++i) {
|
for (bst_uint i = tid * step; i < end; ++i) {
|
||||||
const bst_uint ridx = col[i].index;
|
const bst_uint ridx = col[i].index;
|
||||||
@ -599,7 +593,7 @@ class ColMaker: public TreeUpdater {
|
|||||||
#endif
|
#endif
|
||||||
int poption = param.parallel_option;
|
int poption = param.parallel_option;
|
||||||
if (poption == 2) {
|
if (poption == 2) {
|
||||||
poption = static_cast<int>(nsize) * 2 < nthread ? 1 : 0;
|
poption = static_cast<int>(nsize) * 2 < this->nthread ? 1 : 0;
|
||||||
}
|
}
|
||||||
if (poption == 0) {
|
if (poption == 0) {
|
||||||
#pragma omp parallel for schedule(dynamic, batch_size)
|
#pragma omp parallel for schedule(dynamic, batch_size)
|
||||||
@ -760,7 +754,7 @@ class ColMaker: public TreeUpdater {
|
|||||||
// --data fields--
|
// --data fields--
|
||||||
const TrainParam& param;
|
const TrainParam& param;
|
||||||
// number of omp thread used during training
|
// number of omp thread used during training
|
||||||
int nthread;
|
const int nthread;
|
||||||
// Per feature: shuffle index of each feature index
|
// Per feature: shuffle index of each feature index
|
||||||
std::vector<bst_uint> feat_index;
|
std::vector<bst_uint> feat_index;
|
||||||
// Instance Data: current node position in the tree of each instance
|
// Instance Data: current node position in the tree of each instance
|
||||||
|
|||||||
@ -336,7 +336,7 @@ class CQHistMaker: public HistMaker<TStats> {
|
|||||||
auto lazy_get_hist = [&]()
|
auto lazy_get_hist = [&]()
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
thread_hist.resize(this->get_nthread());
|
thread_hist.resize(omp_get_max_threads());
|
||||||
// start accumulating statistics
|
// start accumulating statistics
|
||||||
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(fset);
|
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(fset);
|
||||||
iter->BeforeFirst();
|
iter->BeforeFirst();
|
||||||
@ -410,7 +410,7 @@ class CQHistMaker: public HistMaker<TStats> {
|
|||||||
}
|
}
|
||||||
{
|
{
|
||||||
// get smmary
|
// get smmary
|
||||||
thread_sketch.resize(this->get_nthread());
|
thread_sketch.resize(omp_get_max_threads());
|
||||||
|
|
||||||
// TWOPASS: use the real set + split set in the column iteration.
|
// TWOPASS: use the real set + split set in the column iteration.
|
||||||
this->SetDefaultPostion(p_fmat, tree);
|
this->SetDefaultPostion(p_fmat, tree);
|
||||||
@ -695,7 +695,7 @@ class GlobalProposalHistMaker: public CQHistMaker<TStats> {
|
|||||||
this->wspace.Init(this->param, 1);
|
this->wspace.Init(this->param, 1);
|
||||||
// to gain speedup in recovery
|
// to gain speedup in recovery
|
||||||
{
|
{
|
||||||
this->thread_hist.resize(this->get_nthread());
|
this->thread_hist.resize(omp_get_max_threads());
|
||||||
|
|
||||||
// TWOPASS: use the real set + split set in the column iteration.
|
// TWOPASS: use the real set + split set in the column iteration.
|
||||||
this->SetDefaultPostion(p_fmat, tree);
|
this->SetDefaultPostion(p_fmat, tree);
|
||||||
@ -756,7 +756,7 @@ class QuantileHistMaker: public HistMaker<TStats> {
|
|||||||
const RegTree &tree) override {
|
const RegTree &tree) override {
|
||||||
const MetaInfo &info = p_fmat->info();
|
const MetaInfo &info = p_fmat->info();
|
||||||
// initialize the data structure
|
// initialize the data structure
|
||||||
int nthread = BaseMaker::get_nthread();
|
const int nthread = omp_get_max_threads();
|
||||||
sketchs.resize(this->qexpand.size() * tree.param.num_feature);
|
sketchs.resize(this->qexpand.size() * tree.param.num_feature);
|
||||||
for (size_t i = 0; i < sketchs.size(); ++i) {
|
for (size_t i = 0; i < sketchs.size(); ++i) {
|
||||||
sketchs[i].Init(info.num_row, this->param.sketch_eps);
|
sketchs[i].Init(info.num_row, this->param.sketch_eps);
|
||||||
|
|||||||
@ -34,11 +34,7 @@ class TreeRefresher: public TreeUpdater {
|
|||||||
std::vector<std::vector<TStats> > stemp;
|
std::vector<std::vector<TStats> > stemp;
|
||||||
std::vector<RegTree::FVec> fvec_temp;
|
std::vector<RegTree::FVec> fvec_temp;
|
||||||
// setup temp space for each thread
|
// setup temp space for each thread
|
||||||
int nthread;
|
const int nthread = omp_get_max_threads();
|
||||||
#pragma omp parallel
|
|
||||||
{
|
|
||||||
nthread = omp_get_num_threads();
|
|
||||||
}
|
|
||||||
fvec_temp.resize(nthread, RegTree::FVec());
|
fvec_temp.resize(nthread, RegTree::FVec());
|
||||||
stemp.resize(nthread, std::vector<TStats>());
|
stemp.resize(nthread, std::vector<TStats>());
|
||||||
#pragma omp parallel
|
#pragma omp parallel
|
||||||
|
|||||||
@ -141,7 +141,7 @@ class SketchMaker: public BaseMaker {
|
|||||||
for (size_t i = 0; i < sketchs.size(); ++i) {
|
for (size_t i = 0; i < sketchs.size(); ++i) {
|
||||||
sketchs[i].Init(info.num_row, this->param.sketch_eps);
|
sketchs[i].Init(info.num_row, this->param.sketch_eps);
|
||||||
}
|
}
|
||||||
thread_sketch.resize(this->get_nthread());
|
thread_sketch.resize(omp_get_max_threads());
|
||||||
// number of rows in
|
// number of rows in
|
||||||
const size_t nrows = p_fmat->buffered_rowset().size();
|
const size_t nrows = p_fmat->buffered_rowset().size();
|
||||||
// start accumulating statistics
|
// start accumulating statistics
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user