Improve operation efficiency for single predict (#5016)

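* Improve operation efficiency for single-row prediction: skip the unrolled OpenMP parallel loop when a batch holds fewer than kUnroll rows, so small batches go straight to the serial remainder loop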
2019-11-10 02:01:28 +08:00 · 2019-11-10 02:01:28 +08:00 · 1733c9e8f7
commit 1733c9e8f7
parent 374648c21a
2 changed files with 39 additions and 35 deletions
--- a/src/gbm/gbtree.cc
+++ b/src/gbm/gbtree.cc
@@ -408,6 +408,7 @@ class Dart : public GBTree {
      constexpr int kUnroll = 8;
      const auto nsize = static_cast<bst_omp_uint>(batch.Size());
      const bst_omp_uint rest = nsize % kUnroll;
+      if (nsize >= kUnroll) {
        #pragma omp parallel for schedule(static)
        for (bst_omp_uint i = 0; i < nsize - rest; i += kUnroll) {
          const int tid = omp_get_thread_num();
@@ -429,6 +430,7 @@ class Dart : public GBTree {
            }
          }
        }
+      }
      for (bst_omp_uint i = nsize - rest; i < nsize; ++i) {
        RegTree::FVec& feats = thread_temp_[0];
        const auto ridx = static_cast<int64_t>(batch.base_rowid + i);
--- a/src/predictor/cpu_predictor.cc
+++ b/src/predictor/cpu_predictor.cc
@@ -63,6 +63,7 @@ class CPUPredictor : public Predictor {
      // Pull to host before entering omp block, as this is not thread safe.
      batch.data.HostVector();
      batch.offset.HostVector();
+      if (nsize >= kUnroll) {
 #pragma omp parallel for schedule(static)
        for (bst_omp_uint i = 0; i < nsize - rest; i += kUnroll) {
          const int tid = omp_get_thread_num();
@@ -84,6 +85,7 @@ class CPUPredictor : public Predictor {
            }
          }
        }
+      }
      for (bst_omp_uint i = nsize - rest; i < nsize; ++i) {
        RegTree::FVec& feats = thread_temp[0];
        const auto ridx = static_cast<int64_t>(batch.base_rowid + i);