Improve OpenMP exception handling (#6680)

2021-02-25 06:56:16 +01:00
parent c375173dca
commit 9b530e5697
26 changed files with 610 additions and 475 deletions
--- a/src/objective/rank_obj.cu
+++ b/src/objective/rank_obj.cu
@@ -823,72 +823,80 @@ class LambdaRankObj : public ObjFunction {
    const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
    out_gpair->Resize(preds.Size());

+    dmlc::OMPException exc;
    #pragma omp parallel
    {
-      // parallel construct, declare random number generator here, so that each
-      // thread use its own random number generator, seed by thread id and current iteration
-      std::minstd_rand rnd((iter + 1) * 1111);
-      std::vector<LambdaPair> pairs;
-      std::vector<ListEntry>  lst;
-      std::vector< std::pair<bst_float, unsigned> > rec;
+      exc.Run([&]() {
+        // Inside the parallel construct: declare the random number generator here so that
+        // each thread uses its own generator instance, seeded from the current iteration.
+        std::minstd_rand rnd((iter + 1) * 1111);
+        std::vector<LambdaPair> pairs;
+        std::vector<ListEntry>  lst;
+        std::vector< std::pair<bst_float, unsigned> > rec;

-      #pragma omp for schedule(static)
-      for (bst_omp_uint k = 0; k < ngroup; ++k) {
-        lst.clear(); pairs.clear();
-        for (unsigned j = gptr[k]; j < gptr[k+1]; ++j) {
-          lst.emplace_back(preds_h[j], labels[j], j);
-          gpair[j] = GradientPair(0.0f, 0.0f);
-        }
-        std::stable_sort(lst.begin(), lst.end(), ListEntry::CmpPred);
-        rec.resize(lst.size());
-        for (unsigned i = 0; i < lst.size(); ++i) {
-          rec[i] = std::make_pair(lst[i].label, i);
-        }
-        std::stable_sort(rec.begin(), rec.end(), common::CmpFirst);
-        // enumerate buckets with same label, for each item in the lst, grab another sample randomly
-        for (unsigned i = 0; i < rec.size(); ) {
-          unsigned j = i + 1;
-          while (j < rec.size() && rec[j].first == rec[i].first) ++j;
-          // bucket in [i,j), get a sample outside bucket
-          unsigned nleft = i, nright = static_cast<unsigned>(rec.size() - j);
-          if (nleft + nright != 0) {
-            int nsample = param_.num_pairsample;
-            while (nsample --) {
-              for (unsigned pid = i; pid < j; ++pid) {
-                unsigned ridx = std::uniform_int_distribution<unsigned>(0, nleft + nright - 1)(rnd);
-                if (ridx < nleft) {
-                  pairs.emplace_back(rec[ridx].second, rec[pid].second,
-                      info.GetWeight(k) * weight_normalization_factor);
-                } else {
-                  pairs.emplace_back(rec[pid].second, rec[ridx+j-i].second,
-                      info.GetWeight(k) * weight_normalization_factor);
+        #pragma omp for schedule(static)
+        for (bst_omp_uint k = 0; k < ngroup; ++k) {
+          exc.Run([&]() {
+            lst.clear(); pairs.clear();
+            for (unsigned j = gptr[k]; j < gptr[k+1]; ++j) {
+              lst.emplace_back(preds_h[j], labels[j], j);
+              gpair[j] = GradientPair(0.0f, 0.0f);
+            }
+            std::stable_sort(lst.begin(), lst.end(), ListEntry::CmpPred);
+            rec.resize(lst.size());
+            for (unsigned i = 0; i < lst.size(); ++i) {
+              rec[i] = std::make_pair(lst[i].label, i);
+            }
+            std::stable_sort(rec.begin(), rec.end(), common::CmpFirst);
+            // enumerate buckets with same label
+            // for each item in the lst, grab another sample randomly
+            for (unsigned i = 0; i < rec.size(); ) {
+              unsigned j = i + 1;
+              while (j < rec.size() && rec[j].first == rec[i].first) ++j;
+              // bucket in [i,j), get a sample outside bucket
+              unsigned nleft = i, nright = static_cast<unsigned>(rec.size() - j);
+              if (nleft + nright != 0) {
+                int nsample = param_.num_pairsample;
+                while (nsample --) {
+                  for (unsigned pid = i; pid < j; ++pid) {
+                    unsigned ridx =
+                        std::uniform_int_distribution<unsigned>(0, nleft + nright - 1)(rnd);
+                    if (ridx < nleft) {
+                      pairs.emplace_back(rec[ridx].second, rec[pid].second,
+                          info.GetWeight(k) * weight_normalization_factor);
+                    } else {
+                      pairs.emplace_back(rec[pid].second, rec[ridx+j-i].second,
+                          info.GetWeight(k) * weight_normalization_factor);
+                    }
+                  }
                }
              }
+              i = j;
            }
-          }
-          i = j;
+            // get lambda weight for the pairs
+            LambdaWeightComputerT::GetLambdaWeight(lst, &pairs);
+            // rescale each gradient and hessian so that each list carries a constant weight
+            float scale = 1.0f / param_.num_pairsample;
+            if (param_.fix_list_weight != 0.0f) {
+              scale *= param_.fix_list_weight / (gptr[k + 1] - gptr[k]);
+            }
+            for (auto & pair : pairs) {
+              const ListEntry &pos = lst[pair.pos_index];
+              const ListEntry &neg = lst[pair.neg_index];
+              const bst_float w = pair.weight * scale;
+              const float eps = 1e-16f;
+              bst_float p = common::Sigmoid(pos.pred - neg.pred);
+              bst_float g = p - 1.0f;
+              bst_float h = std::max(p * (1.0f - p), eps);
+              // accumulate gradient and hessian in both pid, and nid
+              gpair[pos.rindex] += GradientPair(g * w, 2.0f*w*h);
+              gpair[neg.rindex] += GradientPair(-g * w, 2.0f*w*h);
+            }
+          });
        }
-        // get lambda weight for the pairs
-        LambdaWeightComputerT::GetLambdaWeight(lst, &pairs);
-        // rescale each gradient and hessian so that the lst have constant weighted
-        float scale = 1.0f / param_.num_pairsample;
-        if (param_.fix_list_weight != 0.0f) {
-          scale *= param_.fix_list_weight / (gptr[k + 1] - gptr[k]);
-        }
-        for (auto & pair : pairs) {
-          const ListEntry &pos = lst[pair.pos_index];
-          const ListEntry &neg = lst[pair.neg_index];
-          const bst_float w = pair.weight * scale;
-          const float eps = 1e-16f;
-          bst_float p = common::Sigmoid(pos.pred - neg.pred);
-          bst_float g = p - 1.0f;
-          bst_float h = std::max(p * (1.0f - p), eps);
-          // accumulate gradient and hessian in both pid, and nid
-          gpair[pos.rindex] += GradientPair(g * w, 2.0f*w*h);
-          gpair[neg.rindex] += GradientPair(-g * w, 2.0f*w*h);
-        }
-      }
+      });
    }
+    exc.Rethrow();
  }

 #if defined(__CUDACC__)
--- a/src/objective/regression_obj.cu
+++ b/src/objective/regression_obj.cu
@@ -19,6 +19,7 @@

 #include "../common/transform.h"
 #include "../common/common.h"
+#include "../common/threading_utils.h"
 #include "./regression_loss.h"


@@ -345,10 +346,9 @@ class CoxRegression : public ObjFunction {
  void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
    std::vector<bst_float> &preds = io_preds->HostVector();
    const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
-#pragma omp parallel for schedule(static)
-    for (long j = 0; j < ndata; ++j) {  // NOLINT(*)
+    common::ParallelFor(ndata, [&](long j) { // NOLINT(*)
      preds[j] = std::exp(preds[j]);
-    }
+    });
  }
  void EvalTransform(HostDeviceVector<bst_float> *io_preds) override {
    PredTransform(io_preds);