Remove omp_get_max_threads (#7608)

This is the last PR for removing the omp global variable.

* Add context object to the `DMatrix`.  This bridges `DMatrix` with https://github.com/dmlc/xgboost/issues/7308 .
* Require context to be available at the construction time of booster.
* Add `n_threads` support for R csc DMatrix constructor.
* Remove `omp_get_max_threads` in R glue code.
* Remove threading utilities that rely on the omp global variable.
This commit is contained in:
Jiaming Yuan
2022-01-28 16:09:22 +08:00
committed by GitHub
parent 028bdc1740
commit 81210420c6
31 changed files with 195 additions and 211 deletions

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2014-2021 by Contributors
* Copyright 2014-2022 by XGBoost Contributors
* \file gblinear.cc
* \brief Implementation of Linear booster, with L1/L2 regularization: Elastic Net
* the update rule is parallel coordinate descent (shotgun)
@@ -71,8 +71,9 @@ void LinearCheckLayer(unsigned layer_begin) {
*/
class GBLinear : public GradientBooster {
public:
explicit GBLinear(LearnerModelParam const* learner_model_param)
: learner_model_param_{learner_model_param},
explicit GBLinear(LearnerModelParam const* learner_model_param, GenericParameter const* ctx)
: GradientBooster{ctx},
learner_model_param_{learner_model_param},
model_{learner_model_param},
previous_model_{learner_model_param},
sum_instance_weight_(0),
@@ -190,7 +191,7 @@ class GBLinear : public GradientBooster {
// parallel over local batch
const auto nsize = static_cast<bst_omp_uint>(batch.Size());
auto page = batch.GetView();
common::ParallelFor(nsize, [&](bst_omp_uint i) {
common::ParallelFor(nsize, ctx_->Threads(), [&](bst_omp_uint i) {
auto inst = page[i];
auto row_idx = static_cast<size_t>(batch.base_rowid + i);
// loop over output groups
@@ -282,7 +283,7 @@ class GBLinear : public GradientBooster {
if (base_margin.Size() != 0) {
CHECK_EQ(base_margin.Size(), nsize * ngroup);
}
common::ParallelFor(nsize, [&](omp_ulong i) {
common::ParallelFor(nsize, ctx_->Threads(), [&](omp_ulong i) {
const size_t ridx = page.base_rowid + i;
// loop over output groups
for (int gid = 0; gid < ngroup; ++gid) {
@@ -351,8 +352,8 @@ DMLC_REGISTER_PARAMETER(GBLinearTrainParam);
XGBOOST_REGISTER_GBM(GBLinear, "gblinear")
.describe("Linear booster, implement generalized linear model.")
.set_body([](LearnerModelParam const* booster_config) {
return new GBLinear(booster_config);
.set_body([](LearnerModelParam const* booster_config, GenericParameter const* ctx) {
return new GBLinear(booster_config, ctx);
});
} // namespace gbm
} // namespace xgboost

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2015-2020 by Contributors
* Copyright 2015-2022 by XGBoost Contributors
* \file gbm.cc
* \brief Registry of gradient boosters.
*/
@@ -17,16 +17,13 @@ DMLC_REGISTRY_ENABLE(::xgboost::GradientBoosterReg);
} // namespace dmlc
namespace xgboost {
GradientBooster* GradientBooster::Create(
const std::string& name,
GenericParameter const* generic_param,
LearnerModelParam const* learner_model_param) {
GradientBooster* GradientBooster::Create(const std::string& name, GenericParameter const* ctx,
LearnerModelParam const* learner_model_param) {
auto *e = ::dmlc::Registry< ::xgboost::GradientBoosterReg>::Get()->Find(name);
if (e == nullptr) {
LOG(FATAL) << "Unknown gbm type " << name;
}
auto p_bst = (e->body)(learner_model_param);
p_bst->ctx_ = generic_param;
auto p_bst = (e->body)(learner_model_param, ctx);
return p_bst;
}
} // namespace xgboost

View File

@@ -636,8 +636,8 @@ void GPUDartInplacePredictInc(common::Span<float> out_predts,
class Dart : public GBTree {
public:
explicit Dart(LearnerModelParam const* booster_config) :
GBTree(booster_config) {}
explicit Dart(LearnerModelParam const* booster_config, GenericParameter const* ctx)
: GBTree(booster_config, ctx) {}
void Configure(const Args& cfg) override {
GBTree::Configure(cfg);
@@ -1018,16 +1018,16 @@ DMLC_REGISTER_PARAMETER(GBTreeTrainParam);
DMLC_REGISTER_PARAMETER(DartTrainParam);
XGBOOST_REGISTER_GBM(GBTree, "gbtree")
.describe("Tree booster, gradient boosted trees.")
.set_body([](LearnerModelParam const* booster_config) {
auto* p = new GBTree(booster_config);
return p;
});
.describe("Tree booster, gradient boosted trees.")
.set_body([](LearnerModelParam const* booster_config, GenericParameter const* ctx) {
auto* p = new GBTree(booster_config, ctx);
return p;
});
XGBOOST_REGISTER_GBM(Dart, "dart")
.describe("Tree booster, dart.")
.set_body([](LearnerModelParam const* booster_config) {
GBTree* p = new Dart(booster_config);
return p;
});
.describe("Tree booster, dart.")
.set_body([](LearnerModelParam const* booster_config, GenericParameter const* ctx) {
GBTree* p = new Dart(booster_config, ctx);
return p;
});
} // namespace gbm
} // namespace xgboost

View File

@@ -202,8 +202,8 @@ inline bool SliceTrees(int32_t layer_begin, int32_t layer_end, int32_t step,
// gradient boosted trees
class GBTree : public GradientBooster {
public:
explicit GBTree(LearnerModelParam const* booster_config) :
model_(booster_config) {}
explicit GBTree(LearnerModelParam const* booster_config, GenericParameter const* ctx)
: GradientBooster{ctx}, model_(booster_config, ctx_) {}
void Configure(const Args& cfg) override;
// Revise `tree_method` and `updater` parameters after seeing the training

View File

@@ -69,7 +69,8 @@ void GBTreeModel::SaveModel(Json* p_out) const {
out["gbtree_model_param"] = ToJson(param);
std::vector<Json> trees_json(trees.size());
common::ParallelFor(trees.size(), omp_get_max_threads(), [&](auto t) {
CHECK(ctx_);
common::ParallelFor(trees.size(), ctx_->Threads(), [&](auto t) {
auto const& tree = trees[t];
Json tree_json{Object()};
tree->SaveModel(&tree_json);
@@ -95,7 +96,8 @@ void GBTreeModel::LoadModel(Json const& in) {
auto const& trees_json = get<Array const>(in["trees"]);
trees.resize(trees_json.size());
common::ParallelFor(trees_json.size(), omp_get_max_threads(), [&](auto t) {
CHECK(ctx_);
common::ParallelFor(trees_json.size(), ctx_->Threads(), [&](auto t) {
auto tree_id = get<Integer>(trees_json[t]["id"]);
trees.at(tree_id).reset(new RegTree());
trees.at(tree_id)->LoadModel(trees_json[t]);

View File

@@ -83,8 +83,8 @@ struct GBTreeModelParam : public dmlc::Parameter<GBTreeModelParam> {
struct GBTreeModel : public Model {
public:
explicit GBTreeModel(LearnerModelParam const* learner_model) :
learner_model_param{learner_model} {}
explicit GBTreeModel(LearnerModelParam const* learner_model, GenericParameter const* ctx)
: learner_model_param{learner_model}, ctx_{ctx} {}
void Configure(const Args& cfg) {
// initialize model parameters if not yet been initialized.
if (trees.size() == 0) {
@@ -135,6 +135,9 @@ struct GBTreeModel : public Model {
std::vector<std::unique_ptr<RegTree> > trees_to_update;
/*! \brief some information indicator of the tree, reserved */
std::vector<int> tree_info;
private:
GenericParameter const* ctx_;
};
} // namespace gbm
} // namespace xgboost