Remove omp_get_max_threads in CPU predictor. (#7519)

This is part of the on going effort to remove the dependency on global omp variables.
2022-01-04 22:12:15 +08:00
parent 5516281881
commit 68cdbc9c16
6 changed files with 68 additions and 68 deletions
--- a/src/predictor/gpu_predictor.cu
+++ b/src/predictor/gpu_predictor.cu
@@ -633,12 +633,12 @@ class GPUPredictor : public xgboost::Predictor {
                       size_t num_features,
                       HostDeviceVector<bst_float>* predictions,
                       size_t batch_offset, bool is_dense) const {
-    batch.offset.SetDevice(generic_param_->gpu_id);
-    batch.data.SetDevice(generic_param_->gpu_id);
+    batch.offset.SetDevice(ctx_->gpu_id);
+    batch.data.SetDevice(ctx_->gpu_id);
    const uint32_t BLOCK_THREADS = 128;
    size_t num_rows = batch.Size();
    auto GRID_SIZE = static_cast<uint32_t>(common::DivRoundUp(num_rows, BLOCK_THREADS));
-    auto max_shared_memory_bytes = ConfigureDevice(generic_param_->gpu_id);
+    auto max_shared_memory_bytes = ConfigureDevice(ctx_->gpu_id);
    size_t shared_memory_bytes =
        SharedMemoryBytes<BLOCK_THREADS>(num_features, max_shared_memory_bytes);
    bool use_shared = shared_memory_bytes != 0;
@@ -694,10 +694,10 @@ class GPUPredictor : public xgboost::Predictor {
    if (tree_end - tree_begin == 0) {
      return;
    }
-    out_preds->SetDevice(generic_param_->gpu_id);
+    out_preds->SetDevice(ctx_->gpu_id);
    auto const& info = dmat->Info();
    DeviceModel d_model;
-    d_model.Init(model, tree_begin, tree_end, generic_param_->gpu_id);
+    d_model.Init(model, tree_begin, tree_end, ctx_->gpu_id);

    if (dmat->PageExists<SparsePage>()) {
      size_t batch_offset = 0;
@@ -709,10 +709,10 @@ class GPUPredictor : public xgboost::Predictor {
    } else {
      size_t batch_offset = 0;
      for (auto const& page : dmat->GetBatches<EllpackPage>()) {
-        dmat->Info().feature_types.SetDevice(generic_param_->gpu_id);
+        dmat->Info().feature_types.SetDevice(ctx_->gpu_id);
        auto feature_types = dmat->Info().feature_types.ConstDeviceSpan();
        this->PredictInternal(
-            page.Impl()->GetDeviceAccessor(generic_param_->gpu_id, feature_types),
+            page.Impl()->GetDeviceAccessor(ctx_->gpu_id, feature_types),
            d_model,
            out_preds,
            batch_offset);
@@ -726,15 +726,15 @@ class GPUPredictor : public xgboost::Predictor {
      Predictor::Predictor{generic_param} {}

  ~GPUPredictor() override {
-    if (generic_param_->gpu_id >= 0 && generic_param_->gpu_id < common::AllVisibleGPUs()) {
-      dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
+    if (ctx_->gpu_id >= 0 && ctx_->gpu_id < common::AllVisibleGPUs()) {
+      dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
    }
  }

  void PredictBatch(DMatrix* dmat, PredictionCacheEntry* predts,
                    const gbm::GBTreeModel& model, uint32_t tree_begin,
                    uint32_t tree_end = 0) const override {
-    int device = generic_param_->gpu_id;
+    int device = ctx_->gpu_id;
    CHECK_GE(device, 0) << "Set `gpu_id' to positive value for processing GPU data.";
    auto* out_preds = &predts->predictions;
    if (tree_end == 0) {
@@ -754,7 +754,7 @@ class GPUPredictor : public xgboost::Predictor {
    CHECK_EQ(m->NumColumns(), model.learner_model_param->num_feature)
        << "Number of columns in data must equal to trained model.";
    CHECK_EQ(dh::CurrentDevice(), m->DeviceIdx())
-        << "XGBoost is running on device: " << this->generic_param_->gpu_id << ", "
+        << "XGBoost is running on device: " << this->ctx_->gpu_id << ", "
        << "but data is on: " << m->DeviceIdx();
    if (p_m) {
      p_m->Info().num_row_ = m->NumRows();
@@ -821,8 +821,8 @@ class GPUPredictor : public xgboost::Predictor {
    if (tree_weights != nullptr) {
      LOG(FATAL) << "Dart booster feature " << not_implemented;
    }
-    dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
-    out_contribs->SetDevice(generic_param_->gpu_id);
+    dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
+    out_contribs->SetDevice(ctx_->gpu_id);
    if (tree_end == 0 || tree_end > model.trees.size()) {
      tree_end = static_cast<uint32_t>(model.trees.size());
    }
@@ -840,12 +840,12 @@ class GPUPredictor : public xgboost::Predictor {
    dh::device_vector<gpu_treeshap::PathElement<ShapSplitCondition>>
        device_paths;
    DeviceModel d_model;
-    d_model.Init(model, 0, tree_end, generic_param_->gpu_id);
+    d_model.Init(model, 0, tree_end, ctx_->gpu_id);
    dh::device_vector<uint32_t> categories;
-    ExtractPaths(&device_paths, &d_model, &categories, generic_param_->gpu_id);
+    ExtractPaths(&device_paths, &d_model, &categories, ctx_->gpu_id);
    for (auto& batch : p_fmat->GetBatches<SparsePage>()) {
-      batch.data.SetDevice(generic_param_->gpu_id);
-      batch.offset.SetDevice(generic_param_->gpu_id);
+      batch.data.SetDevice(ctx_->gpu_id);
+      batch.offset.SetDevice(ctx_->gpu_id);
      SparsePageView X(batch.data.DeviceSpan(), batch.offset.DeviceSpan(),
                       model.learner_model_param->num_feature);
      auto begin = dh::tbegin(phis) + batch.base_rowid * contributions_columns;
@@ -854,7 +854,7 @@ class GPUPredictor : public xgboost::Predictor {
          dh::tend(phis));
    }
    // Add the base margin term to last column
-    p_fmat->Info().base_margin_.SetDevice(generic_param_->gpu_id);
+    p_fmat->Info().base_margin_.SetDevice(ctx_->gpu_id);
    const auto margin = p_fmat->Info().base_margin_.Data()->ConstDeviceSpan();
    float base_score = model.learner_model_param->base_score;
    dh::LaunchN(
@@ -879,8 +879,8 @@ class GPUPredictor : public xgboost::Predictor {
    if (tree_weights != nullptr) {
      LOG(FATAL) << "Dart booster feature " << not_implemented;
    }
-    dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
-    out_contribs->SetDevice(generic_param_->gpu_id);
+    dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
+    out_contribs->SetDevice(ctx_->gpu_id);
    if (tree_end == 0 || tree_end > model.trees.size()) {
      tree_end = static_cast<uint32_t>(model.trees.size());
    }
@@ -899,12 +899,12 @@ class GPUPredictor : public xgboost::Predictor {
    dh::device_vector<gpu_treeshap::PathElement<ShapSplitCondition>>
        device_paths;
    DeviceModel d_model;
-    d_model.Init(model, 0, tree_end, generic_param_->gpu_id);
+    d_model.Init(model, 0, tree_end, ctx_->gpu_id);
    dh::device_vector<uint32_t> categories;
-    ExtractPaths(&device_paths, &d_model, &categories, generic_param_->gpu_id);
+    ExtractPaths(&device_paths, &d_model, &categories, ctx_->gpu_id);
    for (auto& batch : p_fmat->GetBatches<SparsePage>()) {
-      batch.data.SetDevice(generic_param_->gpu_id);
-      batch.offset.SetDevice(generic_param_->gpu_id);
+      batch.data.SetDevice(ctx_->gpu_id);
+      batch.offset.SetDevice(ctx_->gpu_id);
      SparsePageView X(batch.data.DeviceSpan(), batch.offset.DeviceSpan(),
                       model.learner_model_param->num_feature);
      auto begin = dh::tbegin(phis) + batch.base_rowid * contributions_columns;
@@ -913,7 +913,7 @@ class GPUPredictor : public xgboost::Predictor {
          dh::tend(phis));
    }
    // Add the base margin term to last column
-    p_fmat->Info().base_margin_.SetDevice(generic_param_->gpu_id);
+    p_fmat->Info().base_margin_.SetDevice(ctx_->gpu_id);
    const auto margin = p_fmat->Info().base_margin_.Data()->ConstDeviceSpan();
    float base_score = model.learner_model_param->base_score;
    size_t n_features = model.learner_model_param->num_feature;
@@ -938,8 +938,8 @@ class GPUPredictor : public xgboost::Predictor {
  void PredictLeaf(DMatrix *p_fmat, HostDeviceVector<bst_float> *predictions,
                   const gbm::GBTreeModel &model,
                   unsigned tree_end) const override {
-    dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
-    auto max_shared_memory_bytes = ConfigureDevice(generic_param_->gpu_id);
+    dh::safe_cuda(cudaSetDevice(ctx_->gpu_id));
+    auto max_shared_memory_bytes = ConfigureDevice(ctx_->gpu_id);

    const MetaInfo& info = p_fmat->Info();
    constexpr uint32_t kBlockThreads = 128;
@@ -953,15 +953,15 @@ class GPUPredictor : public xgboost::Predictor {
    if (tree_end == 0 || tree_end > model.trees.size()) {
      tree_end = static_cast<uint32_t>(model.trees.size());
    }
-    predictions->SetDevice(generic_param_->gpu_id);
+    predictions->SetDevice(ctx_->gpu_id);
    predictions->Resize(num_rows * tree_end);
    DeviceModel d_model;
-    d_model.Init(model, 0, tree_end, this->generic_param_->gpu_id);
+    d_model.Init(model, 0, tree_end, this->ctx_->gpu_id);

    if (p_fmat->PageExists<SparsePage>()) {
      for (auto const& batch : p_fmat->GetBatches<SparsePage>()) {
-        batch.data.SetDevice(generic_param_->gpu_id);
-        batch.offset.SetDevice(generic_param_->gpu_id);
+        batch.data.SetDevice(ctx_->gpu_id);
+        batch.offset.SetDevice(ctx_->gpu_id);
        bst_row_t batch_offset = 0;
        SparsePageView data{batch.data.DeviceSpan(), batch.offset.DeviceSpan(),
                            model.learner_model_param->num_feature};
@@ -986,7 +986,7 @@ class GPUPredictor : public xgboost::Predictor {
    } else {
      for (auto const& batch : p_fmat->GetBatches<EllpackPage>()) {
        bst_row_t batch_offset = 0;
-        EllpackDeviceAccessor data{batch.Impl()->GetDeviceAccessor(generic_param_->gpu_id)};
+        EllpackDeviceAccessor data{batch.Impl()->GetDeviceAccessor(ctx_->gpu_id)};
        size_t num_rows = batch.Size();
        auto grid =
            static_cast<uint32_t>(common::DivRoundUp(num_rows, kBlockThreads));