Move thread local entry into Learner. (#5396)

* Move thread local entry into Learner.

  This is an attempt to work around a CUDA context issue with static variables, where the CUDA context can be released before the device vector.

* Add PredictionEntry to the thread local entry.

  This eliminates one copy of the prediction vector.

* Don't define the CUDA C API in a namespace.
2020-03-07 15:37:39 +08:00
parent 1ba6706167
commit 0dd97c206b
6 changed files with 87 additions and 63 deletions
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -6,6 +6,7 @@
 */
 #include <dmlc/io.h>
 #include <dmlc/parameter.h>
+#include <dmlc/thread_local.h>

 #include <algorithm>
 #include <iomanip>
@@ -192,6 +193,9 @@ void GenericParameter::ConfigureGpuId(bool require_gpu) {
 #endif  // defined(XGBOOST_USE_CUDA)
 }

+using XGBAPIThreadLocalStore =
+    dmlc::ThreadLocalStore<std::map<Learner const *, XGBAPIThreadLocalEntry>>;
+
 /*!
 * \brief learner that performs gradient boosting for a specific objective
 * function. It does training and prediction.
@@ -205,6 +209,12 @@ class LearnerImpl : public Learner {
      cache_.Cache(d, GenericParameter::kCpuId);
    }
  }
+  ~LearnerImpl() override {
+    auto local_map = XGBAPIThreadLocalStore::Get();
+    if (local_map->find(this) != local_map->cend()) {
+      local_map->erase(this);
+    }
+  }
  // Configuration before data is known.
  void Configure() override {
    if (!this->need_configuration_) { return; }
@@ -873,6 +883,9 @@ class LearnerImpl : public Learner {
    }
  }

+  XGBAPIThreadLocalEntry& GetThreadLocal() const override {
+    return (*XGBAPIThreadLocalStore::Get())[this];
+  }
  const std::map<std::string, std::string>& GetConfigurationArguments() const override {
    return cfg_;
  }