Move thread local entry into Learner. (#5396)

* Move thread local entry into Learner.

This is an attempt to work around a CUDA context issue with static variables, where
the CUDA context can be released before the device vector is destroyed.

* Add PredictionEntry to thread local entry.

This eliminates one copy of prediction vector.

* Don't define CUDA C API in a namespace.
This commit is contained in:
Jiaming Yuan
2020-03-07 15:37:39 +08:00
committed by GitHub
parent 1ba6706167
commit 0dd97c206b
6 changed files with 87 additions and 63 deletions

View File

@@ -6,6 +6,7 @@
*/
#include <dmlc/io.h>
#include <dmlc/parameter.h>
#include <dmlc/thread_local.h>
#include <algorithm>
#include <iomanip>
@@ -192,6 +193,9 @@ void GenericParameter::ConfigureGpuId(bool require_gpu) {
#endif // defined(XGBOOST_USE_CUDA)
}
// Store holding a map from a Learner's address to that learner's
// XGBAPIThreadLocalEntry, so each learner gets its own entry rather than
// sharing a single one.
// NOTE(review): dmlc::ThreadLocalStore presumably hands out one map instance
// per thread -- confirm against dmlc/thread_local.h.
using XGBAPIThreadLocalStore =
dmlc::ThreadLocalStore<std::map<Learner const *, XGBAPIThreadLocalEntry>>;
/*!
* \brief learner that performs gradient boosting for a specific objective
* function. It does training and prediction.
@@ -205,6 +209,12 @@ class LearnerImpl : public Learner {
cache_.Cache(d, GenericParameter::kCpuId);
}
}
/*!
 * \brief Remove the entry registered for this learner from the store returned
 *        by XGBAPIThreadLocalStore::Get().
 *
 * Erasing by key is already a no-op when no entry exists for the key, so no
 * separate lookup is needed before the erase.
 *
 * NOTE(review): only the store visible to the destroying thread is cleaned
 * here; if the store is per-thread, entries created for this learner from
 * other threads would presumably remain -- confirm whether that matters.
 */
~LearnerImpl() override {
  XGBAPIThreadLocalStore::Get()->erase(this);
}
// Configuration before data is known.
void Configure() override {
if (!this->need_configuration_) { return; }
@@ -873,6 +883,9 @@ class LearnerImpl : public Learner {
}
}
/*!
 * \brief Fetch the XGBAPIThreadLocalEntry associated with this learner.
 *
 * The lookup uses the map's subscript operator, so an entry is created on
 * first access for a learner that has none yet.
 *
 * \return Reference to the entry stored under this learner's address.
 */
XGBAPIThreadLocalEntry& GetThreadLocal() const override {
  auto& entries = *XGBAPIThreadLocalStore::Get();
  return entries[this];
}
const std::map<std::string, std::string>& GetConfigurationArguments() const override {
return cfg_;
}