Thread-safe prediction by making the prediction cache thread-local. (#5853)

Co-authored-by: Jiaming Yuan <jm.yuan@outlook.com>
2020-07-30 06:33:50 +02:00
parent fa3715f584
commit d268a2a463
5 changed files with 71 additions and 14 deletions
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -221,13 +221,13 @@ void GenericParameter::ConfigureGpuId(bool require_gpu) {
 using LearnerAPIThreadLocalStore =
    dmlc::ThreadLocalStore<std::map<Learner const *, XGBAPIThreadLocalEntry>>;

+using ThreadLocalPredictionCache =
+    dmlc::ThreadLocalStore<std::map<Learner const *, PredictionContainer>>;
+
 class LearnerConfiguration : public Learner {
 protected:
  static std::string const kEvalMetric;  // NOLINT

- protected:
-  PredictionContainer cache_;
-
 protected:
  std::atomic<bool> need_configuration_;
  std::map<std::string, std::string> cfg_;
@@ -244,12 +244,19 @@ class LearnerConfiguration : public Learner {
  explicit LearnerConfiguration(std::vector<std::shared_ptr<DMatrix> > cache)
      : need_configuration_{true} {
    monitor_.Init("Learner");
+    auto& per_thread = (*ThreadLocalPredictionCache::Get())[this];  // entry keyed by this learner
    for (std::shared_ptr<DMatrix> const& d : cache) {
-      cache_.Cache(d, GenericParameter::kCpuId);
+      per_thread.Cache(d, GenericParameter::kCpuId);
+    }
+  }
+  ~LearnerConfiguration() override {  // Forget this learner in the calling thread's cache.
+    auto* per_thread = ThreadLocalPredictionCache::Get();
+    if (per_thread->count(this) != 0) {
+      per_thread->erase(this);  // Only the map returned by Get() above is cleaned.
    }
  }
-  // Configuration before data is known.

+  // Configuration before data is known.
  void Configure() override {
    // Varient of double checked lock
    if (!this->need_configuration_) { return; }
@@ -316,6 +323,10 @@ class LearnerConfiguration : public Learner {
    monitor_.Stop("Configure");
  }

+  virtual PredictionContainer* GetPredictionCache() const {  // Cache entry keyed by this learner.
+    return &(ThreadLocalPredictionCache::Get()->operator[](this));  // operator[] inserts if absent.
+  }
+
  void LoadConfig(Json const& in) override {
    CHECK(IsA<Object>(in));
    Version::Load(in, true);
@@ -511,7 +522,8 @@ class LearnerConfiguration : public Learner {
    if (mparam_.num_feature == 0) {
      // TODO(hcho3): Change num_feature to 64-bit integer
      unsigned num_feature = 0;
-      for (auto& matrix : cache_.Container()) {
+      auto local_cache = this->GetPredictionCache();
+      for (auto& matrix : local_cache->Container()) {
        CHECK(matrix.first);
        CHECK(!matrix.second.ref.expired());
        const uint64_t num_col = matrix.first->Info().num_col_;
@@ -948,7 +960,8 @@ class LearnerImpl : public LearnerIO {
    this->CheckDataSplitMode();
    this->ValidateDMatrix(train.get(), true);

-    auto& predt = this->cache_.Cache(train, generic_parameters_.gpu_id);
+    auto local_cache = this->GetPredictionCache();
+    auto& predt = local_cache->Cache(train, generic_parameters_.gpu_id);

    monitor_.Start("PredictRaw");
    this->PredictRaw(train.get(), &predt, true);
@@ -973,9 +986,10 @@ class LearnerImpl : public LearnerIO {
    }
    this->CheckDataSplitMode();
    this->ValidateDMatrix(train.get(), true);
-    this->cache_.Cache(train, generic_parameters_.gpu_id);
+    auto local_cache = this->GetPredictionCache();
+    local_cache->Cache(train, generic_parameters_.gpu_id);

-    gbm_->DoBoost(train.get(), in_gpair, &cache_.Entry(train.get()));
+    gbm_->DoBoost(train.get(), in_gpair, &local_cache->Entry(train.get()));
    monitor_.Stop("BoostOneIter");
  }

@@ -991,9 +1005,11 @@ class LearnerImpl : public LearnerIO {
      metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric(), &generic_parameters_));
      metrics_.back()->Configure({cfg_.begin(), cfg_.end()});
    }
+
+    auto local_cache = this->GetPredictionCache();
    for (size_t i = 0; i < data_sets.size(); ++i) {
      std::shared_ptr<DMatrix> m = data_sets[i];
-      auto &predt = this->cache_.Cache(m, generic_parameters_.gpu_id);
+      auto &predt = local_cache->Cache(m, generic_parameters_.gpu_id);
      this->ValidateDMatrix(m.get(), false);
      this->PredictRaw(m.get(), &predt, false);

@@ -1030,7 +1046,8 @@ class LearnerImpl : public LearnerIO {
    } else if (pred_leaf) {
      gbm_->PredictLeaf(data.get(), &out_preds->HostVector(), ntree_limit);
    } else {
-      auto& prediction = cache_.Cache(data, generic_parameters_.gpu_id);
+      auto local_cache = this->GetPredictionCache();
+      auto& prediction = local_cache->Cache(data, generic_parameters_.gpu_id);
      this->PredictRaw(data.get(), &prediction, training, ntree_limit);
      // Copy the prediction cache to output prediction. out_preds comes from C API
      out_preds->SetDevice(generic_parameters_.gpu_id);
--- a/src/predictor/predictor.cc
+++ b/src/predictor/predictor.cc
@@ -26,7 +26,6 @@ void PredictionContainer::ClearExpiredEntries() {
 }

 PredictionCacheEntry &PredictionContainer::Cache(std::shared_ptr<DMatrix> m, int32_t device) {
-  std::lock_guard<std::mutex> guard { cache_lock_ };
  this->ClearExpiredEntries();
  container_[m.get()].ref = m;
  if (device != GenericParameter::kCpuId) {
--- a/src/tree/updater_quantile_hist.cc
+++ b/src/tree/updater_quantile_hist.cc
@@ -1384,6 +1384,5 @@ XGBOOST_REGISTER_TREE_UPDATER(QuantileHistMaker, "grow_quantile_histmaker")
    []() {
      return new QuantileHistMaker();
    });
-
 }  // namespace tree
 }  // namespace xgboost