Enhance inplace prediction. (#6653)

* Accept array interface for csr and array. * Accept an optional proxy dmatrix for metainfo. This constructs an explicit `_ProxyDMatrix` type in Python. * Remove unused doc. * Add strict output.
2021-02-02 11:41:46 +08:00
parent 87ab1ad607
commit 411592a347
22 changed files with 955 additions and 530 deletions
--- a/src/predictor/cpu_predictor.cc
+++ b/src/predictor/cpu_predictor.cc
@@ -1,5 +1,5 @@
 /*!
- * Copyright by Contributors 2017-2020
+ * Copyright by Contributors 2017-2021
 */
 #include <dmlc/omp.h>
 #include <dmlc/any.h>
@@ -287,7 +287,7 @@ class CPUPredictor : public Predictor {
  }

  template <typename Adapter>
-  void DispatchedInplacePredict(dmlc::any const &x,
+  void DispatchedInplacePredict(dmlc::any const &x, std::shared_ptr<DMatrix> p_m,
                                const gbm::GBTreeModel &model, float missing,
                                PredictionCacheEntry *out_preds,
                                uint32_t tree_begin, uint32_t tree_end) const {
@@ -295,33 +295,44 @@ class CPUPredictor : public Predictor {
    auto m = dmlc::get<std::shared_ptr<Adapter>>(x);
    CHECK_EQ(m->NumColumns(), model.learner_model_param->num_feature)
        << "Number of columns in data must equal to trained model.";
-    MetaInfo info;
-    info.num_col_ = m->NumColumns();
-    info.num_row_ = m->NumRows();
-    this->InitOutPredictions(info, &(out_preds->predictions), model);
-    std::vector<Entry> workspace(info.num_col_ * 8 * threads);
+    if (p_m) {
+      p_m->Info().num_row_ = m->NumRows();
+      this->InitOutPredictions(p_m->Info(), &(out_preds->predictions), model);
+    } else {
+      MetaInfo info;
+      info.num_row_ = m->NumRows();
+      this->InitOutPredictions(info, &(out_preds->predictions), model);
+    }
+    std::vector<Entry> workspace(m->NumColumns() * 8 * threads);
    auto &predictions = out_preds->predictions.HostVector();
    std::vector<RegTree::FVec> thread_temp;
-    InitThreadTemp(threads*kBlockOfRowsSize, model.learner_model_param->num_feature,
-                   &thread_temp);
-    PredictBatchByBlockOfRowsKernel<AdapterView<Adapter>,
-                       kBlockOfRowsSize>(AdapterView<Adapter>(
-                                          m.get(), missing, common::Span<Entry>{workspace}),
-                                          &predictions, model, tree_begin, tree_end, &thread_temp);
+    InitThreadTemp(threads * kBlockOfRowsSize,
+                   model.learner_model_param->num_feature, &thread_temp);
+    PredictBatchByBlockOfRowsKernel<AdapterView<Adapter>, kBlockOfRowsSize>(
+        AdapterView<Adapter>(m.get(), missing, common::Span<Entry>{workspace}),
+        &predictions, model, tree_begin, tree_end, &thread_temp);
  }

-  void InplacePredict(dmlc::any const &x, const gbm::GBTreeModel &model,
-                      float missing, PredictionCacheEntry *out_preds,
-                      uint32_t tree_begin, unsigned tree_end) const override {
+  bool InplacePredict(dmlc::any const &x, std::shared_ptr<DMatrix> p_m,
+                      const gbm::GBTreeModel &model, float missing,
+                      PredictionCacheEntry *out_preds, uint32_t tree_begin,
+                      unsigned tree_end) const override {
    if (x.type() == typeid(std::shared_ptr<data::DenseAdapter>)) {
      this->DispatchedInplacePredict<data::DenseAdapter>(
-          x, model, missing, out_preds, tree_begin, tree_end);
+          x, p_m, model, missing, out_preds, tree_begin, tree_end);
    } else if (x.type() == typeid(std::shared_ptr<data::CSRAdapter>)) {
      this->DispatchedInplacePredict<data::CSRAdapter>(
-          x, model, missing, out_preds, tree_begin, tree_end);
+          x, p_m, model, missing, out_preds, tree_begin, tree_end);
+    } else if (x.type() == typeid(std::shared_ptr<data::ArrayAdapter>)) {
+      this->DispatchedInplacePredict<data::ArrayAdapter> (
+          x, p_m, model, missing, out_preds, tree_begin, tree_end);
+    } else if (x.type() == typeid(std::shared_ptr<data::CSRArrayAdapter>)) {
+      this->DispatchedInplacePredict<data::CSRArrayAdapter> (
+          x, p_m, model, missing, out_preds, tree_begin, tree_end);
    } else {
-      LOG(FATAL) << "Data type is not supported by CPU Predictor.";
+      return false;
    }
+    return true;
  }

  void PredictInstance(const SparsePage::Inst& inst,
--- a/src/predictor/gpu_predictor.cu
+++ b/src/predictor/gpu_predictor.cu
@@ -1,5 +1,5 @@
 /*!
- * Copyright 2017-2020 by Contributors
+ * Copyright 2017-2021 by Contributors
 */
 #include <thrust/copy.h>
 #include <thrust/device_ptr.h>
@@ -644,7 +644,7 @@ class GPUPredictor : public xgboost::Predictor {
  }

  template <typename Adapter, typename Loader>
-  void DispatchedInplacePredict(dmlc::any const &x,
+  void DispatchedInplacePredict(dmlc::any const &x, std::shared_ptr<DMatrix> p_m,
                                const gbm::GBTreeModel &model, float,
                                PredictionCacheEntry *out_preds,
                                uint32_t tree_begin, uint32_t tree_end) const {
@@ -659,16 +659,20 @@ class GPUPredictor : public xgboost::Predictor {
    CHECK_EQ(this->generic_param_->gpu_id, m->DeviceIdx())
        << "XGBoost is running on device: " << this->generic_param_->gpu_id << ", "
        << "but data is on: " << m->DeviceIdx();
-    MetaInfo info;
-    info.num_col_ = m->NumColumns();
-    info.num_row_ = m->NumRows();
-    this->InitOutPredictions(info, &(out_preds->predictions), model);
+    if (p_m) {
+      p_m->Info().num_row_ = m->NumRows();
+      this->InitOutPredictions(p_m->Info(), &(out_preds->predictions), model);
+    } else {
+      MetaInfo info;
+      info.num_row_ = m->NumRows();
+      this->InitOutPredictions(info, &(out_preds->predictions), model);
+    }

    const uint32_t BLOCK_THREADS = 128;
-    auto GRID_SIZE = static_cast<uint32_t>(common::DivRoundUp(info.num_row_, BLOCK_THREADS));
+    auto GRID_SIZE = static_cast<uint32_t>(common::DivRoundUp(m->NumRows(), BLOCK_THREADS));

    size_t shared_memory_bytes =
-        SharedMemoryBytes<BLOCK_THREADS>(info.num_col_, max_shared_memory_bytes);
+        SharedMemoryBytes<BLOCK_THREADS>(m->NumColumns(), max_shared_memory_bytes);
    bool use_shared = shared_memory_bytes != 0;
    size_t entry_start = 0;

@@ -680,23 +684,25 @@ class GPUPredictor : public xgboost::Predictor {
        d_model.categories_tree_segments.ConstDeviceSpan(),
        d_model.categories_node_segments.ConstDeviceSpan(),
        d_model.categories.ConstDeviceSpan(), tree_begin, tree_end, m->NumColumns(),
-        info.num_row_, entry_start, use_shared, output_groups);
+        m->NumRows(), entry_start, use_shared, output_groups);
  }

-  void InplacePredict(dmlc::any const &x, const gbm::GBTreeModel &model,
-                      float missing, PredictionCacheEntry *out_preds,
-                      uint32_t tree_begin, unsigned tree_end) const override {
+  bool InplacePredict(dmlc::any const &x, std::shared_ptr<DMatrix> p_m,
+                      const gbm::GBTreeModel &model, float missing,
+                      PredictionCacheEntry *out_preds, uint32_t tree_begin,
+                      unsigned tree_end) const override {
    if (x.type() == typeid(std::shared_ptr<data::CupyAdapter>)) {
      this->DispatchedInplacePredict<
          data::CupyAdapter, DeviceAdapterLoader<data::CupyAdapterBatch>>(
-          x, model, missing, out_preds, tree_begin, tree_end);
+          x, p_m, model, missing, out_preds, tree_begin, tree_end);
    } else if (x.type() == typeid(std::shared_ptr<data::CudfAdapter>)) {
      this->DispatchedInplacePredict<
          data::CudfAdapter, DeviceAdapterLoader<data::CudfAdapterBatch>>(
-          x, model, missing, out_preds, tree_begin, tree_end);
+          x, p_m, model, missing, out_preds, tree_begin, tree_end);
    } else {
-      LOG(FATAL) << "Only CuPy and CuDF are supported by GPU Predictor.";
+      return false;
    }
+    return true;
  }

  void PredictContribution(DMatrix* p_fmat,