Fix GPU predictor when the DMatrix column count does not match the model's feature count (#4613)

2019-06-27 16:03:02 -07:00 · 2019-06-27 16:03:02 -07:00 · 63ec95623d
commit 63ec95623d
parent 4d6590be3c
2 changed files with 24 additions and 12 deletions
--- a/src/predictor/gpu_predictor.cu
+++ b/src/predictor/gpu_predictor.cu
@ -278,7 +278,7 @@ class GPUPredictor : public xgboost::Predictor {
    }

    void PredictInternal
-    (const SparsePage& batch, const MetaInfo& info,
+    (const SparsePage& batch, size_t num_features,
     HostDeviceVector<bst_float>* predictions) {
      if (predictions->DeviceSize(device_) == 0) { return; }
      dh::safe_cuda(cudaSetDevice(device_));
@ -287,7 +287,7 @@ class GPUPredictor : public xgboost::Predictor {
      const int GRID_SIZE = static_cast<int>(dh::DivRoundUp(num_rows, BLOCK_THREADS));

      int shared_memory_bytes = static_cast<int>
-        (sizeof(float) * info.num_col_ * BLOCK_THREADS);
+        (sizeof(float) * num_features * BLOCK_THREADS);
      bool use_shared = true;
      if (shared_memory_bytes > max_shared_memory_bytes_) {
        shared_memory_bytes = 0;
@ -300,7 +300,7 @@ class GPUPredictor : public xgboost::Predictor {
      PredictKernel<BLOCK_THREADS><<<GRID_SIZE, BLOCK_THREADS, shared_memory_bytes>>>
        (dh::ToSpan(nodes_), predictions->DeviceSpan(device_), dh::ToSpan(tree_segments_),
         dh::ToSpan(tree_group_), batch.offset.DeviceSpan(device_),
-         batch.data.DeviceSpan(device_), this->tree_begin_, this->tree_end_, info.num_col_,
+         batch.data.DeviceSpan(device_), this->tree_begin_, this->tree_end_, num_features,
         num_rows, entry_start, use_shared, this->num_group_);
    }

@ -363,7 +363,7 @@ class GPUPredictor : public xgboost::Predictor {
      batch.data.Reshard(GPUDistribution::Explicit(devices_, device_offsets));

      dh::ExecuteIndexShards(&shards_, [&](int idx, DeviceShard& shard) {
-        shard.PredictInternal(batch, dmat->Info(), out_preds);
+        shard.PredictInternal(batch, model.param.num_feature, out_preds);
      });
      batch_offset += batch.Size() * model.param.num_output_group;
    }
--- a/tests/cpp/predictor/test_gpu_predictor.cu
+++ b/tests/cpp/predictor/test_gpu_predictor.cu
@ -38,11 +38,11 @@ TEST(gpu_predictor, Test) {
  gpu_predictor->Init({}, {});
  cpu_predictor->Init({}, {});

-  gbm::GBTreeModel model = CreateTestModel();
-
  int n_row = 5;
  int n_col = 5;

+  gbm::GBTreeModel model = CreateTestModel();
+  model.param.num_feature = n_col;
  auto dmat = CreateDMatrix(n_row, n_col, 0);

  // Test predict batch
@ -95,6 +95,8 @@ TEST(gpu_predictor, ExternalMemoryTest) {
      std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam));
  gpu_predictor->Init({}, {});
  gbm::GBTreeModel model = CreateTestModel();
+  int n_col = 3;
+  model.param.num_feature = n_col;
  std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(32, 64);

  // Test predict batch
@ -116,17 +118,25 @@ TEST(gpu_predictor, ExternalMemoryTest) {
  // Test predict contribution
  std::vector<float> out_contribution;
  gpu_predictor->PredictContribution(dmat.get(), &out_contribution, model);
-  EXPECT_EQ(out_contribution.size(), dmat->Info().num_row_);
-  for (const auto& v : out_contribution) {
-    ASSERT_EQ(v, 1.5);
+  EXPECT_EQ(out_contribution.size(), dmat->Info().num_row_ * (n_col + 1));
+  for (int i = 0; i < out_contribution.size(); i++) {
+    if (i % (n_col + 1) == n_col) {
+      ASSERT_EQ(out_contribution[i], 1.5);
+    } else {
+      ASSERT_EQ(out_contribution[i], 0);
+    }
  }

  // Test predict contribution (approximate method)
  std::vector<float> out_contribution_approximate;
  gpu_predictor->PredictContribution(dmat.get(), &out_contribution_approximate, model, true);
-  EXPECT_EQ(out_contribution_approximate.size(), dmat->Info().num_row_);
-  for (const auto& v : out_contribution_approximate) {
-    ASSERT_EQ(v, 1.5);
+  EXPECT_EQ(out_contribution.size(), dmat->Info().num_row_ * (n_col + 1));
+  for (int i = 0; i < out_contribution.size(); i++) {
+    if (i % (n_col + 1) == n_col) {
+      ASSERT_EQ(out_contribution[i], 1.5);
+    } else {
+      ASSERT_EQ(out_contribution[i], 0);
+    }
  }
 }

@ -226,6 +236,7 @@ TEST(gpu_predictor, MGPU_Test) {
    auto dmat = CreateDMatrix(n_row, n_col, 0);

    gbm::GBTreeModel model = CreateTestModel();
+    model.param.num_feature = n_col;

    // Test predict batch
    HostDeviceVector<float> gpu_out_predictions;
@ -253,6 +264,7 @@ TEST(gpu_predictor, MGPU_ExternalMemoryTest) {
  gpu_predictor->Init({}, {});

  gbm::GBTreeModel model = CreateTestModel();
+  model.param.num_feature = 3;
  const int n_classes = 3;
  model.param.num_output_group = n_classes;
  std::vector<std::unique_ptr<DMatrix>> dmats;