diff --git a/src/predictor/gpu_predictor.cu b/src/predictor/gpu_predictor.cu
index d830d1f7b..a817fdccb 100644
--- a/src/predictor/gpu_predictor.cu
+++ b/src/predictor/gpu_predictor.cu
@@ -278,7 +278,7 @@ class GPUPredictor : public xgboost::Predictor {
     }
 
     void PredictInternal
-    (const SparsePage& batch, const MetaInfo& info,
+    (const SparsePage& batch, size_t num_features,
      HostDeviceVector<bst_float>* predictions) {
       if (predictions->DeviceSize(device_) == 0) { return; }
       dh::safe_cuda(cudaSetDevice(device_));
@@ -287,7 +287,7 @@ class GPUPredictor : public xgboost::Predictor {
       const int GRID_SIZE = static_cast<int>(dh::DivRoundUp(num_rows, BLOCK_THREADS));
 
       int shared_memory_bytes = static_cast<int>
-        (sizeof(float) * info.num_col_ * BLOCK_THREADS);
+        (sizeof(float) * num_features * BLOCK_THREADS);
       bool use_shared = true;
       if (shared_memory_bytes > max_shared_memory_bytes_) {
         shared_memory_bytes = 0;
@@ -300,7 +300,7 @@ class GPUPredictor : public xgboost::Predictor {
       PredictKernel<BLOCK_THREADS><<<GRID_SIZE, BLOCK_THREADS, shared_memory_bytes>>>
         (dh::ToSpan(nodes_), predictions->DeviceSpan(device_), dh::ToSpan(tree_segments_),
          dh::ToSpan(tree_group_), batch.offset.DeviceSpan(device_),
-         batch.data.DeviceSpan(device_), this->tree_begin_, this->tree_end_, info.num_col_,
+         batch.data.DeviceSpan(device_), this->tree_begin_, this->tree_end_, num_features,
          num_rows, entry_start, use_shared, this->num_group_);
     }
 
@@ -363,7 +363,7 @@ class GPUPredictor : public xgboost::Predictor {
       batch.data.Reshard(GPUDistribution::Explicit(devices_, device_offsets));
 
       dh::ExecuteIndexShards(&shards_, [&](int idx, DeviceShard& shard) {
-        shard.PredictInternal(batch, dmat->Info(), out_preds);
+        shard.PredictInternal(batch, model.param.num_feature, out_preds);
       });
       batch_offset += batch.Size() * model.param.num_output_group;
     }
diff --git a/tests/cpp/predictor/test_gpu_predictor.cu b/tests/cpp/predictor/test_gpu_predictor.cu
index 24b450737..2dffab8b6 100644
--- a/tests/cpp/predictor/test_gpu_predictor.cu
+++ b/tests/cpp/predictor/test_gpu_predictor.cu
@@ -38,11 +38,11 @@ TEST(gpu_predictor, Test) {
   gpu_predictor->Init({}, {});
   cpu_predictor->Init({}, {});
 
-  gbm::GBTreeModel model = CreateTestModel();
-
   int n_row = 5;
   int n_col = 5;
 
+  gbm::GBTreeModel model = CreateTestModel();
+  model.param.num_feature = n_col;
   auto dmat = CreateDMatrix(n_row, n_col, 0);
 
   // Test predict batch
@@ -95,6 +95,8 @@ TEST(gpu_predictor, ExternalMemoryTest) {
       std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam));
   gpu_predictor->Init({}, {});
   gbm::GBTreeModel model = CreateTestModel();
+  int n_col = 3;
+  model.param.num_feature = n_col;
   std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(32, 64);
 
   // Test predict batch
@@ -116,17 +118,25 @@ TEST(gpu_predictor, ExternalMemoryTest) {
   // Test predict contribution
   std::vector<float> out_contribution;
   gpu_predictor->PredictContribution(dmat.get(), &out_contribution, model);
-  EXPECT_EQ(out_contribution.size(), dmat->Info().num_row_);
-  for (const auto& v : out_contribution) {
-    ASSERT_EQ(v, 1.5);
+  EXPECT_EQ(out_contribution.size(), dmat->Info().num_row_ * (n_col + 1));
+  for (int i = 0; i < out_contribution.size(); i++) {
+    if (i % (n_col + 1) == n_col) {
+      ASSERT_EQ(out_contribution[i], 1.5);
+    } else {
+      ASSERT_EQ(out_contribution[i], 0);
+    }
   }
 
   // Test predict contribution (approximate method)
   std::vector<float> out_contribution_approximate;
   gpu_predictor->PredictContribution(dmat.get(), &out_contribution_approximate, model, true);
-  EXPECT_EQ(out_contribution_approximate.size(), dmat->Info().num_row_);
-  for (const auto& v : out_contribution_approximate) {
-    ASSERT_EQ(v, 1.5);
+  EXPECT_EQ(out_contribution_approximate.size(), dmat->Info().num_row_ * (n_col + 1));
+  for (int i = 0; i < out_contribution_approximate.size(); i++) {
+    if (i % (n_col + 1) == n_col) {  // last entry of each (n_col + 1)-wide row
+      ASSERT_EQ(out_contribution_approximate[i], 1.5);
+    } else {
+      ASSERT_EQ(out_contribution_approximate[i], 0);
+    }
   }
 }
 
@@ -226,6 +236,7 @@ TEST(gpu_predictor, MGPU_Test) {
     auto dmat = CreateDMatrix(n_row, n_col, 0);
 
     gbm::GBTreeModel model = CreateTestModel();
+    model.param.num_feature = n_col;
 
     // Test predict batch
     HostDeviceVector<float> gpu_out_predictions;
@@ -253,6 +264,7 @@ TEST(gpu_predictor, MGPU_ExternalMemoryTest) {
   gpu_predictor->Init({}, {});
 
   gbm::GBTreeModel model = CreateTestModel();
+  model.param.num_feature = 3;
   const int n_classes = 3;
   model.param.num_output_group = n_classes;
   std::vector<std::unique_ptr<DMatrix>> dmats;