diff --git a/src/predictor/gpu_predictor.cu b/src/predictor/gpu_predictor.cu index d830d1f7b..a817fdccb 100644 --- a/src/predictor/gpu_predictor.cu +++ b/src/predictor/gpu_predictor.cu @@ -278,7 +278,7 @@ class GPUPredictor : public xgboost::Predictor { } void PredictInternal - (const SparsePage& batch, const MetaInfo& info, + (const SparsePage& batch, size_t num_features, HostDeviceVector* predictions) { if (predictions->DeviceSize(device_) == 0) { return; } dh::safe_cuda(cudaSetDevice(device_)); @@ -287,7 +287,7 @@ class GPUPredictor : public xgboost::Predictor { const int GRID_SIZE = static_cast(dh::DivRoundUp(num_rows, BLOCK_THREADS)); int shared_memory_bytes = static_cast - (sizeof(float) * info.num_col_ * BLOCK_THREADS); + (sizeof(float) * num_features * BLOCK_THREADS); bool use_shared = true; if (shared_memory_bytes > max_shared_memory_bytes_) { shared_memory_bytes = 0; @@ -300,7 +300,7 @@ class GPUPredictor : public xgboost::Predictor { PredictKernel<<>> (dh::ToSpan(nodes_), predictions->DeviceSpan(device_), dh::ToSpan(tree_segments_), dh::ToSpan(tree_group_), batch.offset.DeviceSpan(device_), - batch.data.DeviceSpan(device_), this->tree_begin_, this->tree_end_, info.num_col_, + batch.data.DeviceSpan(device_), this->tree_begin_, this->tree_end_, num_features, num_rows, entry_start, use_shared, this->num_group_); } @@ -363,7 +363,7 @@ class GPUPredictor : public xgboost::Predictor { batch.data.Reshard(GPUDistribution::Explicit(devices_, device_offsets)); dh::ExecuteIndexShards(&shards_, [&](int idx, DeviceShard& shard) { - shard.PredictInternal(batch, dmat->Info(), out_preds); + shard.PredictInternal(batch, model.param.num_feature, out_preds); }); batch_offset += batch.Size() * model.param.num_output_group; } diff --git a/tests/cpp/predictor/test_gpu_predictor.cu b/tests/cpp/predictor/test_gpu_predictor.cu index 24b450737..2dffab8b6 100644 --- a/tests/cpp/predictor/test_gpu_predictor.cu +++ b/tests/cpp/predictor/test_gpu_predictor.cu @@ -38,11 +38,11 @@ 
TEST(gpu_predictor, Test) { gpu_predictor->Init({}, {}); cpu_predictor->Init({}, {}); - gbm::GBTreeModel model = CreateTestModel(); - int n_row = 5; int n_col = 5; + gbm::GBTreeModel model = CreateTestModel(); + model.param.num_feature = n_col; auto dmat = CreateDMatrix(n_row, n_col, 0); // Test predict batch @@ -95,6 +95,8 @@ TEST(gpu_predictor, ExternalMemoryTest) { std::unique_ptr(Predictor::Create("gpu_predictor", &lparam)); gpu_predictor->Init({}, {}); gbm::GBTreeModel model = CreateTestModel(); + int n_col = 3; + model.param.num_feature = n_col; std::unique_ptr dmat = CreateSparsePageDMatrix(32, 64); // Test predict batch @@ -116,17 +118,25 @@ TEST(gpu_predictor, ExternalMemoryTest) { // Test predict contribution std::vector out_contribution; gpu_predictor->PredictContribution(dmat.get(), &out_contribution, model); - EXPECT_EQ(out_contribution.size(), dmat->Info().num_row_); - for (const auto& v : out_contribution) { - ASSERT_EQ(v, 1.5); + EXPECT_EQ(out_contribution.size(), dmat->Info().num_row_ * (n_col + 1)); + for (int i = 0; i < out_contribution.size(); i++) { + if (i % (n_col + 1) == n_col) { + ASSERT_EQ(out_contribution[i], 1.5); + } else { + ASSERT_EQ(out_contribution[i], 0); + } } // Test predict contribution (approximate method) std::vector out_contribution_approximate; gpu_predictor->PredictContribution(dmat.get(), &out_contribution_approximate, model, true); - EXPECT_EQ(out_contribution_approximate.size(), dmat->Info().num_row_); - for (const auto& v : out_contribution_approximate) { - ASSERT_EQ(v, 1.5); + EXPECT_EQ(out_contribution_approximate.size(), dmat->Info().num_row_ * (n_col + 1)); + for (int i = 0; i < out_contribution_approximate.size(); i++) { + if (i % (n_col + 1) == n_col) { + ASSERT_EQ(out_contribution_approximate[i], 1.5); + } else { + ASSERT_EQ(out_contribution_approximate[i], 0); + } } } @@ -226,6 +236,7 @@ TEST(gpu_predictor, MGPU_Test) { auto dmat = CreateDMatrix(n_row, n_col, 0); gbm::GBTreeModel model = CreateTestModel(); + model.param.num_feature = n_col; // Test predict 
batch HostDeviceVector gpu_out_predictions; @@ -253,6 +264,7 @@ TEST(gpu_predictor, MGPU_ExternalMemoryTest) { gpu_predictor->Init({}, {}); gbm::GBTreeModel model = CreateTestModel(); + model.param.num_feature = 3; const int n_classes = 3; model.param.num_output_group = n_classes; std::vector> dmats;