Initial support for external memory in gpu_predictor (#4284)

2019-05-02 18:01:27 -07:00
parent 54980b8959
commit feb6ae3e18
11 changed files with 73 additions and 15 deletions
--- a/tests/cpp/data/test_sparse_page_dmatrix.cc
+++ b/tests/cpp/data/test_sparse_page_dmatrix.cc
@@ -26,7 +26,7 @@ TEST(SparsePageDMatrix, MetaInfo) {
 }

 TEST(SparsePageDMatrix, RowAccess) {
-  std::unique_ptr<xgboost::DMatrix> dmat = xgboost::CreateSparsePageDMatrix();
+  std::unique_ptr<xgboost::DMatrix> dmat = xgboost::CreateSparsePageDMatrix(12, 64);

  // Test the data read into the first row
  auto &batch = *dmat->GetRowBatches().begin();
--- a/tests/cpp/helpers.cc
+++ b/tests/cpp/helpers.cc
@@ -143,13 +143,13 @@ std::shared_ptr<xgboost::DMatrix>* CreateDMatrix(int rows, int columns,
  return static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);
 }

-std::unique_ptr<DMatrix> CreateSparsePageDMatrix() {
+std::unique_ptr<DMatrix> CreateSparsePageDMatrix(size_t n_entries, size_t page_size) {
  // Create sufficiently large data to make two row pages
  dmlc::TemporaryDirectory tempdir;
  const std::string tmp_file = tempdir.path + "/big.libsvm";
-  CreateBigTestData(tmp_file, 12);
+  CreateBigTestData(tmp_file, n_entries);
  std::unique_ptr<DMatrix> dmat = std::unique_ptr<DMatrix>(DMatrix::Load(
-      tmp_file + "#" + tmp_file + ".cache", true, false, "auto", 64UL));
+      tmp_file + "#" + tmp_file + ".cache", true, false, "auto", page_size));
  EXPECT_TRUE(FileExists(tmp_file + ".cache.row.page"));

  // Loop over the batches and count the records
@@ -159,7 +159,7 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrix() {
    batch_count++;
    row_count += batch.Size();
  }
-  EXPECT_EQ(batch_count, 2);
+  EXPECT_GE(batch_count, 2);
  EXPECT_EQ(row_count, dmat->Info().num_row_);

  return dmat;
--- a/tests/cpp/helpers.h
+++ b/tests/cpp/helpers.h
@@ -154,7 +154,7 @@ class SimpleRealUniformDistribution {
 std::shared_ptr<xgboost::DMatrix> *CreateDMatrix(int rows, int columns,
                                                 float sparsity, int seed = 0);

-std::unique_ptr<DMatrix> CreateSparsePageDMatrix();
+std::unique_ptr<DMatrix> CreateSparsePageDMatrix(size_t n_entries, size_t page_size);

 gbm::GBTreeModel CreateTestModel();

--- a/tests/cpp/predictor/test_cpu_predictor.cc
+++ b/tests/cpp/predictor/test_cpu_predictor.cc
@@ -55,7 +55,7 @@ TEST(cpu_predictor, Test) {
 }

 TEST(cpu_predictor, ExternalMemoryTest) {
-  std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix();
+  std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(12, 64);

  std::unique_ptr<Predictor> cpu_predictor =
      std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor"));
--- a/tests/cpp/predictor/test_gpu_predictor.cu
+++ b/tests/cpp/predictor/test_gpu_predictor.cu
@@ -84,6 +84,46 @@ TEST(gpu_predictor, Test) {
  delete dmat;
 }

+TEST(gpu_predictor, ExternalMemoryTest) {  // runs gpu_predictor over a multi-batch DMatrix
+  std::unique_ptr<Predictor> gpu_predictor =
+      std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor"));
+  gpu_predictor->Init({}, {});
+  gbm::GBTreeModel model = CreateTestModel();
+  std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(32, 64);  // n_entries, page_size
+
+  // PredictBatch: one prediction per row, each exactly 1.5 (presumably fixed by CreateTestModel)
+  HostDeviceVector<float> out_predictions;
+  gpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
+  EXPECT_EQ(out_predictions.Size(), dmat->Info().num_row_);
+  for (const auto& v : out_predictions.HostVector()) {
+    ASSERT_EQ(v, 1.5);
+  }
+
+  // PredictLeaf: checks the output holds one value per row and that every value is 0
+  std::vector<float> leaf_out_predictions;
+  gpu_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);
+  EXPECT_EQ(leaf_out_predictions.size(), dmat->Info().num_row_);
+  for (const auto& v : leaf_out_predictions) {
+    ASSERT_EQ(v, 0);
+  }
+
+  // PredictContribution, default arguments: checks size == num_row_ and every value is 1.5
+  std::vector<float> out_contribution;
+  gpu_predictor->PredictContribution(dmat.get(), &out_contribution, model);
+  EXPECT_EQ(out_contribution.size(), dmat->Info().num_row_);
+  for (const auto& v : out_contribution) {
+    ASSERT_EQ(v, 1.5);
+  }
+
+  // PredictContribution with the approximate flag (4th argument) true: same size/value checks
+  std::vector<float> out_contribution_approximate;
+  gpu_predictor->PredictContribution(dmat.get(), &out_contribution_approximate, model, true);
+  EXPECT_EQ(out_contribution_approximate.size(), dmat->Info().num_row_);
+  for (const auto& v : out_contribution_approximate) {
+    ASSERT_EQ(v, 1.5);
+  }
+}
+
 #if defined(XGBOOST_USE_NCCL)
 // Test whether pickling preserves predictor parameters
 TEST(gpu_predictor, MGPU_PicklingTest) {
@@ -195,4 +235,4 @@ TEST(gpu_predictor, MGPU_Test) {
 }
 #endif  // defined(XGBOOST_USE_NCCL)
 }  // namespace predictor
-}  // namespace xgboost
+}  // namespace xgboost