Use ellpack for prediction only when sparsepage doesn't exist. (#5504)

2020-04-10 12:15:46 +08:00
parent ad826e913f
commit 6671b42dd4
35 changed files with 166 additions and 116 deletions
--- a/tests/cpp/predictor/test_predictor.cc
+++ b/tests/cpp/predictor/test_predictor.cc
@@ -21,7 +21,7 @@ TEST(Predictor, PredictionCache) {
  DMatrix* m;
  // Add a cache that is immediately expired.
  auto add_cache = [&]() {
-    auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
+    auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
    container.Cache(p_dmat, GenericParameter::kCpuId);
    m = p_dmat.get();
  };
@@ -32,17 +32,16 @@ TEST(Predictor, PredictionCache) {
  EXPECT_ANY_THROW(container.Entry(m));
 }

-// Only run this test when CUDA is enabled.
-void TestTrainingPrediction(size_t rows, std::string tree_method) {
+void TestTrainingPrediction(size_t rows, std::string tree_method,
+                            std::shared_ptr<DMatrix> p_full,
+                            std::shared_ptr<DMatrix> p_hist) {
  size_t constexpr kCols = 16;
  size_t constexpr kClasses = 3;
  size_t constexpr kIters = 3;

  std::unique_ptr<Learner> learner;
-  auto train = [&](std::string predictor, HostDeviceVector<float>* out) {
-    auto p_m = RandomDataGenerator(rows, kCols, 0).GenerateDMatix();
-
-    auto &h_label = p_m->Info().labels_.HostVector();
+  auto train = [&](std::string predictor, HostDeviceVector<float> *out) {
+    auto &h_label = p_hist->Info().labels_.HostVector();
    h_label.resize(rows);

    for (size_t i = 0; i < rows; ++i) {
@@ -52,30 +51,31 @@ void TestTrainingPrediction(size_t rows, std::string tree_method) {
    learner.reset(Learner::Create({}));
    learner->SetParam("tree_method", tree_method);
    learner->SetParam("objective", "multi:softprob");
-    learner->SetParam("predictor", predictor);
    learner->SetParam("num_feature", std::to_string(kCols));
    learner->SetParam("num_class", std::to_string(kClasses));
    learner->Configure();

    for (size_t i = 0; i < kIters; ++i) {
-      learner->UpdateOneIter(i, p_m);
+      learner->UpdateOneIter(i, p_hist);
+    }
+
+    HostDeviceVector<float> from_full;
+    learner->Predict(p_full, false, &from_full);
+
+    HostDeviceVector<float> from_hist;
+    learner->Predict(p_hist, false, &from_hist);
+
+    for (size_t i = 0; i < rows; ++i) {
+      EXPECT_NEAR(from_hist.ConstHostVector()[i],
+                  from_full.ConstHostVector()[i], kRtEps);
    }
-    learner->Predict(p_m, false, out);
  };
-  // Alternate the predictor, CPU predictor can not use ellpack while GPU predictor can
-  // not use CPU histogram index.  So it's guaranteed one of the following is not
-  // predicting from histogram index.  Note: As of writing only GPU supports predicting
-  // from gradient index, the test is written for future portability.
+
  HostDeviceVector<float> predictions_0;
  train("cpu_predictor", &predictions_0);

  HostDeviceVector<float> predictions_1;
  train("gpu_predictor", &predictions_1);
-
-  for (size_t i = 0; i < rows; ++i) {
-    EXPECT_NEAR(predictions_1.ConstHostVector()[i],
-                predictions_0.ConstHostVector()[i], kRtEps);
-  }
 }

 void TestInplacePrediction(dmlc::any x, std::string predictor,
@@ -83,7 +83,7 @@ void TestInplacePrediction(dmlc::any x, std::string predictor,
                           int32_t device) {
  size_t constexpr kClasses { 4 };
  auto gen = RandomDataGenerator{rows, cols, 0.5}.Device(device);
-  std::shared_ptr<DMatrix> m = gen.GenerateDMatix(true, false, kClasses);
+  std::shared_ptr<DMatrix> m = gen.GenerateDMatrix(true, false, kClasses);

  std::unique_ptr<Learner> learner {
    Learner::Create({m})