[GPU-Plugin] Major refactor 2 (#2664)

* Change cmake option * Move source files * Move google tests * Move python tests * Move benchmarks * Move documentation * Remove makefile support * Fix test run * Move GPU tests
2017-09-08 09:57:16 +12:00
parent 8244f6f120
commit 15267eedf2
21 changed files with 76 additions and 249 deletions
--- a/tests/cpp/common/test_device_helpers.cu
+++ b/tests/cpp/common/test_device_helpers.cu
@@ -0,0 +1,78 @@
+
+/*!
+ * Copyright 2017 XGBoost contributors
+ */
+#include <thrust/device_vector.h>
+#include <xgboost/base.h>
+#include "../../../src/common/device_helpers.cuh"
+#include "gtest/gtest.h"
+
+/*!
+ * \brief Builds a random segmented layout used as input for the TransformLbs
+ *        tests in this file.
+ *
+ * row_ptr is resized to num_rows + 1 and filled with a running sum of random
+ * row lengths, so row i covers elements [row_ptr[i], row_ptr[i + 1]). For each
+ * element of row i the value i is appended to rows; rows is not cleared first.
+ *
+ * \param num_rows     number of rows (segments) to generate
+ * \param max_row_size each row length is rand() % max_row_size; 0 is treated
+ *                     as "every row is empty" instead of dividing by zero
+ * \param row_ptr      output: the num_rows + 1 segment offsets
+ * \param rows         output: the owning row index of every element
+ */
+void CreateTestData(xgboost::bst_uint num_rows, int max_row_size,
+                    thrust::host_vector<int> *row_ptr,
+                    thrust::host_vector<xgboost::bst_uint> *rows) {
+  row_ptr->resize(num_rows + 1);
+  int sum = 0;
+  // The index shares the unsigned type of num_rows so the loop bound is not a
+  // signed/unsigned comparison.
+  for (xgboost::bst_uint i = 0; i <= num_rows; i++) {
+    (*row_ptr)[i] = sum;
+    // A length is also drawn on the last iteration (i == num_rows) although it
+    // is never used; this keeps the rand() sequence seen by callers unchanged.
+    sum += max_row_size != 0 ? rand() % max_row_size : 0;  // NOLINT
+
+    if (i < num_rows) {
+      for (int j = (*row_ptr)[i]; j < sum; j++) {
+        rows->push_back(i);
+      }
+    }
+  }
+}
+
+/*!
+ * \brief Times a single dh::TransformLbs call over randomly generated rows and
+ *        prints the output size, the elapsed time and the derived bandwidth.
+ *
+ * No TEST in the visible part of this file calls this function, so it only
+ * runs when invoked by hand.
+ */
+void SpeedTest() {
+  int num_rows = 1000000;
+  int max_row_size = 100;
+  dh::CubMemory temp_memory;
+  thrust::host_vector<int> h_row_ptr;
+  thrust::host_vector<xgboost::bst_uint> h_rows;
+  CreateTestData(num_rows, max_row_size, &h_row_ptr, &h_rows);
+  thrust::device_vector<int> row_ptr = h_row_ptr;
+  thrust::device_vector<int> output_row(h_rows.size());
+  auto d_output_row = output_row.data();
+
+  dh::Timer t;
+  dh::TransformLbs(
+      0, &temp_memory, h_rows.size(), dh::raw(row_ptr), row_ptr.size() - 1, false,
+      [=] __device__(size_t idx, size_t ridx) { d_output_row[idx] = ridx; });
+
+  // Block until the device is idle so the timer reading covers the whole call.
+  dh::safe_cuda(cudaDeviceSynchronize());
+  double elapsed = t.elapsedSeconds();
+  const int mb_size = 1048576;
+  // Whole megabytes written to output_row (integer division truncates).
+  size_t size = (sizeof(int) * h_rows.size()) / mb_size;
+  // %zu is the conversion that matches size_t; %llu expects unsigned long long.
+  printf("size: %zumb, time: %fs, bandwidth: %fmb/s\n", size, elapsed,
+         size / elapsed);
+}
+
+/*!
+ * \brief Runs dh::TransformLbs on several random layouts and checks that the
+ *        row index it reports for every element equals the one recorded by
+ *        CreateTestData.
+ *
+ * rand() is seeded with a fixed value so the generated layouts are the same on
+ * every run.
+ */
+void TestLbs() {
+  srand(17);
+  dh::CubMemory temp_memory;
+
+  const std::vector<int> row_counts = {4, 100, 1000};
+  const std::vector<int> row_size_limits = {4, 100, 1300};
+
+  // Every row count is combined with every maximum row size.
+  for (const auto num_rows : row_counts) {
+    for (const auto max_row_size : row_size_limits) {
+      // Host-side reference: segment offsets and the expected row per element.
+      thrust::host_vector<int> h_row_ptr;
+      thrust::host_vector<xgboost::bst_uint> h_rows;
+      CreateTestData(num_rows, max_row_size, &h_row_ptr, &h_rows);
+
+      thrust::device_vector<size_t> row_ptr = h_row_ptr;
+      thrust::device_vector<int> output_row(h_rows.size());
+      auto d_result = output_row.data();
+
+      // The callback stores the row index it is handed at the element's slot.
+      dh::TransformLbs(
+          0, &temp_memory, h_rows.size(), dh::raw(row_ptr), row_ptr.size() - 1,
+          false,
+          [=] __device__(size_t idx, size_t ridx) { d_result[idx] = ridx; });
+
+      dh::safe_cuda(cudaDeviceSynchronize());
+      ASSERT_TRUE(h_rows == output_row);
+    }
+  }
+}
+
+TEST(cub_lbs, Test) { TestLbs(); }
--- a/tests/cpp/predictor/test_gpu_predictor.cu
+++ b/tests/cpp/predictor/test_gpu_predictor.cu
@@ -0,0 +1,73 @@
+
+/*!
+ * Copyright 2017 XGBoost contributors
+ */
+#include <xgboost/c_api.h>
+#include <xgboost/predictor.h>
+#include "gtest/gtest.h"
+#include "../helpers.h"
+
+namespace xgboost {
+namespace predictor {
+/*!
+ * \brief Compares the "gpu_predictor" with the "cpu_predictor" on a model that
+ *        holds one tree whose node 0 is a leaf with value 1.5.
+ *
+ * Batch, per-instance, leaf and contribution predictions are compared. Before
+ * any element-wise comparison the two result vectors are required to have the
+ * same length, so an empty or truncated result cannot pass unnoticed and no
+ * vector is indexed past its end.
+ */
+TEST(gpu_predictor, Test) {
+  std::unique_ptr<Predictor> gpu_predictor(Predictor::Create("gpu_predictor"));
+  std::unique_ptr<Predictor> cpu_predictor(Predictor::Create("cpu_predictor"));
+
+  std::vector<std::unique_ptr<RegTree>> trees;
+  trees.push_back(std::make_unique<RegTree>());
+  trees.back()->InitModel();
+  (*trees.back())[0].set_leaf(1.5f);
+  // NOTE(review): 0.5 is presumably the base margin - confirm against
+  // gbm::GBTreeModel.
+  gbm::GBTreeModel model(0.5);
+  model.CommitModel(std::move(trees), 0);
+  model.param.num_output_group = 1;
+
+  int n_row = 5;
+  int n_col = 5;
+
+  auto dmat = CreateDMatrix(n_row, n_col, 0);
+
+  // Test predict batch
+  std::vector<float> gpu_out_predictions;
+  std::vector<float> cpu_out_predictions;
+  gpu_predictor->PredictBatch(dmat.get(), &gpu_out_predictions, model, 0);
+  cpu_predictor->PredictBatch(dmat.get(), &cpu_out_predictions, model, 0);
+  ASSERT_EQ(gpu_out_predictions.size(), cpu_out_predictions.size());
+  const float abs_tolerance = 0.001f;
+  for (size_t i = 0; i < gpu_out_predictions.size(); i++) {
+    ASSERT_LT(std::abs(gpu_out_predictions[i] - cpu_out_predictions[i]),
+              abs_tolerance);
+  }
+
+  // Test predict instance
+  // NOTE(review): Value() is read without a preceding BeforeFirst()/Next() on
+  // the iterator - confirm it is positioned on a batch at this point.
+  auto batch = dmat->RowIterator()->Value();
+  for (size_t i = 0; i < batch.size; i++) {
+    std::vector<float> gpu_instance_out_predictions;
+    std::vector<float> cpu_instance_out_predictions;
+    cpu_predictor->PredictInstance(batch[i], &cpu_instance_out_predictions,
+                                   model);
+    gpu_predictor->PredictInstance(batch[i], &gpu_instance_out_predictions,
+                                   model);
+    // Element 0 is read below, so both results must hold at least one value.
+    ASSERT_FALSE(cpu_instance_out_predictions.empty());
+    ASSERT_FALSE(gpu_instance_out_predictions.empty());
+    ASSERT_EQ(gpu_instance_out_predictions[0], cpu_instance_out_predictions[0]);
+  }
+
+  // Test predict leaf
+  std::vector<float> gpu_leaf_out_predictions;
+  std::vector<float> cpu_leaf_out_predictions;
+  cpu_predictor->PredictLeaf(dmat.get(), &cpu_leaf_out_predictions, model);
+  gpu_predictor->PredictLeaf(dmat.get(), &gpu_leaf_out_predictions, model);
+  ASSERT_EQ(gpu_leaf_out_predictions.size(), cpu_leaf_out_predictions.size());
+  for (size_t i = 0; i < gpu_leaf_out_predictions.size(); i++) {
+    ASSERT_EQ(gpu_leaf_out_predictions[i], cpu_leaf_out_predictions[i]);
+  }
+
+  // Test predict contribution
+  std::vector<float> gpu_out_contribution;
+  std::vector<float> cpu_out_contribution;
+  cpu_predictor->PredictContribution(dmat.get(), &cpu_out_contribution, model);
+  gpu_predictor->PredictContribution(dmat.get(), &gpu_out_contribution, model);
+  ASSERT_EQ(gpu_out_contribution.size(), cpu_out_contribution.size());
+  for (size_t i = 0; i < gpu_out_contribution.size(); i++) {
+    ASSERT_EQ(gpu_out_contribution[i], cpu_out_contribution[i]);
+  }
+}
+}  // namespace predictor
+}  // namespace xgboost
--- a/tests/cpp/xgboost_test.mk
+++ b/tests/cpp/xgboost_test.mk
@@ -5,17 +5,6 @@ UNITTEST=$(UTEST_ROOT)/xgboost_test
 UNITTEST_SRC=$(wildcard $(UTEST_ROOT)/*.cc $(UTEST_ROOT)/*/*.cc)
 UNITTEST_OBJ=$(patsubst $(UTEST_ROOT)%.cc, $(UTEST_OBJ_ROOT)%.o, $(UNITTEST_SRC))

-# for if and when we add cuda source files into xgboost core
-UNITTEST_CU_SRC=$(wildcard $(UTEST_ROOT)/*.cu $(UTEST_ROOT)/*/*.cu)
-UNITTEST_OBJ += $(patsubst $(UTEST_ROOT)%.cu, $(UTEST_OBJ_ROOT)%.o, $(UNITTEST_CU_SRC))
-
-# tests from grow_gpu plugin (only if CUDA path is enabled!)
-ifeq ($(PLUGIN_UPDATER_GPU),ON)
-  GPU_PLUGIN_FOLDER = plugin/updater_gpu
-  UNITTEST_CU_PLUGIN_SRC = $(wildcard $(GPU_PLUGIN_FOLDER)/test/cpp/*.cu)
-  UNITTEST_OBJ += $(patsubst %.cu, $(UTEST_OBJ_ROOT)/%.o, $(UNITTEST_CU_PLUGIN_SRC))
-endif
-
 GTEST_LIB=$(GTEST_PATH)/lib/
 GTEST_INC=$(GTEST_PATH)/include/

@@ -26,14 +15,6 @@ UNITTEST_DEPS=lib/libxgboost.a $(DMLC_CORE)/libdmlc.a $(RABIT)/lib/$(LIB_RABIT)
 COVER_OBJ=$(patsubst %.o, %.gcda, $(ALL_OBJ)) $(patsubst %.o, %.gcda, $(UNITTEST_OBJ))

 # the order of the below targets matter!
-$(UTEST_OBJ_ROOT)/$(GPU_PLUGIN_FOLDER)/test/cpp/%.o: $(GPU_PLUGIN_FOLDER)/test/cpp/%.cu
-	@mkdir -p $(@D)
-	$(NVCC) $(NVCC_FLAGS) -I$(GTEST_INC) -o $@ -c $<
-
-$(UTEST_OBJ_ROOT)/%.o: $(UTEST_ROOT)/%.cu
-	@mkdir -p $(@D)
-	$(NVCC) $(NVCC_FLAGS) -I$(GTEST_INC) -o $@ -c $<
-
 $(UTEST_OBJ_ROOT)/$(GTEST_PATH)/%.o: $(GTEST_PATH)/%.cc
 	@mkdir -p $(@D)
 	$(CXX) $(UNITTEST_CFLAGS) -I$(GTEST_INC) -I$(GTEST_PATH) -o $@ -c $<