Implement iterative DMatrix. (#5837)

2020-07-03 11:44:52 +08:00
parent 4d277d750d
commit 1a0801238e
15 changed files with 855 additions and 84 deletions
--- a/tests/cpp/data/test_iterative_device_dmatrix.cu
+++ b/tests/cpp/data/test_iterative_device_dmatrix.cu
@@ -0,0 +1,166 @@
+/*!
+ * Copyright 2020 XGBoost contributors
+ */
+#include <gtest/gtest.h>
+
+#include "../helpers.h"
+#include "../../../src/data/iterative_device_dmatrix.h"
+#include "../../../src/data/ellpack_page.cuh"
+#include "../../../src/data/device_adapter.cuh"
+
+namespace xgboost {
+namespace data {
+
+/*!
+ * \brief Compare an IterativeDeviceDMatrix assembled from CudaArrayIterForTest batches
+ *        with a DMatrix created from the same data through CupyAdapter.
+ *
+ * The cut values, minimum feature values and feature segments of both matrices are
+ * compared first, followed by the quantized entries stored in their ELLPACK pages.
+ *
+ * \param sparsity Sparsity forwarded to CudaArrayIterForTest.
+ */
+void TestEquivalent(float sparsity) {
+  CudaArrayIterForTest iter{sparsity};
+  IterativeDeviceDMatrix m(
+      &iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(),
+      0, 256);
+  size_t offset = 0;
+  auto first = (*m.GetEllpackBatches({}).begin()).Impl();
+  // Page that receives a copy of every batch.  It is created for 1000 * 100 rows
+  // (presumably its row capacity -- TODO confirm), which is more than the kRows rows
+  // asserted below, so its raw buffer is not compared byte-for-byte further down.
+  std::unique_ptr<EllpackPageImpl> page_concatenated {
+    new EllpackPageImpl(0, first->Cuts(), first->is_dense,
+                        first->row_stride, 1000 * 100)};
+  for (auto& batch : m.GetBatches<EllpackPage>()) {
+    auto page = batch.Impl();
+    size_t num_elements = page_concatenated->Copy(0, page, offset);
+    offset += num_elements;
+  }
+  auto from_iter = page_concatenated->GetDeviceAccessor(0);
+  ASSERT_EQ(m.Info().num_col_, CudaArrayIterForTest::kCols);
+  ASSERT_EQ(m.Info().num_row_, CudaArrayIterForTest::kRows);
+
+  // Host copies of the iterator-side cuts; they do not depend on the reference batch,
+  // so they are fetched once up front.
+  std::vector<float> cuts_from_iter(from_iter.gidx_fvalue_map.size());
+  std::vector<float> min_fvalues_iter(from_iter.min_fvalue.size());
+  std::vector<uint32_t> cut_ptrs_iter(from_iter.feature_segments.size());
+  dh::CopyDeviceSpanToVector(&cuts_from_iter, from_iter.gidx_fvalue_map);
+  dh::CopyDeviceSpanToVector(&min_fvalues_iter, from_iter.min_fvalue);
+  dh::CopyDeviceSpanToVector(&cut_ptrs_iter, from_iter.feature_segments);
+
+  // Reference matrix built from the whole data set at once.
+  std::string interface_str = iter.AsArray();
+  auto adapter = CupyAdapter(interface_str);
+  std::unique_ptr<DMatrix> dm{
+      DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 0)};
+  ASSERT_EQ(dm->Info().num_row_, m.Info().num_row_);
+  BatchParam bp {0, 256};
+  for (auto& ellpack : dm->GetBatches<EllpackPage>(bp)) {
+    auto data_impl = ellpack.Impl();
+    auto from_data = data_impl->GetDeviceAccessor(0);
+
+    std::vector<float> cuts_from_data(from_data.gidx_fvalue_map.size());
+    std::vector<float> min_fvalues_data(from_data.min_fvalue.size());
+    std::vector<uint32_t> cut_ptrs_data(from_data.feature_segments.size());
+    dh::CopyDeviceSpanToVector(&cuts_from_data, from_data.gidx_fvalue_map);
+    dh::CopyDeviceSpanToVector(&min_fvalues_data, from_data.min_fvalue);
+    dh::CopyDeviceSpanToVector(&cut_ptrs_data, from_data.feature_segments);
+
+    ASSERT_EQ(cuts_from_iter.size(), cuts_from_data.size());
+    for (size_t i = 0; i < cuts_from_iter.size(); ++i) {
+      EXPECT_NEAR(cuts_from_iter[i], cuts_from_data[i], kRtEps);
+    }
+    ASSERT_EQ(min_fvalues_iter.size(), min_fvalues_data.size());
+    for (size_t i = 0; i < min_fvalues_iter.size(); ++i) {
+      ASSERT_NEAR(min_fvalues_iter[i], min_fvalues_data[i], kRtEps);
+    }
+    ASSERT_EQ(cut_ptrs_iter.size(), cut_ptrs_data.size());
+    for (size_t i = 0; i < cut_ptrs_iter.size(); ++i) {
+      ASSERT_EQ(cut_ptrs_iter[i], cut_ptrs_data[i]);
+    }
+
+    auto& buffer_from_iter = page_concatenated->gidx_buffer;
+    auto& buffer_from_data = data_impl->gidx_buffer;
+    ASSERT_NE(buffer_from_data.Size(), 0);
+
+    // Both pages must use the same encoding and layout before their entries can be
+    // compared index by index.
+    ASSERT_EQ(page_concatenated->NumSymbols(), data_impl->NumSymbols());
+    ASSERT_EQ(page_concatenated->row_stride, data_impl->row_stride);
+
+    // Decode the compressed buffers and compare every entry of the reference page
+    // with the entry at the same position in the concatenated page.  The previous
+    // check compared the reference buffer with itself and therefore could never fail.
+    common::CompressedIterator<uint32_t> entries_iter(
+        buffer_from_iter.HostVector().data(), page_concatenated->NumSymbols());
+    common::CompressedIterator<uint32_t> entries_data(
+        buffer_from_data.HostVector().data(), data_impl->NumSymbols());
+    size_t const n_entries = dm->Info().num_row_ * data_impl->row_stride;
+    for (size_t i = 0; i < n_entries; ++i) {
+      ASSERT_EQ(entries_iter[i], entries_data[i]);
+    }
+  }
+}
+
+// Runs the equivalence check once on dense input and once on half-sparse input.
+TEST(IterativeDeviceDMatrix, Basic) {
+  for (float sparsity : {0.0f, 0.5f}) {
+    TestEquivalent(sparsity);
+  }
+}
+
+// With sparsity 0 every generated value should land in the compressed buffer at the
+// same flat index it has in the source array, holding the bin that the cuts assign
+// to it for its column.
+TEST(IterativeDeviceDMatrix, RowMajor) {
+  CudaArrayIterForTest iter(0.0f);
+  IterativeDeviceDMatrix m(
+      &iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(),
+      0, 256);
+  std::string interface_str = iter.AsArray();
+  size_t n_batches = 0;
+  for (auto& ellpack : m.GetBatches<EllpackPage>()) {
+    ++n_batches;
+    auto impl = ellpack.Impl();
+    common::CompressedIterator<uint32_t> iterator(
+        impl->gidx_buffer.HostVector().data(), impl->NumSymbols());
+    auto cols = CudaArrayIterForTest::kCols;
+    auto rows = CudaArrayIterForTest::kRows;
+
+    // Bring the generated values back to the host through the array interface.
+    auto j_interface =
+        Json::Load({interface_str.c_str(), interface_str.size()});
+    ArrayInterface loaded {get<Object const>(j_interface)};
+    std::vector<float> h_data(cols * rows);
+    common::Span<float> s_data{static_cast<float*>(loaded.data), cols * rows};
+    dh::CopyDeviceSpanToVector(&h_data, s_data);
+
+    // Walk the values row by row; `idx` is the flat position of (row, col).
+    for (size_t row = 0; row < rows; ++row) {
+      for (size_t col = 0; col < cols; ++col) {
+        size_t const idx = row * cols + col;
+        int column_idx = static_cast<int>(col);
+        EXPECT_EQ(impl->Cuts().SearchBin(h_data[idx], column_idx), iterator[idx]);
+      }
+    }
+    EXPECT_EQ(m.Info().num_col_, cols);
+    EXPECT_EQ(m.Info().num_row_, rows);
+    EXPECT_EQ(m.Info().num_nonzero_, rows * cols);
+  }
+  // All batches are concatenated.
+  ASSERT_EQ(n_batches, 1);
+}
+
+// Checks how NaN entries are represented: three values of a small 10 x 2 data set are
+// replaced with NaN before the matrix is built, and the corresponding compressed
+// entries are expected to hold the accessor's NullValue().
+TEST(IterativeDeviceDMatrix, RowMajorMissing) {
+  const float kMissing = std::numeric_limits<float>::quiet_NaN();
+  size_t rows = 10;
+  size_t cols = 2;
+  CudaArrayIterForTest iter(0.0f, rows, cols, 2);
+  // Copy the generated values to the host so that individual entries can be edited.
+  std::string interface_str = iter.AsArray();
+  auto j_interface =
+      Json::Load({interface_str.c_str(), interface_str.size()});
+  ArrayInterface loaded {get<Object const>(j_interface)};
+  std::vector<float> h_data(cols * rows);
+  common::Span<float> s_data{static_cast<float*>(loaded.data), cols * rows};
+  dh::CopyDeviceSpanToVector(&h_data, s_data);
+  h_data[1] = kMissing;
+  h_data[5] = kMissing;
+  h_data[6] = kMissing;
+  // Write the edited values back through the address stored in the interface's
+  // "data" field.  This has to happen before the matrix below is constructed.
+  auto ptr = thrust::device_ptr<float>(
+      reinterpret_cast<float *>(get<Integer>(j_interface["data"][0])));
+  thrust::copy(h_data.cbegin(), h_data.cend(), ptr);
+
+  IterativeDeviceDMatrix m(
+      &iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(),
+      0, 256);
+  auto &ellpack = *m.GetBatches<EllpackPage>({0, 256, 0}).begin();
+  auto impl = ellpack.Impl();
+  common::CompressedIterator<uint32_t> iterator(
+      impl->gidx_buffer.HostVector().data(), impl->NumSymbols());
+  EXPECT_EQ(iterator[1], impl->GetDeviceAccessor(0).NullValue());
+  EXPECT_EQ(iterator[5], impl->GetDeviceAccessor(0).NullValue());
+  // null values get placed after valid values in a row
+  // (the NaN written at flat index 6 is therefore expected at index 7)
+  EXPECT_EQ(iterator[7], impl->GetDeviceAccessor(0).NullValue());
+  EXPECT_EQ(m.Info().num_col_, cols);
+  EXPECT_EQ(m.Info().num_row_, rows);
+  // Three entries were replaced with NaN above.
+  EXPECT_EQ(m.Info().num_nonzero_, rows* cols - 3);
+}
+
+// IsDense() must report true only for data generated without any sparsity.
+TEST(IterativeDeviceDMatrix, IsDense) {
+  // Builds a matrix from data generated with the given sparsity and checks the flag.
+  // The previously declared `num_bins` local was captured but never read, so it has
+  // been dropped; the constructor arguments are unchanged.
+  auto test = [] (float sparsity) {
+    CudaArrayIterForTest iter(sparsity);
+    IterativeDeviceDMatrix m(
+        &iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(),
+        0, 256);
+    if (sparsity == 0.0) {
+      ASSERT_TRUE(m.IsDense());
+    } else {
+      ASSERT_FALSE(m.IsDense());
+    }
+  };
+  test(0.0);
+  test(0.1);
+}
+}  // namespace data
+}  // namespace xgboost
--- a/tests/cpp/helpers.cu
+++ b/tests/cpp/helpers.cu
@@ -1,17 +1,43 @@
+#include <xgboost/c_api.h>
+
 #include "helpers.h"
 #include "../../src/data/device_adapter.cuh"
-#include "../../src/data/device_dmatrix.h"
+#include "../../src/data/iterative_device_dmatrix.h"

 namespace xgboost {
+
+/*!
+ * \brief Create the proxy handle and generate the data that Next() hands out.
+ *
+ * \param sparsity Sparsity given to the random data generator.
+ * \param rows     Row count given to the random data generator.
+ * \param cols     Column count given to the random data generator.
+ * \param batches  Number of batches requested from the generator.
+ */
+CudaArrayIterForTest::CudaArrayIterForTest(float sparsity, size_t rows,
+                                           size_t cols, size_t batches)
+    : rows_{rows}, cols_{cols}, n_batches_{batches} {
+  // NOTE(review): the status code returned by the C API call is not inspected.
+  XGProxyDMatrixCreate(&proxy_);
+
+  rng_.reset(new RandomDataGenerator{rows_, cols_, sparsity});
+  rng_->Device(0);
+
+  // The generator fills data_ and returns the per-batch interface strings together
+  // with the interface string describing the whole array.
+  auto generated = rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);
+  batches_ = std::get<0>(std::move(generated));
+  interface_ = std::get<1>(std::move(generated));
+
+  this->Reset();
+}
+
+// Frees the proxy handle that the constructor obtained from XGProxyDMatrixCreate.
+CudaArrayIterForTest::~CudaArrayIterForTest() {
+  XGDMatrixFree(proxy_);
+}
+
+/*!
+ * \brief Hand the next batch to the proxy handle.
+ *
+ * \return 1 when a batch was set on the proxy, 0 once all batches have been consumed.
+ */
+int CudaArrayIterForTest::Next() {
+  bool const has_batch = iter_ != n_batches_;
+  if (has_batch) {
+    auto const& batch = batches_[iter_];
+    // NOTE(review): the status code returned by the C API call is not inspected.
+    XGDeviceQuantileDMatrixSetDataCudaArrayInterface(proxy_, batch.c_str());
+    ++iter_;
+  }
+  return has_batch ? 1 : 0;
+}
+
+// Out-of-class definitions for the constexpr static members, needed whenever one of
+// them is ODR-used (for example bound to a const reference) before C++17.  kBatches
+// is defined alongside the others so that such a use of it links as well.
+size_t constexpr CudaArrayIterForTest::kRows;
+size_t constexpr CudaArrayIterForTest::kBatches;
+size_t constexpr CudaArrayIterForTest::kCols;
+
 std::shared_ptr<DMatrix> RandomDataGenerator::GenerateDeviceDMatrix(bool with_label,
                                                                    bool float_label,
                                                                    size_t classes) {
-  std::vector<HostDeviceVector<float>> storage(cols_);
-  std::string arr = this->GenerateColumnarArrayInterface(&storage);
-  auto adapter = data::CudfAdapter(arr);
-  std::shared_ptr<DMatrix> m {
-    new data::DeviceDMatrix{&adapter,
-          std::numeric_limits<float>::quiet_NaN(), 1, 256}};
+  CudaArrayIterForTest iter{this->sparsity_, this->rows_, this->cols_, 1};
+  auto m = std::make_shared<data::IterativeDeviceDMatrix>(
+      &iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(),
+      0, bins_);
  return m;
 }
 }  // namespace xgboost
--- a/tests/cpp/helpers.h
+++ b/tests/cpp/helpers.h
@@ -304,5 +304,51 @@ inline HostDeviceVector<GradientPair> GenerateRandomGradients(const size_t n_row
  HostDeviceVector<GradientPair> gpair(h_gpair);
  return gpair;
 }
+
+typedef void *DMatrixHandle;  // NOLINT(*);
+
+/*!
+ * \brief Test helper that generates random data once and hands it out batch by batch
+ *        through a proxy DMatrix handle.
+ *
+ * The free functions Reset() and Next() declared after this class forward an opaque
+ * handle to the member functions of the same name.
+ */
+class CudaArrayIterForTest {
+  // Storage passed to the generator when the batches are created.
+  HostDeviceVector<float> data_;
+  // Index of the batch that the next call to Next() will hand out.
+  size_t iter_ {0};
+  // Proxy handle created in the constructor and freed in the destructor.
+  DMatrixHandle proxy_;
+  std::unique_ptr<RandomDataGenerator> rng_;
+
+  // One interface string per batch.
+  std::vector<std::string> batches_;
+  // Interface string returned by AsArray().
+  std::string interface_;
+  size_t rows_;
+  size_t cols_;
+  size_t n_batches_;
+
+ public:
+  // Defaults used by the constructor when no explicit sizes are given.
+  size_t static constexpr kRows { 1000 };
+  size_t static constexpr kBatches { 100 };
+  size_t static constexpr kCols { 13 };
+
+  explicit CudaArrayIterForTest(float sparsity, size_t rows = kRows,
+                                size_t cols = kCols, size_t batches = kBatches);
+  ~CudaArrayIterForTest();
+
+  /*! \brief Copy of the interface string stored next to the per-batch strings. */
+  std::string AsArray() const {
+    return interface_;
+  }
+
+  /*! \brief Set the next batch on the proxy; returns 0 when no batch is left, else 1. */
+  int Next();
+  /*! \brief Rewind to the first batch. */
+  void Reset() {
+    iter_ = 0;
+  }
+  /*! \brief Index of the batch that Next() will hand out next. */
+  size_t Iter() const { return iter_; }
+  /*! \brief The proxy handle owned by this iterator. */
+  auto Proxy() -> decltype(proxy_) { return proxy_; }
+};
+
+typedef void *DataIterHandle;  // NOLINT(*)
+
+/*! \brief Callback adaptor: treats `self` as a CudaArrayIterForTest and rewinds it. */
+inline void Reset(DataIterHandle self) {
+  auto* iter = static_cast<CudaArrayIterForTest*>(self);
+  iter->Reset();
+}
+
+/*!
+ * \brief Callback adaptor: treats `self` as a CudaArrayIterForTest and advances it.
+ * \return The value returned by CudaArrayIterForTest::Next().
+ */
+inline int Next(DataIterHandle self) {
+  auto* iter = static_cast<CudaArrayIterForTest*>(self);
+  return iter->Next();
+}
+
 }  // namespace xgboost
 #endif
--- a/tests/cpp/predictor/test_gpu_predictor.cu
+++ b/tests/cpp/predictor/test_gpu_predictor.cu
@@ -76,15 +76,15 @@ TEST(GPUPredictor, EllpackTraining) {
       .Bins(kBins)
       .Device(0)
       .GenerateDeviceDMatrix(true);
-  std::vector<HostDeviceVector<float>> storage(kCols);
+  HostDeviceVector<float> storage(kRows * kCols);
  auto columnar = RandomDataGenerator{kRows, kCols, 0.0}
       .Device(0)
-       .GenerateColumnarArrayInterface(&storage);
-  auto adapter = data::CudfAdapter(columnar);
+       .GenerateArrayInterface(&storage);
+  auto adapter = data::CupyAdapter(columnar);
  std::shared_ptr<DMatrix> p_full {
    DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)
  };
-  TestTrainingPrediction(kRows, "gpu_hist", p_full, p_ellpack);
+  TestTrainingPrediction(kRows, kBins, "gpu_hist", p_full, p_ellpack);
 }

 TEST(GPUPredictor, ExternalMemoryTest) {
--- a/tests/cpp/predictor/test_predictor.cc
+++ b/tests/cpp/predictor/test_predictor.cc
@@ -32,7 +32,8 @@ TEST(Predictor, PredictionCache) {
  EXPECT_ANY_THROW(container.Entry(m));
 }

-void TestTrainingPrediction(size_t rows, std::string tree_method,
+void TestTrainingPrediction(size_t rows, size_t bins,
+                            std::string tree_method,
                            std::shared_ptr<DMatrix> p_full,
                            std::shared_ptr<DMatrix> p_hist) {
  size_t constexpr kCols = 16;
--- a/tests/cpp/predictor/test_predictor.h
+++ b/tests/cpp/predictor/test_predictor.h
@@ -52,7 +52,7 @@ void TestPredictionFromGradientIndex(std::string name, size_t rows, size_t cols,
 }

 // p_full and p_hist should come from the same data set.
-void TestTrainingPrediction(size_t rows, std::string tree_method,
+void TestTrainingPrediction(size_t rows, size_t bins, std::string tree_method,
                            std::shared_ptr<DMatrix> p_full,
                            std::shared_ptr<DMatrix> p_hist);