DMatrix refactor stage 1 (#3301)

* Use sparse page as the singular CSR matrix representation
* Simplify DMatrix methods
* Reduce statefulness of batch iterators
* BREAKING CHANGE: Remove the prob_buffer_row parameter. Users are instead recommended to sample their dataset as a preprocessing step before using XGBoost.
2018-06-07 10:25:58 +12:00
parent 286dccb8e8
commit a96039141a
47 changed files with 650 additions and 1036 deletions
--- a/tests/cpp/tree/test_gpu_hist.cu
+++ b/tests/cpp/tree/test_gpu_hist.cu
@@ -26,10 +26,10 @@ TEST(gpu_hist_experimental, TestSparseShard) {
  TrainParam p;
  p.max_depth = 6;

-  dmlc::DataIter<RowBatch>* iter = dmat->RowIterator();
+  dmlc::DataIter<SparsePage>* iter = dmat->RowIterator();
  iter->BeforeFirst();
  CHECK(iter->Next());
-  const RowBatch& batch = iter->Value();
+  const SparsePage& batch = iter->Value();
  DeviceShard shard(0, 0, 0, rows, hmat.row_ptr.back(), p);
  shard.Init(hmat, batch);
  CHECK(!iter->Next());
@@ -67,10 +67,10 @@ TEST(gpu_hist_experimental, TestDenseShard) {
  TrainParam p;
  p.max_depth = 6;

-  dmlc::DataIter<RowBatch>* iter = dmat->RowIterator();
+  dmlc::DataIter<SparsePage>* iter = dmat->RowIterator();
  iter->BeforeFirst();
  CHECK(iter->Next());
-  const RowBatch& batch = iter->Value();
+  const SparsePage& batch = iter->Value();

  DeviceShard shard(0, 0, 0, rows, hmat.row_ptr.back(), p);
  shard.Init(hmat, batch);