Refactor DMatrix to return batches of different page types (#4686)

* Use explicit template parameter for specifying page type.
2019-08-03 12:10:34 -07:00
parent e930a8e54f
commit 6edddd7966
41 changed files with 477 additions and 470 deletions
--- a/src/linear/coordinate_common.h
+++ b/src/linear/coordinate_common.h
@@ -80,7 +80,7 @@ inline std::pair<double, double> GetGradient(int group_idx, int num_group, int f
                                             const std::vector<GradientPair> &gpair,
                                             DMatrix *p_fmat) {
  double sum_grad = 0.0, sum_hess = 0.0;
-  for (const auto &batch : p_fmat->GetColumnBatches()) {
+  for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
    auto col = batch[fidx];
    const auto ndata = static_cast<bst_omp_uint>(col.size());
    for (bst_omp_uint j = 0; j < ndata; ++j) {
@@ -109,7 +109,7 @@ inline std::pair<double, double> GetGradientParallel(int group_idx, int num_grou
                                                     const std::vector<GradientPair> &gpair,
                                                     DMatrix *p_fmat) {
  double sum_grad = 0.0, sum_hess = 0.0;
-  for (const auto &batch : p_fmat->GetColumnBatches()) {
+  for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
    auto col = batch[fidx];
    const auto ndata = static_cast<bst_omp_uint>(col.size());
 #pragma omp parallel for schedule(static) reduction(+ : sum_grad, sum_hess)
@@ -164,7 +164,7 @@ inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
                                   float dw, std::vector<GradientPair> *in_gpair,
                                   DMatrix *p_fmat) {
  if (dw == 0.0f) return;
-  for (const auto &batch : p_fmat->GetColumnBatches()) {
+  for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
    auto col = batch[fidx];
    // update grad value
    const auto num_row = static_cast<bst_omp_uint>(col.size());
@@ -332,7 +332,7 @@ class GreedyFeatureSelector : public FeatureSelector {
    const bst_omp_uint nfeat = model.param.num_feature;
    // Calculate univariate gradient sums
    std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
-  for (const auto &batch : p_fmat->GetColumnBatches()) {
+  for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
      #pragma omp parallel for schedule(static)
      for (bst_omp_uint i = 0; i < nfeat; ++i) {
        const auto col = batch[i];
@@ -397,7 +397,7 @@ class ThriftyFeatureSelector : public FeatureSelector {
    }
    // Calculate univariate gradient sums
    std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
-    for (const auto &batch : p_fmat->GetColumnBatches()) {
+    for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
 // column-parallel is usually faster than row-parallel
 #pragma omp parallel for schedule(static)
      for (bst_omp_uint i = 0; i < nfeat; ++i) {
--- a/src/linear/updater_gpu_coordinate.cu
+++ b/src/linear/updater_gpu_coordinate.cu
@@ -186,7 +186,7 @@ class GPUCoordinateUpdater : public LinearUpdater {
    }

    CHECK(p_fmat->SingleColBlock());
-    SparsePage const& batch = *(p_fmat->GetColumnBatches().begin());
+    SparsePage const& batch = *(p_fmat->GetBatches<CSCPage>().begin());

    shards_.resize(n_devices);
    // Create device shards
--- a/src/linear/updater_shotgun.cc
+++ b/src/linear/updater_shotgun.cc
@@ -42,7 +42,7 @@ class ShotgunUpdater : public LinearUpdater {
    // lock-free parallel updates of weights
    selector_->Setup(*model, in_gpair->ConstHostVector(), p_fmat,
                     param_.reg_alpha_denorm, param_.reg_lambda_denorm, 0);
-    for (const auto &batch : p_fmat->GetColumnBatches()) {
+    for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
      const auto nfeat = static_cast<bst_omp_uint>(batch.Size());
 #pragma omp parallel for schedule(static)
      for (bst_omp_uint i = 0; i < nfeat; ++i) {