Use view for SparsePage exclusively. (#6590)

2021-01-11 18:04:55 +08:00
parent 78f2cd83d7
commit f2f7dd87b8
23 changed files with 151 additions and 113 deletions
--- a/src/linear/coordinate_common.h
+++ b/src/linear/coordinate_common.h
@@ -82,7 +82,8 @@ inline std::pair<double, double> GetGradient(int group_idx, int num_group, int f
                                             DMatrix *p_fmat) {
  double sum_grad = 0.0, sum_hess = 0.0;
  for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
-    auto col = batch[fidx];
+    auto page = batch.GetView();
+    auto col = page[fidx];
    const auto ndata = static_cast<bst_omp_uint>(col.size());
    for (bst_omp_uint j = 0; j < ndata; ++j) {
      const bst_float v = col[j].fvalue;
@@ -111,7 +112,8 @@ inline std::pair<double, double> GetGradientParallel(int group_idx, int num_grou
                                                     DMatrix *p_fmat) {
  double sum_grad = 0.0, sum_hess = 0.0;
  for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
-    auto col = batch[fidx];
+    auto page = batch.GetView();
+    auto col = page[fidx];
    const auto ndata = static_cast<bst_omp_uint>(col.size());
 #pragma omp parallel for schedule(static) reduction(+ : sum_grad, sum_hess)
    for (bst_omp_uint j = 0; j < ndata; ++j) {
@@ -166,7 +168,8 @@ inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
                                   DMatrix *p_fmat) {
  if (dw == 0.0f) return;
  for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
-    auto col = batch[fidx];
+    auto page = batch.GetView();
+    auto col = page[fidx];
    // update grad value
    const auto num_row = static_cast<bst_omp_uint>(col.size());
 #pragma omp parallel for schedule(static)
@@ -334,9 +337,10 @@ class GreedyFeatureSelector : public FeatureSelector {
    // Calculate univariate gradient sums
    std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
  for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
-      #pragma omp parallel for schedule(static)
+    auto page = batch.GetView();
+#pragma omp parallel for schedule(static)
      for (bst_omp_uint i = 0; i < nfeat; ++i) {
-        const auto col = batch[i];
+        const auto col = page[i];
        const bst_uint ndata = col.size();
        auto &sums = gpair_sums_[group_idx * nfeat + i];
        for (bst_uint j = 0u; j < ndata; ++j) {
@@ -399,10 +403,11 @@ class ThriftyFeatureSelector : public FeatureSelector {
    // Calculate univariate gradient sums
    std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
    for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
-// column-parallel is usually faster than row-parallel
+      auto page = batch.GetView();
+      // column-parallel is usually faster than row-parallel
 #pragma omp parallel for schedule(static)
      for (bst_omp_uint i = 0; i < nfeat; ++i) {
-        const auto col = batch[i];
+        const auto col = page[i];
        const bst_uint ndata = col.size();
        for (bst_uint gid = 0u; gid < ngroup; ++gid) {
          auto &sums = gpair_sums_[gid * nfeat + i];
--- a/src/linear/updater_gpu_coordinate.cu
+++ b/src/linear/updater_gpu_coordinate.cu
@@ -60,6 +60,7 @@ class GPUCoordinateUpdater : public LinearUpdater {  // NOLINT

    CHECK(p_fmat->SingleColBlock());
    SparsePage const& batch = *(p_fmat->GetBatches<CSCPage>().begin());
+    auto page = batch.GetView();

    if (IsEmpty()) {
      return;
@@ -72,7 +73,7 @@ class GPUCoordinateUpdater : public LinearUpdater {  // NOLINT
    row_ptr_ = {0};
    // iterate through columns
    for (size_t fidx = 0; fidx < batch.Size(); fidx++) {
-      common::Span<Entry const> col = batch[fidx];
+      common::Span<Entry const> col = page[fidx];
      auto cmp = [](Entry e1, Entry e2) {
        return e1.index < e2.index;
      };
@@ -89,7 +90,7 @@ class GPUCoordinateUpdater : public LinearUpdater {  // NOLINT
    data_.resize(row_ptr_.back());
    gpair_.resize(num_row_ * model_param.num_output_group);
    for (size_t fidx = 0; fidx < batch.Size(); fidx++) {
-      auto col = batch[fidx];
+      auto col = page[fidx];
      auto seg = column_segments[fidx];
      dh::safe_cuda(cudaMemcpy(
          data_.data().get() + row_ptr_[fidx],
--- a/src/linear/updater_shotgun.cc
+++ b/src/linear/updater_shotgun.cc
@@ -52,6 +52,7 @@ class ShotgunUpdater : public LinearUpdater {
    selector_->Setup(*model, in_gpair->ConstHostVector(), p_fmat,
                     param_.reg_alpha_denorm, param_.reg_lambda_denorm, 0);
    for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
+      auto page = batch.GetView();
      const auto nfeat = static_cast<bst_omp_uint>(batch.Size());
 #pragma omp parallel for schedule(static)
      for (bst_omp_uint i = 0; i < nfeat; ++i) {
@@ -60,7 +61,7 @@ class ShotgunUpdater : public LinearUpdater {
           param_.reg_lambda_denorm);
        if (ii < 0) continue;
        const bst_uint fid = ii;
-        auto col = batch[ii];
+        auto col = page[ii];
        for (int gid = 0; gid < ngroup; ++gid) {
          double sum_grad = 0.0, sum_hess = 0.0;
          for (auto& c : col) {