Small cleanup for histogram routines. (#9427)
* Small cleanup for histogram routines.

- Extract hist train param from GPU hist.
- Make histogram const after construction.
- Unify parameter names.
@@ -81,11 +81,11 @@ void InitilizeHistByZeroes(GHistRow hist, size_t begin, size_t end) {
 /*!
  * \brief Increment hist as dst += add in range [begin, end)
  */
-void IncrementHist(GHistRow dst, const GHistRow add, size_t begin, size_t end) {
-  double* pdst = reinterpret_cast<double*>(dst.data());
+void IncrementHist(GHistRow dst, ConstGHistRow add, std::size_t begin, std::size_t end) {
+  double *pdst = reinterpret_cast<double *>(dst.data());
   const double *padd = reinterpret_cast<const double *>(add.data());
 
-  for (size_t i = 2 * begin; i < 2 * end; ++i) {
+  for (std::size_t i = 2 * begin; i < 2 * end; ++i) {
     pdst[i] += padd[i];
   }
 }
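Note: GHistRow is a Span over GradientPairPrecise, a (grad, hess) pair of doubles, so a row of n bins is bit-compatible with a flat array of 2n doubles; that is what the reinterpret_cast above relies on. A minimal sketch of the same accumulation without the cast (the struct layout and the helper name IncrementHistRef are illustrative, not XGBoost's API):

    #include <cstddef>
    #include <vector>

    struct GradientPairPrecise {  // stand-in: two doubles, matching the double* view
      double grad{0.0}, hess{0.0};
    };

    // dst += add over bins [begin, end), written against the pair type directly.
    void IncrementHistRef(std::vector<GradientPairPrecise> *dst,
                          const std::vector<GradientPairPrecise> &add,
                          std::size_t begin, std::size_t end) {
      for (std::size_t i = begin; i < end; ++i) {
        (*dst)[i].grad += add[i].grad;  // pdst[2 * i]     += padd[2 * i]
        (*dst)[i].hess += add[i].hess;  // pdst[2 * i + 1] += padd[2 * i + 1]
      }
    }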
@@ -207,18 +207,23 @@ void RowsWiseBuildHistKernel(Span<GradientPair const> gpair,
 
   const size_t size = row_indices.Size();
   const size_t *rid = row_indices.begin;
-  auto const *pgh = reinterpret_cast<const float *>(gpair.data());
+  auto const *p_gpair = reinterpret_cast<const float *>(gpair.data());
   const BinIdxType *gradient_index = gmat.index.data<BinIdxType>();
 
   auto const &row_ptr = gmat.row_ptr.data();
   auto base_rowid = gmat.base_rowid;
-  const uint32_t *offsets = gmat.index.Offset();
-  auto get_row_ptr = [&](size_t ridx) {
+  uint32_t const *offsets = gmat.index.Offset();
+  // There's no feature-based compression if missing value is present.
+  if (kAnyMissing) {
+    CHECK(!offsets);
+  } else {
+    CHECK(offsets);
+  }
+
+  auto get_row_ptr = [&](bst_row_t ridx) {
     return kFirstPage ? row_ptr[ridx] : row_ptr[ridx - base_rowid];
   };
-  auto get_rid = [&](size_t ridx) {
-    return kFirstPage ? ridx : (ridx - base_rowid);
-  };
+  auto get_rid = [&](bst_row_t ridx) { return kFirstPage ? ridx : (ridx - base_rowid); };
 
   const size_t n_features =
       get_row_ptr(row_indices.begin[0] + 1) - get_row_ptr(row_indices.begin[0]);
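Note: the base_rowid adjustment exists because, with external memory, gradient-index pages after the first store their row pointers relative to the page's first row; the kFirstPage template flag lets the compiler drop the subtraction on the common single-page path. A small sketch of the translation rule (LocalRow is a hypothetical name):

    #include <cstddef>

    // Global-to-local row translation for one page: on the first page the ids
    // coincide; on later pages the page's starting row is subtracted.
    std::size_t LocalRow(std::size_t ridx, std::size_t base_rowid, bool first_page) {
      return first_page ? ridx : ridx - base_rowid;
    }

    int main() {
      // A page starting at global row 1000: global row 1005 is local row 5.
      return LocalRow(1005, 1000, /*first_page=*/false) == 5 ? 0 : 1;
    }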
@@ -228,7 +233,7 @@ void RowsWiseBuildHistKernel(Span<GradientPair const> gpair,
   // So we need to multiply each row-index/bin-index by 2
   // to work with gradient pairs as a singe row FP array
 
-  for (size_t i = 0; i < size; ++i) {
+  for (std::size_t i = 0; i < size; ++i) {
     const size_t icol_start =
         kAnyMissing ? get_row_ptr(rid[i]) : get_rid(rid[i]) * n_features;
     const size_t icol_end =
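Note: a worked example of the "multiply by 2" comment above, with hypothetical values. Gradient pairs are read as a flat float array, so row r's pair sits at indices 2*r and 2*r + 1, and in the dense (no-missing) case a row's bins are addressed by row id times the feature count:

    #include <cstddef>

    int main() {
      const std::size_t n_features = 4;
      const std::size_t row = 7;
      const std::size_t icol_start = row * n_features;       // bins of row 7 start at 28
      const std::size_t icol_end = icol_start + n_features;  // ...and end before 32
      const std::size_t idx_gh = 2 * row;  // grad at p_gpair[14], hess at p_gpair[15]
      return (icol_start == 28 && icol_end == 32 && idx_gh == 14) ? 0 : 1;
    }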
@@ -246,7 +251,7 @@ void RowsWiseBuildHistKernel(Span<GradientPair const> gpair,
           kAnyMissing ? get_row_ptr(rid[i + Prefetch::kPrefetchOffset] + 1)
                       : icol_start_prefetch + n_features;
 
-      PREFETCH_READ_T0(pgh + two * rid[i + Prefetch::kPrefetchOffset]);
+      PREFETCH_READ_T0(p_gpair + two * rid[i + Prefetch::kPrefetchOffset]);
       for (size_t j = icol_start_prefetch; j < icol_end_prefetch;
            j += Prefetch::GetPrefetchStep<uint32_t>()) {
         PREFETCH_READ_T0(gradient_index + j);
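Note: the kernel software-prefetches the gradient pair and bin indices of the row kPrefetchOffset iterations ahead, hiding memory latency behind the current row's arithmetic. PREFETCH_READ_T0 is XGBoost's own macro; on GCC/Clang a macro like it typically wraps the prefetch builtin (a plausible stand-in, not the project's actual definition):

    // Read prefetch (rw = 0) with maximal temporal locality (3, i.e. keep in all
    // cache levels, hence "T0"). A prefetch is only a hint and never faults.
    #define PREFETCH_READ_T0(addr) __builtin_prefetch((addr), 0, 3)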
@@ -255,12 +260,12 @@ void RowsWiseBuildHistKernel(Span<GradientPair const> gpair,
     const BinIdxType *gr_index_local = gradient_index + icol_start;
 
     // The trick with pgh_t buffer helps the compiler to generate faster binary.
-    const float pgh_t[] = {pgh[idx_gh], pgh[idx_gh + 1]};
+    const float pgh_t[] = {p_gpair[idx_gh], p_gpair[idx_gh + 1]};
     for (size_t j = 0; j < row_size; ++j) {
-      const uint32_t idx_bin = two * (static_cast<uint32_t>(gr_index_local[j]) +
-                                      (kAnyMissing ? 0 : offsets[j]));
+      const uint32_t idx_bin =
+          two * (static_cast<uint32_t>(gr_index_local[j]) + (kAnyMissing ? 0 : offsets[j]));
       auto hist_local = hist_data + idx_bin;
       *(hist_local) += pgh_t[0];
       *(hist_local + 1) += pgh_t[1];
     }
   }
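Note: the pgh_t buffer copies the row's two gradient values into locals before the inner loop; without it the compiler generally cannot prove that stores through hist_data never alias p_gpair, so it may reload both floats on every iteration. A condensed sketch of the pattern (names hypothetical):

    #include <cstddef>
    #include <cstdint>

    // Accumulate one row into its bins; g/h are read exactly once into registers.
    void AccumulateRow(double *hist, const float *gpair, std::size_t idx_gh,
                       const std::uint32_t *bins, std::size_t row_size) {
      const float pgh_t[] = {gpair[idx_gh], gpair[idx_gh + 1]};  // the buffer trick
      for (std::size_t j = 0; j < row_size; ++j) {
        double *cell = hist + 2 * bins[j];
        cell[0] += pgh_t[0];  // gradient
        cell[1] += pgh_t[1];  // hessian
      }
    }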
@@ -281,12 +286,10 @@ void ColsWiseBuildHistKernel(Span<GradientPair const> gpair,
   auto const &row_ptr = gmat.row_ptr.data();
   auto base_rowid = gmat.base_rowid;
   const uint32_t *offsets = gmat.index.Offset();
-  auto get_row_ptr = [&](size_t ridx) {
+  auto get_row_ptr = [&](bst_row_t ridx) {
     return kFirstPage ? row_ptr[ridx] : row_ptr[ridx - base_rowid];
   };
-  auto get_rid = [&](size_t ridx) {
-    return kFirstPage ? ridx : (ridx - base_rowid);
-  };
+  auto get_rid = [&](bst_row_t ridx) { return kFirstPage ? ridx : (ridx - base_rowid); };
 
   const size_t n_features = gmat.cut.Ptrs().size() - 1;
   const size_t n_columns = n_features;
 
@@ -362,6 +362,7 @@ bst_bin_t XGBOOST_HOST_DEV_INLINE BinarySearchBin(std::size_t begin, std::size_t
 }
 
 using GHistRow = Span<xgboost::GradientPairPrecise>;
+using ConstGHistRow = Span<xgboost::GradientPairPrecise const>;
 
 /*!
  * \brief fill a histogram by zeros
@@ -371,7 +372,7 @@ void InitilizeHistByZeroes(GHistRow hist, size_t begin, size_t end);
 /*!
  * \brief Increment hist as dst += add in range [begin, end)
  */
-void IncrementHist(GHistRow dst, const GHistRow add, size_t begin, size_t end);
+void IncrementHist(GHistRow dst, ConstGHistRow add, std::size_t begin, std::size_t end);
 
 /*!
  * \brief Copy hist from src to dst in range [begin, end)

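Note: ConstGHistRow is the read-only counterpart of GHistRow: the span itself is passed by value, but its elements are const, which is what lets a histogram become immutable after construction. XGBoost uses its own common::Span; std::span shows the same idea (a C++20 sketch, not the project's types):

    #include <span>
    #include <vector>

    struct GradientPairPrecise { double grad{0.0}, hess{0.0}; };  // stand-in

    using GHistRow = std::span<GradientPairPrecise>;             // mutable view
    using ConstGHistRow = std::span<GradientPairPrecise const>;  // read-only view

    double SumGrad(ConstGHistRow hist) {
      double total = 0.0;
      for (auto const &gh : hist) total += gh.grad;  // reads compile...
      // hist[0].grad = 1.0;  // ...writes would not: elements are const
      return total;
    }

    int main() {
      std::vector<GradientPairPrecise> storage(8);
      GHistRow hist{storage};  // mutable while the histogram is being built
      hist[0].grad = 0.5;
      return SumGrad(hist) == 0.5 ? 0 : 1;  // GHistRow converts to ConstGHistRow
    }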
@@ -136,7 +136,7 @@ class BlockedSpace2d {
 // Wrapper to implement nested parallelism with simple omp parallel for
 template <typename Func>
 void ParallelFor2d(const BlockedSpace2d& space, int nthreads, Func func) {
-  const size_t num_blocks_in_space = space.Size();
+  std::size_t n_blocks_in_space = space.Size();
   CHECK_GE(nthreads, 1);
 
   dmlc::OMPException exc;
@@ -144,11 +144,10 @@ void ParallelFor2d(const BlockedSpace2d& space, int nthreads, Func func) {
   {
     exc.Run([&]() {
       size_t tid = omp_get_thread_num();
-      size_t chunck_size =
-          num_blocks_in_space / nthreads + !!(num_blocks_in_space % nthreads);
+      size_t chunck_size = n_blocks_in_space / nthreads + !!(n_blocks_in_space % nthreads);
 
-      size_t begin = chunck_size * tid;
-      size_t end = std::min(begin + chunck_size, num_blocks_in_space);
+      std::size_t begin = chunck_size * tid;
+      std::size_t end = std::min(begin + chunck_size, n_blocks_in_space);
       for (auto i = begin; i < end; i++) {
         func(space.GetFirstDimension(i), space.GetRange(i));
       }
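Note: chunck_size is a ceiling division: n / k rounds down, and !!(n % k) adds one exactly when there is a remainder, so every block is covered and only the last thread may receive a short chunk. A worked check (DivRoundUp is a hypothetical helper):

    #include <cstddef>

    // ceil(n / k) without floating point; !! collapses any remainder to 1.
    std::size_t DivRoundUp(std::size_t n, std::size_t k) { return n / k + !!(n % k); }

    int main() {
      // 10 blocks across 4 threads -> chunks of 3; threads cover 3+3+3+1 blocks.
      // 8 blocks across 4 threads  -> chunks of 2, an exact split.
      return (DivRoundUp(10, 4) == 3 && DivRoundUp(8, 4) == 2) ? 0 : 1;
    }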