Clean up training code. (#3825)

* Remove GHistRow, GHistEntry, GHistIndexRow.
* Remove kSimpleStats.
* Remove CheckInfo, SetLeafVec in GradStats and in SKStats.
* Clean up the GradStats.
* Cleanup calcgain.
* Move LossChangeMissing out of common.
* Remove [] operator from GHistIndexBlock.
2019-02-07 14:22:13 +08:00
parent 325b16bccd
commit 017c97b8ce
19 changed files with 306 additions and 406 deletions
--- a/src/common/device_helpers.cuh
+++ b/src/common/device_helpers.cuh
@@ -1005,7 +1005,7 @@ class AllReducer {
   */
  void Synchronize() {
 #ifdef XGBOOST_USE_NCCL
-    for (int i = 0; i < device_ordinals.size(); i++) {
+    for (size_t i = 0; i < device_ordinals.size(); i++) {
      dh::safe_cuda(cudaSetDevice(device_ordinals[i]));
      dh::safe_cuda(cudaStreamSynchronize(streams[i]));
    }
@@ -1051,7 +1051,7 @@ template <typename T, typename FunctionT>
 void ExecuteIndexShards(std::vector<T> *shards, FunctionT f) {
  SaveCudaContext{[&]() {
 #pragma omp parallel for schedule(static, 1) if (shards->size() > 1)
-    for (int shard = 0; shard < shards->size(); ++shard) {
+    for (size_t shard = 0; shard < shards->size(); ++shard) {
      f(shard, shards->at(shard));
    }
  }};
--- a/src/common/hist_util.cc
+++ b/src/common/hist_util.cc
@@ -1,5 +1,5 @@
 /*!
- * Copyright 2017 by Contributors
+ * Copyright 2017-2018 by Contributors
 * \file hist_util.h
 * \brief Utilities to store histograms
 * \author Philip Cho, Tianqi Chen
@@ -417,7 +417,7 @@ void GHistBuilder::BuildHist(const std::vector<GradientPair>& gpair,
  const size_t* row_ptr =  gmat.row_ptr.data();
  const float* pgh = reinterpret_cast<const float*>(gpair.data());

-  double* hist_data = reinterpret_cast<double*>(hist.begin);
+  double* hist_data = reinterpret_cast<double*>(hist.data());
  double* data = reinterpret_cast<double*>(data_.data());

  const size_t block_size = 512;
@@ -432,11 +432,11 @@ void GHistBuilder::BuildHist(const std::vector<GradientPair>& gpair,
  size_t no_prefetch_size = prefetch_offset + cache_line_size/sizeof(*rid);
  no_prefetch_size = no_prefetch_size > nrows ? nrows : no_prefetch_size;

-  #pragma omp parallel for num_threads(nthread_to_process) schedule(guided)
+#pragma omp parallel for num_threads(nthread_to_process) schedule(guided)
  for (bst_omp_uint iblock = 0; iblock < n_blocks; iblock++) {
    dmlc::omp_uint tid = omp_get_thread_num();
    double* data_local_hist = ((nthread_to_process == 1) ? hist_data :
-            reinterpret_cast<double*>(data_.data() + tid * nbins_));
+                               reinterpret_cast<double*>(data_.data() + tid * nbins_));

    if (!thread_init_[tid]) {
      memset(data_local_hist, '\0', 2*nbins_*sizeof(double));
@@ -477,7 +477,7 @@ void GHistBuilder::BuildHist(const std::vector<GradientPair>& gpair,
      }
    }

-    #pragma omp parallel for num_threads(std::min(nthread, n_blocks)) schedule(guided)
+#pragma omp parallel for num_threads(std::min(nthread, n_blocks)) schedule(guided)
    for (bst_omp_uint iblock = 0; iblock < n_blocks; iblock++) {
      const size_t istart = iblock * block_size;
      const size_t iend = (((iblock + 1) * block_size > size) ? size : istart + block_size);
@@ -507,8 +507,9 @@ void GHistBuilder::BuildBlockHist(const std::vector<GradientPair>& gpair,
 #if defined(_OPENMP)
  const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
 #endif
+  tree::GradStats* p_hist = hist.data();

-  #pragma omp parallel for num_threads(nthread) schedule(guided)
+#pragma omp parallel for num_threads(nthread) schedule(guided)
  for (bst_omp_uint bid = 0; bid < nblock; ++bid) {
    auto gmat = gmatb[bid];

@@ -517,20 +518,17 @@ void GHistBuilder::BuildBlockHist(const std::vector<GradientPair>& gpair,
      size_t ibegin[kUnroll];
      size_t iend[kUnroll];
      GradientPair stat[kUnroll];
+
      for (int k = 0; k < kUnroll; ++k) {
        rid[k] = row_indices.begin[i + k];
-      }
-      for (int k = 0; k < kUnroll; ++k) {
        ibegin[k] = gmat.row_ptr[rid[k]];
        iend[k] = gmat.row_ptr[rid[k] + 1];
-      }
-      for (int k = 0; k < kUnroll; ++k) {
        stat[k] = gpair[rid[k]];
      }
      for (int k = 0; k < kUnroll; ++k) {
        for (size_t j = ibegin[k]; j < iend[k]; ++j) {
          const uint32_t bin = gmat.index[j];
-          hist.begin[bin].Add(stat[k]);
+          p_hist[bin].Add(stat[k]);
        }
      }
    }
@@ -541,7 +539,7 @@ void GHistBuilder::BuildBlockHist(const std::vector<GradientPair>& gpair,
      const GradientPair stat = gpair[rid];
      for (size_t j = ibegin; j < iend; ++j) {
        const uint32_t bin = gmat.index[j];
-        hist.begin[bin].Add(stat);
+        p_hist[bin].Add(stat);
      }
    }
  }
@@ -555,24 +553,27 @@ void GHistBuilder::SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow pa
 #if defined(_OPENMP)
  const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
 #endif
+  tree::GradStats* p_self = self.data();
+  tree::GradStats* p_sibling = sibling.data();
+  tree::GradStats* p_parent = parent.data();

-  #pragma omp parallel for num_threads(nthread) schedule(static)
+#pragma omp parallel for num_threads(nthread) schedule(static)
  for (bst_omp_uint bin_id = 0;
       bin_id < static_cast<bst_omp_uint>(nbins - rest); bin_id += kUnroll) {
-    GHistEntry pb[kUnroll];
-    GHistEntry sb[kUnroll];
+    tree::GradStats pb[kUnroll];
+    tree::GradStats sb[kUnroll];
    for (int k = 0; k < kUnroll; ++k) {
-      pb[k] = parent.begin[bin_id + k];
+      pb[k] = p_parent[bin_id + k];
    }
    for (int k = 0; k < kUnroll; ++k) {
-      sb[k] = sibling.begin[bin_id + k];
+      sb[k] = p_sibling[bin_id + k];
    }
    for (int k = 0; k < kUnroll; ++k) {
-      self.begin[bin_id + k].SetSubtract(pb[k], sb[k]);
+      p_self[bin_id + k].SetSubstract(pb[k], sb[k]);
    }
  }
  for (uint32_t bin_id = nbins - rest; bin_id < nbins; ++bin_id) {
-    self.begin[bin_id].SetSubtract(parent.begin[bin_id], sibling.begin[bin_id]);
+    p_self[bin_id].SetSubstract(p_parent[bin_id], p_sibling[bin_id]);
  }
 }

--- a/src/common/hist_util.h
+++ b/src/common/hist_util.h
@@ -16,45 +16,8 @@
 #include "../include/rabit/rabit.h"

 namespace xgboost {
-
 namespace common {

-/*! \brief sums of gradient statistics corresponding to a histogram bin */
-struct GHistEntry {
-  /*! \brief sum of first-order gradient statistics */
-  double sum_grad{0};
-  /*! \brief sum of second-order gradient statistics */
-  double sum_hess{0};
-
-  GHistEntry()  = default;
-
-  inline void Clear() {
-    sum_grad = sum_hess = 0;
-  }
-
-  /*! \brief add a GradientPair to the sum */
-  inline void Add(const GradientPair& e) {
-    sum_grad += e.GetGrad();
-    sum_hess += e.GetHess();
-  }
-
-  /*! \brief add a GHistEntry to the sum */
-  inline void Add(const GHistEntry& e) {
-    sum_grad += e.sum_grad;
-    sum_hess += e.sum_hess;
-  }
-
-  inline static void Reduce(GHistEntry& a, const GHistEntry& b) { // NOLINT(*)
-    a.Add(b);
-  }
-
-  /*! \brief set sum to be difference of two GHistEntry's */
-  inline void SetSubtract(const GHistEntry& a, const GHistEntry& b) {
-    sum_grad = a.sum_grad - b.sum_grad;
-    sum_hess = a.sum_hess - b.sum_hess;
-  }
-};
-
 /*! \brief Cut configuration for all the features. */
 struct HistCutMatrix {
  /*! \brief Unit pointer to rows by element position */
@@ -83,15 +46,7 @@ void DeviceSketch
 * \brief A single row in global histogram index.
 *  Directly represent the global index in the histogram entry.
 */
-struct GHistIndexRow {
-  /*! \brief The index of the histogram */
-  const uint32_t* index;
-  /*! \brief The size of the histogram */
-  size_t size;
-  GHistIndexRow() = default;
-  GHistIndexRow(const uint32_t* index, size_t size)
-      : index(index), size(size) {}
-};
+using GHistIndexRow = Span<uint32_t const>;

 /*!
 * \brief preprocessed global index matrix, in CSR format
@@ -111,7 +66,9 @@ struct GHistIndexMatrix {
  void Init(DMatrix* p_fmat, int max_num_bins);
  // get i-th row
  inline GHistIndexRow operator[](size_t i) const {
-    return {&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]};
+    return {&index[0] + row_ptr[i],
+            static_cast<GHistIndexRow::index_type>(
+                row_ptr[i + 1] - row_ptr[i])};
  }
  inline void GetFeatureCounts(size_t* counts) const {
    auto nfeature = cut.row_ptr.size() - 1;
@@ -134,11 +91,6 @@ struct GHistIndexBlock {

  inline GHistIndexBlock(const size_t* row_ptr, const uint32_t* index)
    : row_ptr(row_ptr), index(index) {}
-
-  // get i-th row
-  inline GHistIndexRow operator[](size_t i) const {
-    return {&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]};
-  }
 };

 class ColumnMatrix;
@@ -171,21 +123,12 @@ class GHistIndexBlockMatrix {
 };

 /*!
- * \brief histogram of gradient statistics for a single node.
- *  Consists of multiple GHistEntry's, each entry showing total graident statistics
+ * \brief histogram of gradient statistics for a single node.
+ *  Consists of multiple GradStats, each entry showing total gradient statistics
 *     for that particular bin
 *  Uses global bin id so as to represent all features simultaneously
 */
-struct GHistRow {
-  /*! \brief base pointer to first entry */
-  GHistEntry* begin;
-  /*! \brief number of entries */
-  uint32_t size;
-
-  GHistRow() = default;
-  GHistRow(GHistEntry* begin, uint32_t size)
-    : begin(begin), size(size) {}
-};
+using GHistRow = Span<tree::GradStats>;

 /*!
 * \brief histogram of gradient statistics for multiple nodes
@@ -193,27 +136,29 @@ struct GHistRow {
 class HistCollection {
 public:
  // access histogram for i-th node
-  inline GHistRow operator[](bst_uint nid) const {
+  GHistRow operator[](bst_uint nid) const {
    constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
    CHECK_NE(row_ptr_[nid], kMax);
-    return {const_cast<GHistEntry*>(dmlc::BeginPtr(data_) + row_ptr_[nid]), nbins_};
+    tree::GradStats* ptr =
+        const_cast<tree::GradStats*>(dmlc::BeginPtr(data_) + row_ptr_[nid]);
+    return {ptr, nbins_};
  }

  // have we computed a histogram for i-th node?
-  inline bool RowExists(bst_uint nid) const {
+  bool RowExists(bst_uint nid) const {
    const uint32_t k_max = std::numeric_limits<uint32_t>::max();
    return (nid < row_ptr_.size() && row_ptr_[nid] != k_max);
  }

  // initialize histogram collection
-  inline void Init(uint32_t nbins) {
+  void Init(uint32_t nbins) {
    nbins_ = nbins;
    row_ptr_.clear();
    data_.clear();
  }

  // create an empty histogram for i-th node
-  inline void AddHistRow(bst_uint nid) {
+  void AddHistRow(bst_uint nid) {
    constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
    if (nid >= row_ptr_.size()) {
      row_ptr_.resize(nid + 1, kMax);
@@ -228,7 +173,7 @@ class HistCollection {
  /*! \brief number of all bins over all features */
  uint32_t nbins_;

-  std::vector<GHistEntry> data_;
+  std::vector<tree::GradStats> data_;

  /*! \brief row_ptr_[nid] locates bin for historgram of node nid */
  std::vector<size_t> row_ptr_;
@@ -268,8 +213,8 @@ class GHistBuilder {
  size_t nthread_;
  /*! \brief number of all bins over all features */
  uint32_t nbins_;
-  std::vector<GHistEntry> data_;
  std::vector<size_t> thread_init_;
+  std::vector<tree::GradStats> data_;
 };


--- a/src/common/host_device_vector.h
+++ b/src/common/host_device_vector.h
@@ -140,7 +140,7 @@ class GPUDistribution {
    return begin;
  }

-  size_t ShardSize(size_t size, int index) const {
+  size_t ShardSize(size_t size, size_t index) const {
    if (size == 0) { return 0; }
    if (offsets_.size() > 0) {
      // explicit offsets are provided
@@ -154,7 +154,7 @@ class GPUDistribution {
    return end - begin;
  }

-  size_t ShardProperSize(size_t size, int index) const {
+  size_t ShardProperSize(size_t size, size_t index) const {
    if (size == 0) { return 0; }
    return ShardSize(size, index) - (devices_.Size() - 1 > index ? overlap_ : 0);
  }
--- a/src/common/span.h
+++ b/src/common/span.h
@@ -554,8 +554,8 @@ class Span {
      detail::ptrdiff_t _offset,
      detail::ptrdiff_t _count = dynamic_extent) const {
    SPAN_CHECK(_offset >= 0 && _offset < size());
-    SPAN_CHECK(_count == dynamic_extent ||
-               _count >= 0 && _offset + _count <= size());
+    SPAN_CHECK((_count == dynamic_extent) ||
+               (_count >= 0 && _offset + _count <= size()));

    return {data() + _offset, _count ==
            dynamic_extent ? size() - _offset : _count};
--- a/src/common/transform.h
+++ b/src/common/transform.h
@@ -58,12 +58,12 @@ class Transform {
   public:
    Evaluator(Functor func, Range range, GPUSet devices, bool reshard) :
        func_(func), range_{std::move(range)},
-        distribution_{std::move(GPUDistribution::Block(devices))},
-        reshard_{reshard} {}
+        reshard_{reshard},
+        distribution_{std::move(GPUDistribution::Block(devices))} {}
    Evaluator(Functor func, Range range, GPUDistribution dist,
              bool reshard) :
-        func_(func), range_{std::move(range)}, distribution_{std::move(dist)},
-        reshard_{reshard} {}
+        func_(func), range_{std::move(range)}, reshard_{reshard},
+        distribution_{std::move(dist)} {}

    /*!
     * \brief Evaluate the functor with input pointers to HostDeviceVector.
@@ -159,7 +159,7 @@ class Transform {

    template <typename... HDV>
    void LaunchCPU(Functor func, HDV*... vectors) const {
-      auto end = *(range_.end());
+      omp_ulong end = static_cast<omp_ulong>(*(range_.end()));
 #pragma omp parallel for schedule(static)
      for (omp_ulong idx = 0; idx < end; ++idx) {
        func(idx, UnpackHDV(vectors)...);