Fix clang-tidy warnings. (#4149)

* Upgrade gtest for clang-tidy. * Use CMake to install GTest instead of mv. * Don't enforce clang-tidy to return 0 due to errors in thrust. * Add a small test for tidy itself. * Reformat.
2019-03-13 02:25:51 +08:00
parent 259fb809e9
commit 7b9043cf71
41 changed files with 775 additions and 628 deletions
--- a/src/common/column_matrix.h
+++ b/src/common/column_matrix.h
@@ -42,7 +42,9 @@ class Column {
  uint32_t GetBaseIdx() const { return index_base_; }
  ColumnType GetType() const { return type_; }
  size_t GetRowIdx(size_t idx) const {
-    return type_ == ColumnType::kDenseColumn ? idx : row_ind_[idx];
+    // clang-tidy worries that row_ind_ might be a nullptr, which is possible,
+    // but low level structure is not safe anyway.
+    return type_ == ColumnType::kDenseColumn ? idx : row_ind_[idx];  // NOLINT
  }
  bool IsMissing(size_t idx) const {
    return index_[idx] == std::numeric_limits<uint32_t>::max();
@@ -68,7 +70,7 @@ class ColumnMatrix {

  // construct column matrix from GHistIndexMatrix
  inline void Init(const GHistIndexMatrix& gmat,
-                double  sparse_threshold) {
+                   double  sparse_threshold) {
    const auto nfeature = static_cast<bst_uint>(gmat.cut.row_ptr.size() - 1);
    const size_t nrow = gmat.row_ptr.size() - 1;

--- a/src/common/device_helpers.cuh
+++ b/src/common/device_helpers.cuh
@@ -772,7 +772,7 @@ template <typename T>
 typename std::iterator_traits<T>::value_type SumReduction(
    dh::CubMemory &tmp_mem, T in, int nVals) {
  using ValueT = typename std::iterator_traits<T>::value_type;
-  size_t tmpSize;
+  size_t tmpSize {0};
  ValueT *dummy_out = nullptr;
  dh::safe_cuda(cub::DeviceReduce::Sum(nullptr, tmpSize, in, dummy_out, nVals));
  // Allocate small extra memory for the return value
--- a/src/common/hist_util.cc
+++ b/src/common/hist_util.cc
@@ -548,7 +548,7 @@ void GHistBuilder::BuildBlockHist(const std::vector<GradientPair>& gpair,
  const size_t rest = nrows % kUnroll;

 #if defined(_OPENMP)
-  const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
+  const auto nthread = static_cast<bst_omp_uint>(this->nthread_);  // NOLINT
 #endif  // defined(_OPENMP)
  tree::GradStats* p_hist = hist.data();

@@ -594,7 +594,7 @@ void GHistBuilder::SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow pa
  const uint32_t rest = nbins % kUnroll;

 #if defined(_OPENMP)
-  const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
+  const auto nthread = static_cast<bst_omp_uint>(this->nthread_);  // NOLINT
 #endif  // defined(_OPENMP)
  tree::GradStats* p_self = self.data();
  tree::GradStats* p_sibling = sibling.data();
--- a/src/common/hist_util.cu
+++ b/src/common/hist_util.cu
@@ -24,13 +24,14 @@ namespace common {

 using WXQSketch = HistCutMatrix::WXQSketch;

-__global__ void find_cuts_k
+__global__ void FindCutsK
 (WXQSketch::Entry* __restrict__ cuts, const bst_float* __restrict__ data,
 const float* __restrict__ cum_weights, int nsamples, int ncuts) {
  // ncuts < nsamples
  int icut = threadIdx.x + blockIdx.x * blockDim.x;
-  if (icut >= ncuts)
+  if (icut >= ncuts) {
    return;
+  }
  WXQSketch::Entry v;
  int isample = 0;
  if (icut == 0) {
@@ -55,7 +56,7 @@ struct IsNotNaN {
  __device__ bool operator()(float a) const { return !isnan(a); }
 };

-__global__ void unpack_features_k
+__global__ void UnpackFeaturesK
 (float* __restrict__ fvalues, float* __restrict__ feature_weights,
 const size_t* __restrict__ row_ptrs, const float* __restrict__ weights,
 Entry* entries, size_t nrows_array, int ncols, size_t row_begin_ptr,
@@ -75,7 +76,7 @@ __global__ void unpack_features_k
  // if and only if it is also written to features
  if (!isnan(entry.fvalue) && (weights == nullptr || !isnan(weights[irow]))) {
    fvalues[ind] = entry.fvalue;
-    if (feature_weights != nullptr) {
+    if (feature_weights != nullptr && weights != nullptr) {
      feature_weights[ind] = weights[irow];
    }
  }
@@ -84,7 +85,7 @@ __global__ void unpack_features_k
 // finds quantiles on the GPU
 struct GPUSketcher {
  // manage memory for a single GPU
-  struct DeviceShard {
+  class DeviceShard {
    int device_;
    bst_uint row_begin_;  // The row offset for this shard
    bst_uint row_end_;
@@ -110,6 +111,7 @@ struct GPUSketcher {
    thrust::device_vector<size_t> num_elements_;
    thrust::device_vector<char> tmp_storage_;

+   public:
    DeviceShard(int device, bst_uint row_begin, bst_uint row_end,
                tree::TrainParam param) :
      device_(device), row_begin_(row_begin), row_end_(row_end),
@@ -268,7 +270,7 @@ struct GPUSketcher {
      } else if (n_cuts_cur_[icol] > 0) {
        // if more elements than cuts: use binary search on cumulative weights
        int block = 256;
-        find_cuts_k<<<dh::DivRoundUp(n_cuts_cur_[icol], block), block>>>
+        FindCutsK<<<dh::DivRoundUp(n_cuts_cur_[icol], block), block>>>
          (cuts_d_.data().get() + icol * n_cuts_, fvalues_cur_.data().get(),
           weights2_.data().get(), n_unique, n_cuts_cur_[icol]);
        dh::safe_cuda(cudaGetLastError());  // NOLINT
@@ -309,7 +311,7 @@ struct GPUSketcher {
      dim3 block3(64, 4, 1);
      dim3 grid3(dh::DivRoundUp(batch_nrows, block3.x),
                 dh::DivRoundUp(num_cols_, block3.y), 1);
-      unpack_features_k<<<grid3, block3>>>
+      UnpackFeaturesK<<<grid3, block3>>>
        (fvalues_.data().get(), has_weights_ ? feature_weights_.data().get() : nullptr,
         row_ptrs_.data().get() + batch_row_begin,
         has_weights_ ? weights_.data().get() : nullptr, entries_.data().get(),
@@ -340,6 +342,10 @@ struct GPUSketcher {
        SketchBatch(row_batch, info, gpu_batch);
      }
    }
+
+    void GetSummary(WXQSketch::SummaryContainer *summary, size_t const icol) {
+      sketches_[icol].GetSummary(summary);
+    }
  };

  void Sketch(const SparsePage& batch, const MetaInfo& info,
@@ -368,8 +374,8 @@ struct GPUSketcher {
    WXQSketch::SummaryContainer summary;
    for (int icol = 0; icol < num_cols; ++icol) {
      sketches[icol].Init(batch.Size(), 1.0 / (8 * param_.max_bin));
-      for (int shard = 0; shard < shards_.size(); ++shard) {
-        shards_[shard]->sketches_[icol].GetSummary(&summary);
+      for (auto &shard : shards_) {
+        shard->GetSummary(&summary, icol);
        sketches[icol].PushSummary(summary);
      }
    }
@@ -381,6 +387,7 @@ struct GPUSketcher {
    dist_ = GPUDistribution::Block(GPUSet::All(param_.gpu_id, param_.n_gpus, n_rows));
  }

+ private:
  std::vector<std::unique_ptr<DeviceShard>> shards_;
  tree::TrainParam param_;
  GPUDistribution dist_;
--- a/src/common/hist_util.h
+++ b/src/common/hist_util.h
@@ -38,6 +38,7 @@ struct HistCutMatrix {
  void Init(std::vector<WXQSketch>* sketchs, uint32_t max_num_bins);

  HistCutMatrix();
+  size_t NumBins() const { return row_ptr.back(); }

 protected:
  virtual size_t SearchGroupIndFromBaseRow(
--- a/src/common/host_device_vector.cc
+++ b/src/common/host_device_vector.cc
@@ -18,6 +18,11 @@ struct HostDeviceVectorImpl {
  explicit HostDeviceVectorImpl(size_t size, T v) : data_h_(size, v), distribution_() {}
  HostDeviceVectorImpl(std::initializer_list<T> init) : data_h_(init), distribution_() {}
  explicit HostDeviceVectorImpl(std::vector<T>  init) : data_h_(std::move(init)), distribution_() {}
+
+  std::vector<T>& Vec() { return data_h_; }
+  GPUDistribution& Dist() { return distribution_; }
+
+ private:
  std::vector<T> data_h_;
  GPUDistribution distribution_;
 };
@@ -64,14 +69,14 @@ HostDeviceVector<T>& HostDeviceVector<T>::operator=(const HostDeviceVector<T>& o
 }

 template <typename T>
-size_t HostDeviceVector<T>::Size() const { return impl_->data_h_.size(); }
+size_t HostDeviceVector<T>::Size() const { return impl_->Vec().size(); }

 template <typename T>
 GPUSet HostDeviceVector<T>::Devices() const { return GPUSet::Empty(); }

 template <typename T>
 const GPUDistribution& HostDeviceVector<T>::Distribution() const {
-  return impl_->distribution_;
+  return impl_->Dist();
 }

 template <typename T>
@@ -93,16 +98,16 @@ common::Span<const T> HostDeviceVector<T>::ConstDeviceSpan(int device) const {
 }

 template <typename T>
-std::vector<T>& HostDeviceVector<T>::HostVector() { return impl_->data_h_; }
+std::vector<T>& HostDeviceVector<T>::HostVector() { return impl_->Vec(); }

 template <typename T>
 const std::vector<T>& HostDeviceVector<T>::ConstHostVector() const {
-  return impl_->data_h_;
+  return impl_->Vec();
 }

 template <typename T>
 void HostDeviceVector<T>::Resize(size_t new_size, T v) {
-  impl_->data_h_.resize(new_size, v);
+  impl_->Vec().resize(new_size, v);
 }

 template <typename T>
--- a/src/common/host_device_vector.cu
+++ b/src/common/host_device_vector.cu
@@ -23,10 +23,10 @@ void SetCudaSetDeviceHandler(void (*handler)(int)) {
 // wrapper over access with useful methods
 class Permissions {
  GPUAccess access_;
-  explicit Permissions(GPUAccess access) : access_(access) {}
+  explicit Permissions(GPUAccess access) : access_{access} {}

 public:
-  Permissions() : access_(GPUAccess::kNone) {}
+  Permissions() : access_{GPUAccess::kNone} {}
  explicit Permissions(bool perm)
    : access_(perm ? GPUAccess::kWrite : GPUAccess::kNone) {}

@@ -46,8 +46,8 @@ template <typename T>
 struct HostDeviceVectorImpl {
  struct DeviceShard {
    DeviceShard()
-        : proper_size_(0), device_(-1), start_(0), perm_d_(false),
-        cached_size_(~0), vec_(nullptr) {}
+      : proper_size_{0}, device_{-1}, start_{0}, perm_d_{false},
+        cached_size_{static_cast<size_t>(~0)}, vec_{nullptr} {}

    void Init(HostDeviceVectorImpl<T>* vec, int device) {
      if (vec_ == nullptr) { vec_ = vec; }
@@ -154,6 +154,13 @@ struct HostDeviceVectorImpl {
      }
    }

+    T*     Raw()                    { return data_.data().get(); }
+    size_t Start()            const { return start_; }
+    size_t DataSize()         const { return data_.size(); }
+    Permissions& Perm()             { return perm_d_; }
+    Permissions const& Perm() const { return perm_d_; }
+
+   private:
    int device_;
    thrust::device_vector<T> data_;
    // cached vector size
@@ -216,41 +223,42 @@ struct HostDeviceVectorImpl {
  T* DevicePointer(int device) {
    CHECK(distribution_.devices_.Contains(device));
    LazySyncDevice(device, GPUAccess::kWrite);
-    return shards_.at(distribution_.devices_.Index(device)).data_.data().get();
+    return shards_.at(distribution_.devices_.Index(device)).Raw();
  }

  const T* ConstDevicePointer(int device) {
    CHECK(distribution_.devices_.Contains(device));
    LazySyncDevice(device, GPUAccess::kRead);
-    return shards_.at(distribution_.devices_.Index(device)).data_.data().get();
+    return shards_.at(distribution_.devices_.Index(device)).Raw();
  }

  common::Span<T> DeviceSpan(int device) {
    GPUSet devices = distribution_.devices_;
    CHECK(devices.Contains(device));
    LazySyncDevice(device, GPUAccess::kWrite);
-    return {shards_.at(devices.Index(device)).data_.data().get(),
-        static_cast<typename common::Span<T>::index_type>(DeviceSize(device))};
+    return {shards_.at(devices.Index(device)).Raw(),
+          static_cast<typename common::Span<T>::index_type>(DeviceSize(device))};
  }

  common::Span<const T> ConstDeviceSpan(int device) {
    GPUSet devices = distribution_.devices_;
    CHECK(devices.Contains(device));
    LazySyncDevice(device, GPUAccess::kRead);
-    return {shards_.at(devices.Index(device)).data_.data().get(),
-        static_cast<typename common::Span<const T>::index_type>(DeviceSize(device))};
+    using SpanInd = typename common::Span<const T>::index_type;
+    return {shards_.at(devices.Index(device)).Raw(),
+          static_cast<SpanInd>(DeviceSize(device))};
  }

  size_t DeviceSize(int device) {
    CHECK(distribution_.devices_.Contains(device));
    LazySyncDevice(device, GPUAccess::kRead);
-    return shards_.at(distribution_.devices_.Index(device)).data_.size();
+    return shards_.at(distribution_.devices_.Index(device)).DataSize();
  }

  size_t DeviceStart(int device) {
    CHECK(distribution_.devices_.Contains(device));
    LazySyncDevice(device, GPUAccess::kRead);
-    return shards_.at(distribution_.devices_.Index(device)).start_;
+    return shards_.at(distribution_.devices_.Index(device)).Start();
  }

  thrust::device_ptr<T> tbegin(int device) {  // NOLINT
@@ -293,7 +301,7 @@ struct HostDeviceVectorImpl {
    }
  }

-  void Fill(T v) {
+  void Fill(T v) {  // NOLINT
    if (perm_h_.CanWrite()) {
      std::fill(data_h_.begin(), data_h_.end(), v);
    } else {
@@ -389,7 +397,7 @@ struct HostDeviceVectorImpl {
    if (perm_h_.CanRead()) {
      // data is present, just need to deny access to the device
      dh::ExecuteIndexShards(&shards_, [&](int idx, DeviceShard& shard) {
-          shard.perm_d_.DenyComplementary(access);
+          shard.Perm().DenyComplementary(access);
        });
      perm_h_.Grant(access);
      return;
@@ -412,9 +420,10 @@ struct HostDeviceVectorImpl {
  bool DeviceCanAccess(int device, GPUAccess access) {
    GPUSet devices = distribution_.Devices();
    if (!devices.Contains(device)) { return false; }
-    return shards_.at(devices.Index(device)).perm_d_.CanAccess(access);
+    return shards_.at(devices.Index(device)).Perm().CanAccess(access);
  }

+ private:
  std::vector<T> data_h_;
  Permissions perm_h_;
  // the total size of the data stored on the devices
--- a/src/common/math.h
+++ b/src/common/math.h
@@ -7,6 +7,8 @@
 #ifndef XGBOOST_COMMON_MATH_H_
 #define XGBOOST_COMMON_MATH_H_

+#include <xgboost/base.h>
+
 #include <utility>
 #include <vector>
 #include <cmath>
--- a/src/common/span.h
+++ b/src/common/span.h
@@ -622,8 +622,8 @@ XGBOOST_DEVICE auto as_writable_bytes(Span<T, E> s) __span_noexcept ->  // NOLIN
  return {reinterpret_cast<byte*>(s.data()), s.size_bytes()};
 }

-}  // namespace common NOLINT
-}  // namespace xgboost NOLINT
+}  // namespace common
+}  // namespace xgboost

 #if defined(_MSC_VER) &&_MSC_VER < 1910
 #undef constexpr