Upgrade clang-tidy on CI. (#5469)

* Correct all clang-tidy errors.
* Upgrade clang-tidy to 10 on CI.

Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
2020-04-05 04:42:29 +08:00
parent 30e94ddd04
commit 0012f2ef93
107 changed files with 932 additions and 903 deletions
--- a/src/common/bitfield.h
+++ b/src/common/bitfield.h
@@ -56,8 +56,8 @@ __forceinline__ __device__ BitFieldAtomicType AtomicAnd(BitFieldAtomicType* addr
 */
 template <typename VT, typename Direction>
 struct BitFieldContainer {
-  using value_type = VT;
-  using pointer = value_type*;
+  using value_type = VT;  // NOLINT
+  using pointer = value_type*;  // NOLINT

  static value_type constexpr kValueSize = sizeof(value_type) * 8;
  static value_type constexpr kOne = 1;  // force correct type.
@@ -67,6 +67,7 @@ struct BitFieldContainer {
    value_type bit_pos {0};
  };

+ private:
  common::Span<value_type> bits_;
  static_assert(!std::is_signed<VT>::value, "Must use unsiged type as underlying storage.");

@@ -82,9 +83,12 @@ struct BitFieldContainer {

 public:
  BitFieldContainer() = default;
-  XGBOOST_DEVICE BitFieldContainer(common::Span<value_type> bits) : bits_{bits} {}
+  XGBOOST_DEVICE explicit BitFieldContainer(common::Span<value_type> bits) : bits_{bits} {}
  XGBOOST_DEVICE BitFieldContainer(BitFieldContainer const& other) : bits_{other.bits_} {}

+  common::Span<value_type>       Bits()       { return bits_; }
+  common::Span<value_type const> Bits() const { return bits_; }
+
  /*\brief Compute the size of needed memory allocation.  The returned value is in terms
   *       of number of elements with `BitFieldContainer::value_type'.
   */
@@ -190,7 +194,7 @@ template <typename VT>
 struct LBitsPolicy : public BitFieldContainer<VT, LBitsPolicy<VT>> {
  using Container = BitFieldContainer<VT, LBitsPolicy<VT>>;
  using Pos = typename Container::Pos;
-  using value_type = typename Container::value_type;
+  using value_type = typename Container::value_type;  // NOLINT

  XGBOOST_DEVICE static Pos Shift(Pos pos) {
    pos.bit_pos = Container::kValueSize - pos.bit_pos - Container::kOne;
@@ -204,7 +208,7 @@ template <typename VT>
 struct RBitsPolicy : public BitFieldContainer<VT, RBitsPolicy<VT>> {
  using Container = BitFieldContainer<VT, RBitsPolicy<VT>>;
  using Pos = typename Container::Pos;
-  using value_type = typename Container::value_type;
+  using value_type = typename Container::value_type;  // NOLINT

  XGBOOST_DEVICE static Pos Shift(Pos pos) {
    return pos;
--- a/src/common/column_matrix.h
+++ b/src/common/column_matrix.h
@@ -141,7 +141,7 @@ class ColumnMatrix {
      feature_offsets_[fid] = accum_index_;
    }

-    SetTypeSize(gmat.max_num_bins_);
+    SetTypeSize(gmat.max_num_bins);

    index_.resize(feature_offsets_[nfeature] * bins_type_size_, 0);
    if (!all_dense) {
@@ -161,24 +161,24 @@ class ColumnMatrix {

    // pre-fill index_ for dense columns
    if (all_dense) {
-      BinTypeSize gmat_bin_size = gmat.index.getBinTypeSize();
-      if (gmat_bin_size == UINT8_BINS_TYPE_SIZE) {
+      BinTypeSize gmat_bin_size = gmat.index.GetBinTypeSize();
+      if (gmat_bin_size == kUint8BinsTypeSize) {
          SetIndexAllDense(gmat.index.data<uint8_t>(), gmat, nrow, nfeature, noMissingValues);
-      } else if (gmat_bin_size == UINT16_BINS_TYPE_SIZE) {
+      } else if (gmat_bin_size == kUint16BinsTypeSize) {
          SetIndexAllDense(gmat.index.data<uint16_t>(), gmat, nrow, nfeature, noMissingValues);
      } else {
-          CHECK_EQ(gmat_bin_size, UINT32_BINS_TYPE_SIZE);
+          CHECK_EQ(gmat_bin_size, kUint32BinsTypeSize);
          SetIndexAllDense(gmat.index.data<uint32_t>(), gmat, nrow, nfeature, noMissingValues);
      }
-    /* For sparse DMatrix gmat.index.getBinTypeSize() returns always UINT32_BINS_TYPE_SIZE
+    /* For sparse DMatrix gmat.index.GetBinTypeSize() always returns kUint32BinsTypeSize
       but for ColumnMatrix we still have a chance to reduce the memory consumption */
    } else {
-      if (bins_type_size_ == UINT8_BINS_TYPE_SIZE) {
+      if (bins_type_size_ == kUint8BinsTypeSize) {
          SetIndex<uint8_t>(gmat.index.data<uint32_t>(), gmat, nrow, nfeature);
-      } else if (bins_type_size_ == UINT16_BINS_TYPE_SIZE) {
+      } else if (bins_type_size_ == kUint16BinsTypeSize) {
          SetIndex<uint16_t>(gmat.index.data<uint32_t>(), gmat, nrow, nfeature);
      } else {
-          CHECK_EQ(bins_type_size_, UINT32_BINS_TYPE_SIZE);
+          CHECK_EQ(bins_type_size_, kUint32BinsTypeSize);
          SetIndex<uint32_t>(gmat.index.data<uint32_t>(), gmat, nrow, nfeature);
      }
    }
@@ -187,11 +187,11 @@ class ColumnMatrix {
  /* Set the number of bytes based on numeric limit of maximum number of bins provided by user */
  void SetTypeSize(size_t max_num_bins) {
    if ( (max_num_bins - 1) <= static_cast<int>(std::numeric_limits<uint8_t>::max()) ) {
-      bins_type_size_ = UINT8_BINS_TYPE_SIZE;
+      bins_type_size_ = kUint8BinsTypeSize;
    } else if ((max_num_bins - 1) <= static_cast<int>(std::numeric_limits<uint16_t>::max())) {
-      bins_type_size_ = UINT16_BINS_TYPE_SIZE;
+      bins_type_size_ = kUint16BinsTypeSize;
    } else {
-      bins_type_size_ = UINT32_BINS_TYPE_SIZE;
+      bins_type_size_ = kUint32BinsTypeSize;
    }
  }

@@ -227,7 +227,7 @@ class ColumnMatrix {
    /* missing values make sense only for column with type kDenseColumn,
       and if no missing values were observed it could be handled much faster. */
    if (noMissingValues) {
-      const int32_t nthread = omp_get_max_threads();
+      const int32_t nthread = omp_get_max_threads();  // NOLINT
      #pragma omp parallel for num_threads(nthread)
      for (omp_ulong rid = 0; rid < nrow; ++rid) {
        const size_t ibegin = rid*nfeature;
@@ -241,7 +241,7 @@ class ColumnMatrix {
    } else {
      /* to handle rows in all batches, sum of all batch sizes equal to gmat.row_ptr.size() - 1 */
      size_t rbegin = 0;
-      for (const auto &batch : gmat.p_fmat_->GetBatches<SparsePage>()) {
+      for (const auto &batch : gmat.p_fmat->GetBatches<SparsePage>()) {
        const xgboost::Entry* data_ptr = batch.data.HostVector().data();
        const std::vector<bst_row_t>& offset_vec = batch.offset.HostVector();
        const size_t batch_size = batch.Size();
@@ -276,7 +276,7 @@ class ColumnMatrix {

    T* local_index = reinterpret_cast<T*>(&index_[0]);
    size_t rbegin = 0;
-    for (const auto &batch : gmat.p_fmat_->GetBatches<SparsePage>()) {
+    for (const auto &batch : gmat.p_fmat->GetBatches<SparsePage>()) {
      const xgboost::Entry* data_ptr = batch.data.HostVector().data();
      const std::vector<bst_row_t>& offset_vec = batch.offset.HostVector();
      const size_t batch_size = batch.Size();
--- a/src/common/common.h
+++ b/src/common/common.h
@@ -118,7 +118,7 @@ class Range {
    XGBOOST_DEVICE explicit Iterator(DifferenceType start, DifferenceType step) :
        i_{start}, step_{step} {}

-   public:
+   private:
    int64_t i_;
    DifferenceType step_ = 1;
  };
--- a/src/common/compressed_iterator.h
+++ b/src/common/compressed_iterator.h
@@ -112,7 +112,7 @@ class CompressedBufferWriter {
    size_t ibyte_start = ibit_start / 8, ibyte_end = ibit_end / 8;

    symbol <<= 7 - ibit_end % 8;
-    for (ptrdiff_t ibyte = ibyte_end; ibyte >= (ptrdiff_t)ibyte_start; --ibyte) {
+    for (ptrdiff_t ibyte = ibyte_end; ibyte >= static_cast<ptrdiff_t>(ibyte_start); --ibyte) {
      dh::AtomicOrByte(reinterpret_cast<unsigned int*>(buffer + detail::kPadding),
                       ibyte, symbol & 0xff);
      symbol >>= 8;
@@ -182,14 +182,14 @@ class CompressedIterator {
  typedef value_type reference;             // NOLINT

 private:
-  const CompressedByteT *buffer_;
-  size_t symbol_bits_;
-  size_t offset_;
+  const CompressedByteT *buffer_ {nullptr};
+  size_t symbol_bits_ {0};
+  size_t offset_ {0};

 public:
-  CompressedIterator() : buffer_(nullptr), symbol_bits_(0), offset_(0) {}
+  CompressedIterator() = default;
  CompressedIterator(const CompressedByteT *buffer, size_t num_symbols)
-      : buffer_(buffer), offset_(0) {
+      : buffer_(buffer) {
    symbol_bits_ = detail::SymbolBits(num_symbols);
  }

--- a/src/common/config.h
+++ b/src/common/config.h
@@ -29,8 +29,8 @@ class ConfigParser {
   * \brief Constructor for INI-style configuration parser
   * \param path path to configuration file
   */
-  explicit ConfigParser(const std::string& path)
-      : path_(path),
+  explicit ConfigParser(const std::string path)
+      : path_(std::move(path)),
      line_comment_regex_("^#"),
      key_regex_(R"rx(^([^#"'=\r\n\t ]+)[\t ]*=)rx"),
      key_regex_escaped_(R"rx(^(["'])([^"'=\r\n]+)\1[\t ]*=)rx"),
@@ -58,12 +58,12 @@ class ConfigParser {
  std::string NormalizeConfigEOL(std::string const& config_str) {
    std::string result;
    std::stringstream ss(config_str);
-    for (size_t i = 0; i < config_str.size(); ++i) {
-      if (config_str[i] == '\r') {
+    for (auto c : config_str) {
+      if (c == '\r') {
        result.push_back('\n');
        continue;
      }
-      result.push_back(config_str[i]);
+      result.push_back(c);
    }
    return result;
  }
--- a/src/common/device_helpers.cu
+++ b/src/common/device_helpers.cu
@@ -37,7 +37,7 @@ void AllReducer::Init(int _device_ordinal) {
 #ifdef XGBOOST_USE_NCCL
  LOG(DEBUG) << "Running nccl init on: " << __CUDACC_VER_MAJOR__ << "." << __CUDACC_VER_MINOR__;

-  device_ordinal = _device_ordinal;
+  device_ordinal_ = _device_ordinal;
  int32_t const rank = rabit::GetRank();

 #if __CUDACC_VER_MAJOR__ > 9
@@ -46,7 +46,7 @@ void AllReducer::Init(int _device_ordinal) {
  std::vector<uint64_t> uuids(world * kUuidLength, 0);
  auto s_uuid = xgboost::common::Span<uint64_t>{uuids.data(), uuids.size()};
  auto s_this_uuid = s_uuid.subspan(rank * kUuidLength, kUuidLength);
-  GetCudaUUID(world, rank, device_ordinal, s_this_uuid);
+  GetCudaUUID(world, rank, device_ordinal_, s_this_uuid);

  // No allgather yet.
  rabit::Allreduce<rabit::op::Sum, uint64_t>(uuids.data(), uuids.size());
@@ -66,10 +66,10 @@ void AllReducer::Init(int _device_ordinal) {
      << "device is not supported";
 #endif  // __CUDACC_VER_MAJOR__ > 9

-  id = GetUniqueId();
-  dh::safe_cuda(cudaSetDevice(device_ordinal));
-  dh::safe_nccl(ncclCommInitRank(&comm, rabit::GetWorldSize(), id, rank));
-  safe_cuda(cudaStreamCreate(&stream));
+  id_ = GetUniqueId();
+  dh::safe_cuda(cudaSetDevice(device_ordinal_));
+  dh::safe_nccl(ncclCommInitRank(&comm_, rabit::GetWorldSize(), id_, rank));
+  safe_cuda(cudaStreamCreate(&stream_));
  initialised_ = true;
 #else
  if (rabit::IsDistributed()) {
@@ -81,8 +81,8 @@ void AllReducer::Init(int _device_ordinal) {
 AllReducer::~AllReducer() {
 #ifdef XGBOOST_USE_NCCL
  if (initialised_) {
-    dh::safe_cuda(cudaStreamDestroy(stream));
-    ncclCommDestroy(comm);
+    dh::safe_cuda(cudaStreamDestroy(stream_));
+    ncclCommDestroy(comm_);
  }
  if (xgboost::ConsoleLogger::ShouldLog(xgboost::ConsoleLogger::LV::kDebug)) {
    LOG(CONSOLE) << "======== NCCL Statistics========";
--- a/src/common/device_helpers.cuh
+++ b/src/common/device_helpers.cuh
@@ -35,10 +35,10 @@
 #include "../common/io.h"
 #endif

-#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
+#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 || defined(__clang__)

 #else  // In device code and CUDA < 600
-XGBOOST_DEVICE __forceinline__ double atomicAdd(double* address, double val) {
+__device__ __forceinline__ double atomicAdd(double* address, double val) {  // NOLINT
  unsigned long long int* address_as_ull =
      (unsigned long long int*)address;                   // NOLINT
  unsigned long long int old = *address_as_ull, assumed;  // NOLINT
@@ -141,7 +141,8 @@ inline void CheckComputeCapability() {
 }

 DEV_INLINE void AtomicOrByte(unsigned int* __restrict__ buffer, size_t ibyte, unsigned char b) {
-  atomicOr(&buffer[ibyte / sizeof(unsigned int)], (unsigned int)b << (ibyte % (sizeof(unsigned int)) * 8));
+  atomicOr(&buffer[ibyte / sizeof(unsigned int)],
+           static_cast<unsigned int>(b) << (ibyte % (sizeof(unsigned int)) * 8));
 }

 namespace internal {
@@ -174,7 +175,7 @@ CountNumItemsImpl(bool left, const T * __restrict__ items, uint32_t n, T v,
  return left ? items_begin - items : items + n - items_begin;
 }

-}
+}  // namespace internal

 /*!
 * \brief Find the strict upper bound for an element in a sorted array
@@ -291,9 +292,9 @@ class LaunchKernel {
  dim3 blocks_;

 public:
-  LaunchKernel(uint32_t _grids, uint32_t _blk, size_t _shmem=0, cudaStream_t _s=0) :
+  LaunchKernel(uint32_t _grids, uint32_t _blk, size_t _shmem=0, cudaStream_t _s=nullptr) :
      grids_{_grids, 1, 1}, blocks_{_blk, 1, 1}, shmem_size_{_shmem}, stream_{_s} {}
-  LaunchKernel(dim3 _grids, dim3 _blk, size_t _shmem=0, cudaStream_t _s=0) :
+  LaunchKernel(dim3 _grids, dim3 _blk, size_t _shmem=0, cudaStream_t _s=nullptr) :
      grids_{_grids}, blocks_{_blk}, shmem_size_{_shmem}, stream_{_s} {}

  template <typename K, typename... Args>
@@ -359,16 +360,18 @@ class MemoryLogger {

 public:
  void RegisterAllocation(void *ptr, size_t n) {
-    if (!xgboost::ConsoleLogger::ShouldLog(xgboost::ConsoleLogger::LV::kDebug))
+    if (!xgboost::ConsoleLogger::ShouldLog(xgboost::ConsoleLogger::LV::kDebug)) {
      return;
+    }
    std::lock_guard<std::mutex> guard(mutex_);
    int current_device;
    safe_cuda(cudaGetDevice(&current_device));
    stats_.RegisterAllocation(ptr, n);
  }
  void RegisterDeallocation(void *ptr, size_t n) {
-    if (!xgboost::ConsoleLogger::ShouldLog(xgboost::ConsoleLogger::LV::kDebug))
+    if (!xgboost::ConsoleLogger::ShouldLog(xgboost::ConsoleLogger::LV::kDebug)) {
      return;
+    }
    std::lock_guard<std::mutex> guard(mutex_);
    int current_device;
    safe_cuda(cudaGetDevice(&current_device));
@@ -384,8 +387,9 @@ public:
  }

  void Log() {
-    if (!xgboost::ConsoleLogger::ShouldLog(xgboost::ConsoleLogger::LV::kDebug))
+    if (!xgboost::ConsoleLogger::ShouldLog(xgboost::ConsoleLogger::LV::kDebug)) {
      return;
+    }
    std::lock_guard<std::mutex> guard(mutex_);
    int current_device;
    safe_cuda(cudaGetDevice(&current_device));
@@ -396,7 +400,7 @@ public:
    LOG(CONSOLE) << "Number of allocations: " << stats_.num_allocations;
  }
 };
-};
+}  // namespace detail

 inline detail::MemoryLogger &GlobalMemoryLogger() {
  static detail::MemoryLogger memory_logger;
@@ -413,27 +417,27 @@ inline void DebugSyncDevice(std::string file="", int32_t line = -1) {
  safe_cuda(cudaGetLastError());
 }

-namespace detail{
+namespace detail {
 /**
 * \brief Default memory allocator, uses cudaMalloc/Free and logs allocations if verbose.
 */
 template <class T>
 struct XGBDefaultDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
-  using super_t = thrust::device_malloc_allocator<T>;
-  using pointer = thrust::device_ptr<T>;
+  using SuperT = thrust::device_malloc_allocator<T>;
+  using pointer = thrust::device_ptr<T>;  // NOLINT
  template<typename U>
-  struct rebind
+  struct rebind  // NOLINT
  {
-    typedef XGBDefaultDeviceAllocatorImpl<U> other;
+    using other = XGBDefaultDeviceAllocatorImpl<U>;  // NOLINT
  };
-  pointer allocate(size_t n) {
-    pointer ptr = super_t::allocate(n);
+  pointer allocate(size_t n) {  // NOLINT
+    pointer ptr = SuperT::allocate(n);
    GlobalMemoryLogger().RegisterAllocation(ptr.get(), n * sizeof(T));
    return ptr;
  }
-  void deallocate(pointer ptr, size_t n) {
+  void deallocate(pointer ptr, size_t n) {  // NOLINT
    GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T));
-    return super_t::deallocate(ptr, n);
+    return SuperT::deallocate(ptr, n);
  }
 };

@@ -442,11 +446,11 @@ struct XGBDefaultDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
 */
 template <class T>
 struct XGBCachingDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
-  using pointer = thrust::device_ptr<T>;
+  using pointer = thrust::device_ptr<T>;  // NOLINT
  template<typename U>
-  struct rebind
+  struct rebind  // NOLINT
  {
-    typedef XGBCachingDeviceAllocatorImpl<U> other;
+    using other = XGBCachingDeviceAllocatorImpl<U>;  // NOLINT
  };
   cub::CachingDeviceAllocator& GetGlobalCachingAllocator ()
   {
@@ -455,7 +459,7 @@ struct XGBCachingDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
     static cub::CachingDeviceAllocator *allocator = new cub::CachingDeviceAllocator(2, 9, 29);
     return *allocator;
   }
-   pointer allocate(size_t n) {
+   pointer allocate(size_t n) {  // NOLINT
     T *ptr;
     GetGlobalCachingAllocator().DeviceAllocate(reinterpret_cast<void **>(&ptr),
                                                n * sizeof(T));
@@ -463,17 +467,17 @@ struct XGBCachingDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
     GlobalMemoryLogger().RegisterAllocation(thrust_ptr.get(), n * sizeof(T));
     return thrust_ptr;
   }
-   void deallocate(pointer ptr, size_t n) {
+   void deallocate(pointer ptr, size_t n) {  // NOLINT
     GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T));
     GetGlobalCachingAllocator().DeviceFree(ptr.get());
   }
  __host__ __device__
-    void construct(T *)
+    void construct(T *)  // NOLINT
  {
    // no-op
  }
 };
-};
+}  // namespace detail

 // Declare xgboost allocators
 // Replacement of allocator with custom backend should occur here
@@ -486,9 +490,9 @@ template <typename T>
 using XGBCachingDeviceAllocator = detail::XGBCachingDeviceAllocatorImpl<T>;
 /** \brief Specialisation of thrust device vector using custom allocator. */
 template <typename T>
-using device_vector = thrust::device_vector<T,  XGBDeviceAllocator<T>>;
+using device_vector = thrust::device_vector<T,  XGBDeviceAllocator<T>>;  // NOLINT
 template <typename T>
-using caching_device_vector = thrust::device_vector<T,  XGBCachingDeviceAllocator<T>>;
+using caching_device_vector = thrust::device_vector<T,  XGBCachingDeviceAllocator<T>>;  // NOLINT

 /**
 * \brief A double buffer, useful for algorithms like sort.
@@ -517,7 +521,7 @@ class DoubleBuffer {
    return xgboost::common::Span<T>{buff.Current(), Size()};
  }

-  T *other() { return buff.Alternate(); }
+  T *Other() { return buff.Alternate(); }
 };

 /**
@@ -688,7 +692,9 @@ class BulkAllocator {

  template <typename... Args>
  void Allocate(int device_idx, Args... args) {
-    if (device_idx_ == -1) device_idx_ = device_idx;
+    if (device_idx_ == -1) {
+      device_idx_ = device_idx;
+    }
    else CHECK(device_idx_ == device_idx);
    size_t size = GetSizeBytes(args...);

@@ -728,13 +734,13 @@ struct PinnedMemory {

 // Keep track of cub library device allocation
 struct CubMemory {
-  void *d_temp_storage;
-  size_t temp_storage_bytes;
+  void *d_temp_storage { nullptr };
+  size_t temp_storage_bytes { 0 };

  // Thrust
  using value_type = char;  // NOLINT

-  CubMemory() : d_temp_storage(nullptr), temp_storage_bytes(0) {}
+  CubMemory() = default;

  ~CubMemory() { Free(); }

@@ -818,7 +824,7 @@ __global__ void LbsKernel(CoordinateT *d_coordinates,
  cub::CountingInputIterator<OffsetT> tile_element_indices(tile_start_coord.y);
  CoordinateT thread_start_coord;

-  typedef typename std::iterator_traits<SegmentIterT>::value_type SegmentT;
+  using SegmentT = typename std::iterator_traits<SegmentIterT>::value_type;
  __shared__ struct {
    SegmentT tile_segment_end_offsets[TILE_SIZE + 1];
    SegmentT output_segment[TILE_SIZE];
@@ -862,7 +868,7 @@ template <typename FunctionT, typename SegmentIterT, typename OffsetT>
 void SparseTransformLbs(int device_idx, dh::CubMemory *temp_memory,
                        OffsetT count, SegmentIterT segments,
                        OffsetT num_segments, FunctionT f) {
-  typedef typename cub::CubVector<OffsetT, 2>::Type CoordinateT;
+  using CoordinateT = typename cub::CubVector<OffsetT, 2>::Type;
  dh::safe_cuda(cudaSetDevice(device_idx));
  const int BLOCK_THREADS = 256;
  const int ITEMS_PER_THREAD = 1;
@@ -961,13 +967,13 @@ void SegmentedSort(dh::CubMemory *tmp_mem, dh::DoubleBuffer<T1> *keys,
 * @param nVals number of elements in the input array
 */
 template <typename T>
-void SumReduction(dh::CubMemory &tmp_mem, xgboost::common::Span<T> in, xgboost::common::Span<T> out,
+void SumReduction(dh::CubMemory* tmp_mem, xgboost::common::Span<T> in, xgboost::common::Span<T> out,
                  int nVals) {
  size_t tmpSize;
  dh::safe_cuda(
      cub::DeviceReduce::Sum(NULL, tmpSize, in.data(), out.data(), nVals));
-  tmp_mem.LazyAllocate(tmpSize);
-  dh::safe_cuda(cub::DeviceReduce::Sum(tmp_mem.d_temp_storage, tmpSize,
+  tmp_mem->LazyAllocate(tmpSize);
+  dh::safe_cuda(cub::DeviceReduce::Sum(tmp_mem->d_temp_storage, tmpSize,
                                       in.data(), out.data(), nVals));
 }

@@ -980,20 +986,20 @@ void SumReduction(dh::CubMemory &tmp_mem, xgboost::common::Span<T> in, xgboost::
 */
 template <typename T>
 typename std::iterator_traits<T>::value_type SumReduction(
-    dh::CubMemory &tmp_mem, T in, int nVals) {
+    dh::CubMemory* tmp_mem, T in, int nVals) {
  using ValueT = typename std::iterator_traits<T>::value_type;
  size_t tmpSize {0};
  ValueT *dummy_out = nullptr;
  dh::safe_cuda(cub::DeviceReduce::Sum(nullptr, tmpSize, in, dummy_out, nVals));
  // Allocate small extra memory for the return value
-  tmp_mem.LazyAllocate(tmpSize + sizeof(ValueT));
-  auto ptr = reinterpret_cast<ValueT *>(tmp_mem.d_temp_storage) + 1;
+  tmp_mem->LazyAllocate(tmpSize + sizeof(ValueT));
+  auto ptr = reinterpret_cast<ValueT *>(tmp_mem->d_temp_storage) + 1;
  dh::safe_cuda(cub::DeviceReduce::Sum(
      reinterpret_cast<void *>(ptr), tmpSize, in,
-      reinterpret_cast<ValueT *>(tmp_mem.d_temp_storage),
+      reinterpret_cast<ValueT *>(tmp_mem->d_temp_storage),
      nVals));
  ValueT sum;
-  dh::safe_cuda(cudaMemcpy(&sum, tmp_mem.d_temp_storage, sizeof(ValueT),
+  dh::safe_cuda(cudaMemcpy(&sum, tmp_mem->d_temp_storage, sizeof(ValueT),
                           cudaMemcpyDeviceToHost));
  return sum;
 }
@@ -1079,20 +1085,19 @@ class SaveCudaContext {
 * this is a dummy class that will error if used with more than one GPU.
 */
 class AllReducer {
-  bool initialised_;
-  size_t allreduce_bytes_;  // Keep statistics of the number of bytes communicated
-  size_t allreduce_calls_;  // Keep statistics of the number of reduce calls
-  std::vector<size_t> host_data;  // Used for all reduce on host
+  bool initialised_ {false};
+  size_t allreduce_bytes_ {0};  // Keep statistics of the number of bytes communicated
+  size_t allreduce_calls_ {0};  // Keep statistics of the number of reduce calls
+  std::vector<size_t> host_data_;  // Used for all reduce on host
 #ifdef XGBOOST_USE_NCCL
-  ncclComm_t comm;
-  cudaStream_t stream;
-  int device_ordinal;
-  ncclUniqueId id;
+  ncclComm_t comm_;
+  cudaStream_t stream_;
+  int device_ordinal_;
+  ncclUniqueId id_;
 #endif

 public:
-  AllReducer() : initialised_(false), allreduce_bytes_(0),
-                 allreduce_calls_(0) {}
+  AllReducer() = default;

  /**
   * \brief Initialise with the desired device ordinal for this communication
@@ -1116,8 +1121,8 @@ class AllReducer {
  void AllReduceSum(const double *sendbuff, double *recvbuff, int count) {
 #ifdef XGBOOST_USE_NCCL
    CHECK(initialised_);
-    dh::safe_cuda(cudaSetDevice(device_ordinal));
-    dh::safe_nccl(ncclAllReduce(sendbuff, recvbuff, count, ncclDouble, ncclSum, comm, stream));
+    dh::safe_cuda(cudaSetDevice(device_ordinal_));
+    dh::safe_nccl(ncclAllReduce(sendbuff, recvbuff, count, ncclDouble, ncclSum, comm_, stream_));
    allreduce_bytes_ += count * sizeof(double);
    allreduce_calls_ += 1;
 #endif
@@ -1135,8 +1140,8 @@ class AllReducer {
  void AllReduceSum(const float *sendbuff, float *recvbuff, int count) {
 #ifdef XGBOOST_USE_NCCL
    CHECK(initialised_);
-    dh::safe_cuda(cudaSetDevice(device_ordinal));
-    dh::safe_nccl(ncclAllReduce(sendbuff, recvbuff, count, ncclFloat, ncclSum, comm, stream));
+    dh::safe_cuda(cudaSetDevice(device_ordinal_));
+    dh::safe_nccl(ncclAllReduce(sendbuff, recvbuff, count, ncclFloat, ncclSum, comm_, stream_));
    allreduce_bytes_ += count * sizeof(float);
    allreduce_calls_ += 1;
 #endif
@@ -1156,8 +1161,8 @@ class AllReducer {
 #ifdef XGBOOST_USE_NCCL
    CHECK(initialised_);

-    dh::safe_cuda(cudaSetDevice(device_ordinal));
-    dh::safe_nccl(ncclAllReduce(sendbuff, recvbuff, count, ncclInt64, ncclSum, comm, stream));
+    dh::safe_cuda(cudaSetDevice(device_ordinal_));
+    dh::safe_nccl(ncclAllReduce(sendbuff, recvbuff, count, ncclInt64, ncclSum, comm_, stream_));
 #endif
  }

@@ -1168,8 +1173,8 @@ class AllReducer {
   */
  void Synchronize() {
 #ifdef XGBOOST_USE_NCCL
-    dh::safe_cuda(cudaSetDevice(device_ordinal));
-    dh::safe_cuda(cudaStreamSynchronize(stream));
+    dh::safe_cuda(cudaSetDevice(device_ordinal_));
+    dh::safe_cuda(cudaStreamSynchronize(stream_));
 #endif
  };

@@ -1183,15 +1188,15 @@ class AllReducer {
   * \return the Unique ID
   */
  ncclUniqueId GetUniqueId() {
-    static const int RootRank = 0;
+    static const int kRootRank = 0;
    ncclUniqueId id;
-    if (rabit::GetRank() == RootRank) {
+    if (rabit::GetRank() == kRootRank) {
      dh::safe_nccl(ncclGetUniqueId(&id));
    }
    rabit::Broadcast(
-      (void*)&id,
-      (size_t)sizeof(ncclUniqueId),
-      (int)RootRank);
+        static_cast<void*>(&id),
+        sizeof(ncclUniqueId),
+        static_cast<int>(kRootRank));
    return id;
  }
 #endif
@@ -1202,18 +1207,18 @@ class AllReducer {
  void HostMaxAllReduce(std::vector<size_t> *p_data) {
 #ifdef XGBOOST_USE_NCCL
    auto &data = *p_data;
-    // Wait in case some other thread is accessing host_data
+    // Wait in case some other thread is accessing host_data_
 #pragma omp barrier
    // Reset shared buffer
 #pragma omp single
    {
-      host_data.resize(data.size());
-      std::fill(host_data.begin(), host_data.end(), size_t(0));
+      host_data_.resize(data.size());
+      std::fill(host_data_.begin(), host_data_.end(), size_t(0));
    }
    // Threads update shared array
    for (auto i = 0ull; i < data.size(); i++) {
 #pragma omp critical
-      { host_data[i] = std::max(host_data[i], data[i]); }
+      { host_data_[i] = std::max(host_data_[i], data[i]); }
    }
    // Wait until all threads are finished
 #pragma omp barrier
@@ -1221,15 +1226,15 @@ class AllReducer {
    // One thread performs all reduce across distributed nodes
 #pragma omp master
    {
-      rabit::Allreduce<rabit::op::Max, size_t>(host_data.data(),
-                                               host_data.size());
+      rabit::Allreduce<rabit::op::Max, size_t>(host_data_.data(),
+                                               host_data_.size());
    }

 #pragma omp barrier

    // Threads can now read back all reduced values
    for (auto i = 0ull; i < data.size(); i++) {
-      data[i] = host_data[i];
+      data[i] = host_data_[i];
    }
 #endif
  }
@@ -1264,12 +1269,12 @@ thrust::device_ptr<T> tend(xgboost::HostDeviceVector<T>& vector) {  // // NOLINT
 }

 template <typename T>
-thrust::device_ptr<T const> tcbegin(xgboost::HostDeviceVector<T> const& vector) {
+thrust::device_ptr<T const> tcbegin(xgboost::HostDeviceVector<T> const& vector) {  // NOLINT
  return thrust::device_ptr<T const>(vector.ConstDevicePointer());
 }

 template <typename T>
-thrust::device_ptr<T const> tcend(xgboost::HostDeviceVector<T> const& vector) {
+thrust::device_ptr<T const> tcend(xgboost::HostDeviceVector<T> const& vector) {  // NOLINT
  return tcbegin(vector) + vector.Size();
 }

@@ -1279,17 +1284,17 @@ thrust::device_ptr<T> tbegin(xgboost::common::Span<T>& span) {  // NOLINT
 }

 template <typename T>
-thrust::device_ptr<T> tend(xgboost::common::Span<T>& span) {  // // NOLINT
+thrust::device_ptr<T> tend(xgboost::common::Span<T>& span) {  // NOLINT
  return tbegin(span) + span.size();
 }

 template <typename T>
-thrust::device_ptr<T const> tcbegin(xgboost::common::Span<T> const& span) {
+thrust::device_ptr<T const> tcbegin(xgboost::common::Span<T> const& span) {  // NOLINT
  return thrust::device_ptr<T const>(span.data());
 }

 template <typename T>
-thrust::device_ptr<T const> tcend(xgboost::common::Span<T> const& span) {
+thrust::device_ptr<T const> tcend(xgboost::common::Span<T> const& span) {  // NOLINT
  return tcbegin(span) + span.size();
 }

@@ -1465,9 +1470,9 @@ class SegmentSorter {
 template <typename FunctionT>
 class LauncherItr {
 public:
-  int idx;
+  int idx { 0 };
  FunctionT f;
-  XGBOOST_DEVICE LauncherItr() : idx(0) {}
+  XGBOOST_DEVICE LauncherItr() : idx(0) {}  // NOLINT
  XGBOOST_DEVICE LauncherItr(int idx, FunctionT f) : idx(idx), f(f) {}
  XGBOOST_DEVICE LauncherItr &operator=(int output) {
    f(idx, output);
@@ -1493,7 +1498,7 @@ public:
 using value_type = void;       // NOLINT
 using pointer = value_type *;  // NOLINT
 using reference = LauncherItr<FunctionT>;  // NOLINT
- using iterator_category = typename thrust::detail::iterator_facade_category<
+ using iterator_category = typename thrust::detail::iterator_facade_category<  // NOLINT
     thrust::any_system_tag, thrust::random_access_traversal_tag, value_type,
     reference>::type;  // NOLINT
 private:
--- a/src/common/hist_util.cc
+++ b/src/common/hist_util.cc
@@ -1,5 +1,5 @@
 /*!
- * Copyright 2017-2019 by Contributors
+ * Copyright 2017-2020 by Contributors
 * \file hist_util.cc
 */
 #include <dmlc/timer.h>
@@ -11,10 +11,10 @@

 #include "xgboost/base.h"
 #include "../common/common.h"
-#include "./hist_util.h"
-#include "./random.h"
-#include "./column_matrix.h"
-#include "./quantile.h"
+#include "hist_util.h"
+#include "random.h"
+#include "column_matrix.h"
+#include "quantile.h"
 #include "./../tree/updater_quantile_hist.h"

 #if defined(XGBOOST_MM_PREFETCH_PRESENT)
@@ -99,16 +99,16 @@ void GHistIndexMatrix::SetIndexDataForSparse(common::Span<uint32_t> index_data_s
 void GHistIndexMatrix::ResizeIndex(const size_t rbegin, const SparsePage& batch,
                                   const size_t n_offsets, const size_t n_index,
                                   const bool isDense) {
-  if ((max_num_bins_ - 1 <= static_cast<int>(std::numeric_limits<uint8_t>::max())) && isDense) {
-    index.setBinTypeSize(UINT8_BINS_TYPE_SIZE);
-    index.resize((sizeof(uint8_t)) * n_index);
-  } else if ((max_num_bins_ - 1 > static_cast<int>(std::numeric_limits<uint8_t>::max())  &&
-    max_num_bins_ - 1 <= static_cast<int>(std::numeric_limits<uint16_t>::max())) && isDense) {
-    index.setBinTypeSize(UINT16_BINS_TYPE_SIZE);
-    index.resize((sizeof(uint16_t)) * n_index);
+  if ((max_num_bins - 1 <= static_cast<int>(std::numeric_limits<uint8_t>::max())) && isDense) {
+    index.SetBinTypeSize(kUint8BinsTypeSize);
+    index.Resize((sizeof(uint8_t)) * n_index);
+  } else if ((max_num_bins - 1 > static_cast<int>(std::numeric_limits<uint8_t>::max())  &&
+    max_num_bins - 1 <= static_cast<int>(std::numeric_limits<uint16_t>::max())) && isDense) {
+    index.SetBinTypeSize(kUint16BinsTypeSize);
+    index.Resize((sizeof(uint16_t)) * n_index);
  } else {
-    index.setBinTypeSize(UINT32_BINS_TYPE_SIZE);
-    index.resize((sizeof(uint32_t)) * n_index);
+    index.SetBinTypeSize(kUint32BinsTypeSize);
+    index.Resize((sizeof(uint32_t)) * n_index);
  }
 }

@@ -449,15 +449,15 @@ void DenseCuts::Init
  monitor_.Stop(__func__);
 }

-void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) {
-  cut.Build(p_fmat, max_num_bins);
-  max_num_bins_ = max_num_bins;
+void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_bins) {
+  cut.Build(p_fmat, max_bins);
+  max_num_bins = max_bins;
  const int32_t nthread = omp_get_max_threads();
  const uint32_t nbins = cut.Ptrs().back();
  hit_count.resize(nbins, 0);
  hit_count_tloc_.resize(nthread * nbins, 0);

-  this->p_fmat_ = p_fmat;
+  this->p_fmat = p_fmat;
  size_t new_size = 1;
  for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
    new_size += batch.Size();
@@ -524,24 +524,24 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) {

    uint32_t* offsets = nullptr;
    if (isDense) {
-      index.resizeOffset(n_offsets);
-      offsets = index.offset();
+      index.ResizeOffset(n_offsets);
+      offsets = index.Offset();
      for (size_t i = 0; i < n_offsets; ++i) {
        offsets[i] = cut.Ptrs()[i];
      }
    }

    if (isDense) {
-      BinTypeSize curent_bin_size = index.getBinTypeSize();
+      BinTypeSize curent_bin_size = index.GetBinTypeSize();
      common::Span<const uint32_t> offsets_span = {offsets, n_offsets};
-      if (curent_bin_size == UINT8_BINS_TYPE_SIZE) {
+      if (curent_bin_size == kUint8BinsTypeSize) {
          common::Span<uint8_t> index_data_span = {index.data<uint8_t>(), n_index};
          SetIndexDataForDense(index_data_span, batch_threads, batch, rbegin, offsets_span, nbins);
-      } else if (curent_bin_size == UINT16_BINS_TYPE_SIZE) {
+      } else if (curent_bin_size == kUint16BinsTypeSize) {
          common::Span<uint16_t> index_data_span = {index.data<uint16_t>(), n_index};
          SetIndexDataForDense(index_data_span, batch_threads, batch, rbegin, offsets_span, nbins);
      } else {
-          CHECK_EQ(curent_bin_size, UINT32_BINS_TYPE_SIZE);
+          CHECK_EQ(curent_bin_size, kUint32BinsTypeSize);
          common::Span<uint32_t> index_data_span = {index.data<uint32_t>(), n_index};
          SetIndexDataForDense(index_data_span, batch_threads, batch, rbegin, offsets_span, nbins);
      }
@@ -689,16 +689,16 @@ FindGroups(const std::vector<unsigned>& feature_list,
    }

    BinTypeSize bins_type_size = colmat.GetTypeSize();
-    if (bins_type_size == UINT8_BINS_TYPE_SIZE) {
+    if (bins_type_size == kUint8BinsTypeSize) {
        const auto column = colmat.GetColumn<uint8_t>(fid);
        SetGroup(fid, *(column.get()), max_conflict_cnt, search_groups,
                 &group_conflict_cnt, &conflict_marks, &groups, &group_nnz, cur_fid_nnz, nrow);
-    } else if (bins_type_size == UINT16_BINS_TYPE_SIZE) {
+    } else if (bins_type_size == kUint16BinsTypeSize) {
        const auto column = colmat.GetColumn<uint16_t>(fid);
        SetGroup(fid, *(column.get()), max_conflict_cnt, search_groups,
                 &group_conflict_cnt, &conflict_marks, &groups, &group_nnz, cur_fid_nnz, nrow);
    } else {
-        CHECK_EQ(bins_type_size, UINT32_BINS_TYPE_SIZE);
+        CHECK_EQ(bins_type_size, kUint32BinsTypeSize);
        const auto column = colmat.GetColumn<uint32_t>(fid);
        SetGroup(fid, *(column.get()), max_conflict_cnt, search_groups,
                 &group_conflict_cnt, &conflict_marks, &groups, &group_nnz, cur_fid_nnz, nrow);
@@ -909,7 +909,7 @@ void BuildHistDenseKernel(const std::vector<GradientPair>& gpair,
  const size_t* rid = row_indices.begin;
  const float* pgh = reinterpret_cast<const float*>(gpair.data());
  const BinIdxType* gradient_index = gmat.index.data<BinIdxType>();
-  const uint32_t* offsets = gmat.index.offset();
+  const uint32_t* offsets = gmat.index.Offset();
  FPType* hist_data = reinterpret_cast<FPType*>(hist.data());
  const uint32_t two {2};  // Each element from 'gpair' and 'hist' contains
                           // 2 FP values: gradient and hessian.
@@ -1000,16 +1000,16 @@ void BuildHistKernel(const std::vector<GradientPair>& gpair,
                     const RowSetCollection::Elem row_indices,
                     const GHistIndexMatrix& gmat, const bool isDense, GHistRow hist) {
  const bool is_dense = row_indices.Size() && isDense;
-  switch (gmat.index.getBinTypeSize()) {
-    case UINT8_BINS_TYPE_SIZE:
+  switch (gmat.index.GetBinTypeSize()) {
+    case kUint8BinsTypeSize:
      BuildHistDispatchKernel<FPType, do_prefetch, uint8_t>(gpair, row_indices,
                                                            gmat, hist, is_dense);
      break;
-    case UINT16_BINS_TYPE_SIZE:
+    case kUint16BinsTypeSize:
      BuildHistDispatchKernel<FPType, do_prefetch, uint16_t>(gpair, row_indices,
                                                             gmat, hist, is_dense);
      break;
-    case UINT32_BINS_TYPE_SIZE:
+    case kUint32BinsTypeSize:
      BuildHistDispatchKernel<FPType, do_prefetch, uint32_t>(gpair, row_indices,
                                                             gmat, hist, is_dense);
      break;
--- a/src/common/hist_util.h
+++ b/src/common/hist_util.h
@@ -45,9 +45,10 @@ class HistogramCuts {
  common::Monitor monitor_;

 public:
-  HostDeviceVector<bst_float> cut_values_;
-  HostDeviceVector<uint32_t> cut_ptrs_;
-  HostDeviceVector<float> min_vals_;  // storing minimum value in a sketch set.
+  HostDeviceVector<bst_float> cut_values_;  // NOLINT
+  HostDeviceVector<uint32_t> cut_ptrs_;     // NOLINT
+  // storing minimum value in a sketch set.
+  HostDeviceVector<float> min_vals_;  // NOLINT

  HistogramCuts();
  HistogramCuts(HistogramCuts const& that) {
@@ -211,14 +212,14 @@ HistogramCuts AdapterDeviceSketch(AdapterT* adapter, int num_bins,


 enum BinTypeSize {
-  UINT8_BINS_TYPE_SIZE  = 1,
-  UINT16_BINS_TYPE_SIZE = 2,
-  UINT32_BINS_TYPE_SIZE = 4
+  kUint8BinsTypeSize  = 1,
+  kUint16BinsTypeSize = 2,
+  kUint32BinsTypeSize = 4
 };

 struct Index {
-  Index(): binTypeSize_(UINT8_BINS_TYPE_SIZE), p_(1), offset_ptr_(nullptr) {
-    setBinTypeSize(binTypeSize_);
+  Index() {
+    SetBinTypeSize(binTypeSize_);
  }
  Index(const Index& i) = delete;
  Index& operator=(Index i) = delete;
@@ -231,75 +232,75 @@ struct Index {
      return func_(data_ptr_, i);
    }
  }
-  void setBinTypeSize(BinTypeSize binTypeSize) {
+  void SetBinTypeSize(BinTypeSize binTypeSize) {
    binTypeSize_ = binTypeSize;
    switch (binTypeSize) {
-      case UINT8_BINS_TYPE_SIZE:
-        func_ = &getValueFromUint8;
+      case kUint8BinsTypeSize:
+        func_ = &GetValueFromUint8;
        break;
-      case UINT16_BINS_TYPE_SIZE:
-        func_ = &getValueFromUint16;
+      case kUint16BinsTypeSize:
+        func_ = &GetValueFromUint16;
        break;
-      case UINT32_BINS_TYPE_SIZE:
-        func_ = &getValueFromUint32;
+      case kUint32BinsTypeSize:
+        func_ = &GetValueFromUint32;
        break;
      default:
-        CHECK(binTypeSize == UINT8_BINS_TYPE_SIZE  ||
-              binTypeSize == UINT16_BINS_TYPE_SIZE ||
-              binTypeSize == UINT32_BINS_TYPE_SIZE);
+        CHECK(binTypeSize == kUint8BinsTypeSize  ||
+              binTypeSize == kUint16BinsTypeSize ||
+              binTypeSize == kUint32BinsTypeSize);
    }
  }
-  BinTypeSize getBinTypeSize() const {
+  BinTypeSize GetBinTypeSize() const {
    return binTypeSize_;
  }
  template<typename T>
-  T* data() const {
+  T* data() const {  // NOLINT
    return static_cast<T*>(data_ptr_);
  }
-  uint32_t* offset() const {
+  uint32_t* Offset() const {
    return offset_ptr_;
  }
-  size_t offsetSize() const {
+  size_t OffsetSize() const {
    return offset_.size();
  }
-  size_t size() const {
+  size_t Size() const {
    return data_.size() / (binTypeSize_);
  }
-  void resize(const size_t nBytesData) {
+  void Resize(const size_t nBytesData) {
    data_.resize(nBytesData);
    data_ptr_ = reinterpret_cast<void*>(data_.data());
  }
-  void resizeOffset(const size_t nDisps) {
+  void ResizeOffset(const size_t nDisps) {
    offset_.resize(nDisps);
    offset_ptr_ = offset_.data();
    p_ = nDisps;
  }
-  std::vector<uint8_t>::const_iterator begin() const {
+  std::vector<uint8_t>::const_iterator begin() const {  // NOLINT
    return data_.begin();
  }
-  std::vector<uint8_t>::const_iterator end() const {
+  std::vector<uint8_t>::const_iterator end() const {  // NOLINT
    return data_.end();
  }

 private:
-  static uint32_t getValueFromUint8(void *t, size_t i) {
+  static uint32_t GetValueFromUint8(void *t, size_t i) {
    return reinterpret_cast<uint8_t*>(t)[i];
  }
-  static uint32_t getValueFromUint16(void* t, size_t i) {
+  static uint32_t GetValueFromUint16(void* t, size_t i) {
    return reinterpret_cast<uint16_t*>(t)[i];
  }
-  static uint32_t getValueFromUint32(void* t, size_t i) {
+  static uint32_t GetValueFromUint32(void* t, size_t i) {
    return reinterpret_cast<uint32_t*>(t)[i];
  }

-  typedef uint32_t (*Func)(void*, size_t);
+  using Func = uint32_t (*)(void*, size_t);

  std::vector<uint8_t> data_;
  std::vector<uint32_t> offset_;  // size of this field is equal to number of features
  void* data_ptr_;
-  BinTypeSize binTypeSize_;
-  size_t p_;
-  uint32_t* offset_ptr_;
+  BinTypeSize binTypeSize_ {kUint8BinsTypeSize};
+  size_t p_ {1};
+  uint32_t* offset_ptr_ {nullptr};
  Func func_;
 };

@@ -319,8 +320,8 @@ struct GHistIndexMatrix {
  std::vector<size_t> hit_count;
  /*! \brief The corresponding cuts */
  HistogramCuts cut;
-  DMatrix* p_fmat_;
-  size_t max_num_bins_;
+  DMatrix* p_fmat;
+  size_t max_num_bins;
  // Create a global histogram matrix, given cut
  void Init(DMatrix* p_fmat, int max_num_bins);

@@ -668,7 +669,7 @@ class ParallelGHistBuilder {
 */
 class GHistBuilder {
 public:
-  GHistBuilder() : nthread_{0}, nbins_{0} {}
+  GHistBuilder() = default;
  GHistBuilder(size_t nthread, uint32_t nbins) : nthread_{nthread}, nbins_{nbins} {}

  // construct a histogram via histogram aggregation
@@ -691,9 +692,9 @@ class GHistBuilder {

 private:
  /*! \brief number of threads for parallel computation */
-  size_t nthread_;
+  size_t nthread_ { 0 };
  /*! \brief number of all bins over all features */
-  uint32_t nbins_;
+  uint32_t nbins_ { 0 };
 };


--- a/src/common/json.cc
+++ b/src/common/json.cc
@@ -20,7 +20,7 @@ void JsonWriter::Save(Json json) {

 void JsonWriter::Visit(JsonArray const* arr) {
  this->Write("[");
-  auto const& vec = arr->getArray();
+  auto const& vec = arr->GetArray();
  size_t size = vec.size();
  for (size_t i = 0; i < size; ++i) {
    auto const& value = vec[i];
@@ -36,9 +36,9 @@ void JsonWriter::Visit(JsonObject const* obj) {
  this->NewLine();

  size_t i = 0;
-  size_t size = obj->getObject().size();
+  size_t size = obj->GetObject().size();

-  for (auto& value : obj->getObject()) {
+  for (auto& value : obj->GetObject()) {
    this->Write("\"" + value.first + "\":");
    this->Save(value.second);

@@ -54,14 +54,14 @@ void JsonWriter::Visit(JsonObject const* obj) {
 }

 void JsonWriter::Visit(JsonNumber const* num) {
-  convertor_ << num->getNumber();
+  convertor_ << num->GetNumber();
  auto const& str = convertor_.str();
  this->Write(StringView{str.c_str(), str.size()});
  convertor_.str("");
 }

 void JsonWriter::Visit(JsonInteger const* num) {
-  convertor_ << num->getInteger();
+  convertor_ << num->GetInteger();
  auto const& str = convertor_.str();
  this->Write(StringView{str.c_str(), str.size()});
  convertor_.str("");
@@ -74,7 +74,7 @@ void JsonWriter::Visit(JsonNull const* null) {
 void JsonWriter::Visit(JsonString const* str) {
  std::string buffer;
  buffer += '"';
-  auto const& string = str->getString();
+  auto const& string = str->GetString();
  for (size_t i = 0; i < string.length(); i++) {
    const char ch = string[i];
    if (ch == '\\') {
@@ -109,7 +109,7 @@ void JsonWriter::Visit(JsonString const* str) {
 }

 void JsonWriter::Visit(JsonBoolean const* boolean) {
-  bool val = boolean->getBoolean();
+  bool val = boolean->GetBoolean();
  if (val) {
    this->Write(u8"true");
  } else {
@@ -120,13 +120,13 @@ void JsonWriter::Visit(JsonBoolean const* boolean) {
 // Value
 std::string Value::TypeStr() const {
  switch (kind_) {
-    case ValueKind::String:  return "String";  break;
-    case ValueKind::Number:  return "Number";  break;
-    case ValueKind::Object:  return "Object";  break;
-    case ValueKind::Array:   return "Array";   break;
-    case ValueKind::Boolean: return "Boolean"; break;
-    case ValueKind::Null:    return "Null";    break;
-    case ValueKind::Integer: return "Integer"; break;
+    case ValueKind::kString:  return "String";  break;
+    case ValueKind::kNumber:  return "Number";  break;
+    case ValueKind::kObject:  return "Object";  break;
+    case ValueKind::kArray:   return "Array";   break;
+    case ValueKind::kBoolean: return "Boolean"; break;
+    case ValueKind::kNull:    return "Null";    break;
+    case ValueKind::kInteger: return "Integer"; break;
  }
  return "";
 }
@@ -140,10 +140,10 @@ Json& DummyJsonObject() {

 // Json Object
 JsonObject::JsonObject(JsonObject && that) :
-    Value(ValueKind::Object), object_{std::move(that.object_)} {}
+    Value(ValueKind::kObject), object_{std::move(that.object_)} {}

 JsonObject::JsonObject(std::map<std::string, Json>&& object)
-    : Value(ValueKind::Object), object_{std::move(object)} {}
+    : Value(ValueKind::kObject), object_{std::move(object)} {}

 Json& JsonObject::operator[](std::string const & key) {
  return object_[key];
@@ -157,12 +157,12 @@ Json& JsonObject::operator[](int ind) {

 bool JsonObject::operator==(Value const& rhs) const {
  if (!IsA<JsonObject>(&rhs)) { return false; }
-  return object_ == Cast<JsonObject const>(&rhs)->getObject();
+  return object_ == Cast<JsonObject const>(&rhs)->GetObject();
 }

 Value& JsonObject::operator=(Value const &rhs) {
  JsonObject const* casted = Cast<JsonObject const>(&rhs);
-  object_ = casted->getObject();
+  object_ = casted->GetObject();
  return *this;
 }

@@ -186,12 +186,12 @@ Json& JsonString::operator[](int ind) {

 bool JsonString::operator==(Value const& rhs) const {
  if (!IsA<JsonString>(&rhs)) { return false; }
-  return Cast<JsonString const>(&rhs)->getString() == str_;
+  return Cast<JsonString const>(&rhs)->GetString() == str_;
 }

 Value & JsonString::operator=(Value const &rhs) {
  JsonString const* casted = Cast<JsonString const>(&rhs);
-  str_ = casted->getString();
+  str_ = casted->GetString();
  return *this;
 }

@@ -202,7 +202,7 @@ void JsonString::Save(JsonWriter* writer) {

 // Json Array
 JsonArray::JsonArray(JsonArray && that) :
-    Value(ValueKind::Array), vec_{std::move(that.vec_)} {}
+    Value(ValueKind::kArray), vec_{std::move(that.vec_)} {}

 Json& JsonArray::operator[](std::string const & key) {
  LOG(FATAL) << "Object of type "
@@ -216,13 +216,13 @@ Json& JsonArray::operator[](int ind) {

 bool JsonArray::operator==(Value const& rhs) const {
  if (!IsA<JsonArray>(&rhs)) { return false; }
-  auto& arr = Cast<JsonArray const>(&rhs)->getArray();
+  auto& arr = Cast<JsonArray const>(&rhs)->GetArray();
  return std::equal(arr.cbegin(), arr.cend(), vec_.cbegin());
 }

 Value & JsonArray::operator=(Value const &rhs) {
  JsonArray const* casted = Cast<JsonArray const>(&rhs);
-  vec_ = casted->getArray();
+  vec_ = casted->GetArray();
  return *this;
 }

@@ -245,12 +245,12 @@ Json& JsonNumber::operator[](int ind) {

 bool JsonNumber::operator==(Value const& rhs) const {
  if (!IsA<JsonNumber>(&rhs)) { return false; }
-  return std::abs(number_ - Cast<JsonNumber const>(&rhs)->getNumber()) < kRtEps;
+  return std::abs(number_ - Cast<JsonNumber const>(&rhs)->GetNumber()) < kRtEps;
 }

 Value & JsonNumber::operator=(Value const &rhs) {
  JsonNumber const* casted = Cast<JsonNumber const>(&rhs);
-  number_ = casted->getNumber();
+  number_ = casted->GetNumber();
  return *this;
 }

@@ -273,12 +273,12 @@ Json& JsonInteger::operator[](int ind) {

 bool JsonInteger::operator==(Value const& rhs) const {
  if (!IsA<JsonInteger>(&rhs)) { return false; }
-  return integer_ == Cast<JsonInteger const>(&rhs)->getInteger();
+  return integer_ == Cast<JsonInteger const>(&rhs)->GetInteger();
 }

 Value & JsonInteger::operator=(Value const &rhs) {
  JsonInteger const* casted = Cast<JsonInteger const>(&rhs);
-  integer_ = casted->getInteger();
+  integer_ = casted->GetInteger();
  return *this;
 }

@@ -328,12 +328,12 @@ Json& JsonBoolean::operator[](int ind) {

 bool JsonBoolean::operator==(Value const& rhs) const {
  if (!IsA<JsonBoolean>(&rhs)) { return false; }
-  return boolean_ == Cast<JsonBoolean const>(&rhs)->getBoolean();
+  return boolean_ == Cast<JsonBoolean const>(&rhs)->GetBoolean();
 }

 Value & JsonBoolean::operator=(Value const &rhs) {
  JsonBoolean const* casted = Cast<JsonBoolean const>(&rhs);
-  boolean_ = casted->getBoolean();
+  boolean_ = casted->GetBoolean();
  return *this;
 }

--- a/src/common/observer.h
+++ b/src/common/observer.h
@@ -36,19 +36,19 @@ namespace xgboost {
 */
 class TrainingObserver {
 #if defined(XGBOOST_USE_DEBUG_OUTPUT)
-  bool constexpr static observe_ {true};
+  bool constexpr static kObserve {true};
 #else
-  bool constexpr static observe_ {false};
+  bool constexpr static kObserve {false};
 #endif  // defined(XGBOOST_USE_DEBUG_OUTPUT)

 public:
  void Update(int32_t iter) const {
-    if (XGBOOST_EXPECT(!observe_, true)) { return; }
+    if (XGBOOST_EXPECT(!kObserve, true)) { return; }
    OBSERVER_PRINT << "Iter: " << iter << OBSERVER_ENDL;
  }
  /*\brief Observe tree. */
  void Observe(RegTree const& tree) {
-    if (XGBOOST_EXPECT(!observe_, true)) { return; }
+    if (XGBOOST_EXPECT(!kObserve, true)) { return; }
    OBSERVER_PRINT << "Tree:" << OBSERVER_ENDL;
    Json j_tree {Object()};
    tree.SaveModel(&j_tree);
@@ -58,7 +58,7 @@ class TrainingObserver {
  }
  /*\brief Observe tree. */
  void Observe(RegTree const* p_tree) {
-    if (XGBOOST_EXPECT(!observe_, true)) { return; }
+    if (XGBOOST_EXPECT(!kObserve, true)) { return; }
    auto const& tree = *p_tree;
    this->Observe(tree);
  }
@@ -66,7 +66,7 @@ class TrainingObserver {
  template <typename T>
  void Observe(std::vector<T> const& h_vec, std::string name,
               size_t n = std::numeric_limits<std::size_t>::max()) const {
-    if (XGBOOST_EXPECT(!observe_, true)) { return; }
+    if (XGBOOST_EXPECT(!kObserve, true)) { return; }
    OBSERVER_PRINT << "Procedure: " << name << OBSERVER_ENDL;

    for (size_t i = 0; i < h_vec.size(); ++i) {
@@ -84,14 +84,14 @@ class TrainingObserver {
  template <typename T>
  void Observe(HostDeviceVector<T> const& vec, std::string name,
               size_t n = std::numeric_limits<std::size_t>::max()) const {
-    if (XGBOOST_EXPECT(!observe_, true)) { return; }
+    if (XGBOOST_EXPECT(!kObserve, true)) { return; }
    auto const& h_vec = vec.HostVector();
    this->Observe(h_vec, name, n);
  }
  template <typename T>
  void Observe(HostDeviceVector<T>* vec, std::string name,
               size_t n = std::numeric_limits<std::size_t>::max()) const {
-    if (XGBOOST_EXPECT(!observe_, true)) { return; }
+    if (XGBOOST_EXPECT(!kObserve, true)) { return; }
    this->Observe(*vec, name, n);
  }

@@ -100,14 +100,14 @@ class TrainingObserver {
            typename std::enable_if<
              std::is_base_of<XGBoostParameter<Parameter>, Parameter>::value>::type* = nullptr>
  void Observe(const Parameter &p, std::string name) const {
-    if (XGBOOST_EXPECT(!observe_, true)) { return; }
+    if (XGBOOST_EXPECT(!kObserve, true)) { return; }

    Json obj {toJson(p)};
    OBSERVER_PRINT << "Parameter: " << name << ":\n" << obj << OBSERVER_ENDL;
  }
  /*\brief Observe parameters provided by users. */
  void Observe(Args const& args) const {
-    if (XGBOOST_EXPECT(!observe_, true)) { return; }
+    if (XGBOOST_EXPECT(!kObserve, true)) { return; }

    for (auto kv : args) {
      OBSERVER_PRINT << kv.first << ": " << kv.second << OBSERVER_NEWLINE;
--- a/src/common/probability_distribution.h
+++ b/src/common/probability_distribution.h
@@ -59,6 +59,7 @@ class ProbabilityDistribution {
   * \return Reference to the newly created probability distribution object
   */
  static ProbabilityDistribution* Create(ProbabilityDistributionType dist);
+  virtual ~ProbabilityDistribution() = default;
 };

 /*! \brief The (standard) normal distribution */
--- a/src/common/row_set.h
+++ b/src/common/row_set.h
@@ -89,6 +89,8 @@ class RowSetCollection {
    const size_t* end = dmlc::BeginPtr(row_indices_) + row_indices_.size();
    elem_of_each_node_.emplace_back(Elem(begin, end, 0));
  }
+
+  std::vector<size_t>* Data() { return &row_indices_; }
  // split rowset into two
  inline void AddSplit(unsigned node_id,
                       unsigned left_node_id,
@@ -116,10 +118,9 @@ class RowSetCollection {
    elem_of_each_node_[node_id] = Elem(nullptr, nullptr, -1);
  }

+ private:
  // stores the row indexes in the set
  std::vector<size_t> row_indices_;
-
- private:
  // vector: node_id -> elements
  std::vector<Elem> elem_of_each_node_;
 };
@@ -151,12 +152,12 @@ class PartitionBuilder {

  common::Span<size_t> GetLeftBuffer(int nid, size_t begin, size_t end) {
    const size_t task_idx = GetTaskIdx(nid, begin);
-    return { mem_blocks_.at(task_idx).left(), end - begin };
+    return { mem_blocks_.at(task_idx).Left(), end - begin };
  }

  common::Span<size_t> GetRightBuffer(int nid, size_t begin, size_t end) {
    const size_t task_idx = GetTaskIdx(nid, begin);
-    return { mem_blocks_.at(task_idx).right(), end - begin };
+    return { mem_blocks_.at(task_idx).Right(), end - begin };
  }

  void SetNLeftElems(int nid, size_t begin, size_t end, size_t n_left) {
@@ -202,8 +203,8 @@ class PartitionBuilder {
    size_t* left_result  = rows_indexes + mem_blocks_[task_idx].n_offset_left;
    size_t* right_result = rows_indexes + mem_blocks_[task_idx].n_offset_right;

-    const size_t* left = mem_blocks_[task_idx].left();
-    const size_t* right = mem_blocks_[task_idx].right();
+    const size_t* left = mem_blocks_[task_idx].Left();
+    const size_t* right = mem_blocks_[task_idx].Right();

    std::copy_n(left, mem_blocks_[task_idx].n_left, left_result);
    std::copy_n(right, mem_blocks_[task_idx].n_right, right_result);
@@ -221,11 +222,11 @@ class PartitionBuilder {
    size_t n_offset_left;
    size_t n_offset_right;

-    size_t* left() {
+    size_t* Left() {
      return &left_data_[0];
    }

-    size_t* right() {
+    size_t* Right() {
      return &right_data_[0];
    }
   private:
--- a/src/common/timer.cc
+++ b/src/common/timer.cc
@@ -15,13 +15,13 @@ namespace common {

 void Monitor::Start(std::string const &name) {
  if (ConsoleLogger::ShouldLog(ConsoleLogger::LV::kDebug)) {
-    statistics_map[name].timer.Start();
+    statistics_map_[name].timer.Start();
  }
 }

 void Monitor::Stop(const std::string &name) {
  if (ConsoleLogger::ShouldLog(ConsoleLogger::LV::kDebug)) {
-    auto &stats = statistics_map[name];
+    auto &stats = statistics_map_[name];
    stats.timer.Stop();
    stats.count++;
  }
@@ -40,7 +40,7 @@ std::vector<Monitor::StatMap> Monitor::CollectFromOtherRanks() const {
  j_statistic["statistic"] = Object();

  auto& statistic = j_statistic["statistic"];
-  for (auto const& kv : statistics_map) {
+  for (auto const& kv : statistics_map_) {
    statistic[kv.first] = Object();
    auto& j_pair = statistic[kv.first];
    j_pair["count"] = Integer(kv.second.count);
@@ -105,7 +105,7 @@ void Monitor::Print() const {
    auto world = this->CollectFromOtherRanks();
    // rank zero is in charge of printing
    if (rabit::GetRank() == 0) {
-      LOG(CONSOLE) << "======== Monitor: " << label << " ========";
+      LOG(CONSOLE) << "======== Monitor: " << label_ << " ========";
      for (size_t i = 0; i < world.size(); ++i) {
        LOG(CONSOLE) << "From rank: " << i << ": " << std::endl;
        auto const& statistic = world[i];
@@ -114,12 +114,12 @@ void Monitor::Print() const {
    }
  } else {
    StatMap stat_map;
-    for (auto const& kv : statistics_map) {
+    for (auto const& kv : statistics_map_) {
      stat_map[kv.first] = std::make_pair(
          kv.second.count, std::chrono::duration_cast<std::chrono::microseconds>(
              kv.second.timer.elapsed).count());
    }
-    LOG(CONSOLE) << "======== Monitor: " << label << " ========";
+    LOG(CONSOLE) << "======== Monitor: " << label_ << " ========";
    this->PrintStatistics(stat_map);
  }
 }
--- a/src/common/timer.cu
+++ b/src/common/timer.cu
@@ -16,7 +16,7 @@ namespace common {

 void Monitor::StartCuda(const std::string& name) {
  if (ConsoleLogger::ShouldLog(ConsoleLogger::LV::kDebug)) {
-    auto &stats = statistics_map[name];
+    auto &stats = statistics_map_[name];
    stats.timer.Start();
 #if defined(XGBOOST_USE_NVTX)
    stats.nvtx_id = nvtxRangeStartA(name.c_str());
@@ -26,7 +26,7 @@ void Monitor::StartCuda(const std::string& name) {

 void Monitor::StopCuda(const std::string& name) {
  if (ConsoleLogger::ShouldLog(ConsoleLogger::LV::kDebug)) {
-    auto &stats = statistics_map[name];
+    auto &stats = statistics_map_[name];
    stats.timer.Stop();
    stats.count++;
 #if defined(XGBOOST_USE_NVTX)
--- a/src/common/timer.h
+++ b/src/common/timer.h
@@ -55,16 +55,16 @@ struct Monitor {
  // from left to right, <name <count, elapsed>>
  using StatMap = std::map<std::string, std::pair<size_t, size_t>>;

-  std::string label = "";
-  std::map<std::string, Statistics> statistics_map;
-  Timer self_timer;
+  std::string label_ = "";
+  std::map<std::string, Statistics> statistics_map_;
+  Timer self_timer_;

  /*! \brief Collect time statistics across all workers. */
  std::vector<StatMap> CollectFromOtherRanks() const;
  void PrintStatistics(StatMap const& statistics) const;

 public:
-  Monitor() { self_timer.Start(); }
+  Monitor() { self_timer_.Start(); }
  /*\brief Print statistics info during destruction.
   *
   * Please note that this may not work, as with distributed frameworks like Dask, the
@@ -73,13 +73,13 @@ struct Monitor {
   */
  ~Monitor() {
    this->Print();
-    self_timer.Stop();
+    self_timer_.Stop();
  }

  /*! \brief Print all the statistics. */
  void Print() const;

-  void Init(std::string label) { this->label = label; }
+  void Init(std::string label) { this->label_ = label; }
  void Start(const std::string &name);
  void Stop(const std::string &name);
  void StartCuda(const std::string &name);
--- a/src/common/transform.h
+++ b/src/common/transform.h
@@ -133,8 +133,9 @@ class Transform {
    template <typename std::enable_if<CompiledWithCuda>::type* = nullptr,
              typename... HDV>
    void LaunchCUDA(Functor _func, HDV*... _vectors) const {
-      if (shard_)
+      if (shard_) {
        UnpackShard(device_, _vectors...);
+      }

      size_t range_size = *range_.end() - *range_.begin();