Clang-tidy static analysis (#3222)

* Clang-tidy static analysis
* Modernise checks
* Google coding standard checks
* Identifier renaming according to Google style
2018-04-19 18:57:13 +12:00
parent 3242b0a378
commit ccf80703ef
97 changed files with 3407 additions and 3354 deletions
--- a/src/common/avx_helpers.h
+++ b/src/common/avx_helpers.h
@@ -68,10 +68,10 @@ inline Float8 round(const Float8& x) {

 // Overload std::max/min
 namespace std {
-inline avx::Float8 max(const avx::Float8& a, const avx::Float8& b) {
+inline avx::Float8 max(const avx::Float8& a, const avx::Float8& b) {  // NOLINT
  return avx::Float8(_mm256_max_ps(a.x, b.x));
 }
-inline avx::Float8 min(const avx::Float8& a, const avx::Float8& b) {
+inline avx::Float8 min(const avx::Float8& a, const avx::Float8& b) {  // NOLINT
  return avx::Float8(_mm256_min_ps(a.x, b.x));
 }
 }  // namespace std
@@ -172,7 +172,7 @@ inline Float8 Sigmoid(Float8 x) {
 }

 // Store 8 gradient pairs given vectors containing gradient and Hessian
-inline void StoreGpair(xgboost::bst_gpair* dst, const Float8& grad,
+inline void StoreGpair(xgboost::GradientPair* dst, const Float8& grad,
                       const Float8& hess) {
  float* ptr = reinterpret_cast<float*>(dst);
  __m256 gpair_low = _mm256_unpacklo_ps(grad.x, hess.x);
@@ -190,11 +190,11 @@ namespace avx {
 * \brief Fallback implementation not using AVX.
 */

-struct Float8 {
+struct Float8 {  // NOLINT
  float x[8];
  explicit Float8(const float& val) {
-    for (int i = 0; i < 8; i++) {
-      x[i] = val;
+    for (float & i : x) {
+      i = val;
    }
  }
  explicit Float8(const float* vec) {
@@ -202,7 +202,7 @@ struct Float8 {
      x[i] = vec[i];
    }
  }
-  Float8() {}
+  Float8() = default;
  Float8& operator+=(const Float8& rhs) {
    for (int i = 0; i < 8; i++) {
      x[i] += rhs.x[i];
@@ -228,7 +228,7 @@ struct Float8 {
    return *this;
  }
  void Print() {
-    float* f = reinterpret_cast<float*>(&x);
+    auto* f = reinterpret_cast<float*>(&x);
    printf("%f %f %f %f %f %f %f %f\n", f[0], f[1], f[2], f[3], f[4], f[5],
           f[6], f[7]);
  }
@@ -252,10 +252,10 @@ inline Float8 operator/(Float8 lhs, const Float8& rhs) {
 }

 // Store 8 gradient pairs given vectors containing gradient and Hessian
-inline void StoreGpair(xgboost::bst_gpair* dst, const Float8& grad,
+inline void StoreGpair(xgboost::GradientPair* dst, const Float8& grad,
                       const Float8& hess) {
  for (int i = 0; i < 8; i++) {
-    dst[i] = xgboost::bst_gpair(grad.x[i], hess.x[i]);
+    dst[i] = xgboost::GradientPair(grad.x[i], hess.x[i]);
  }
 }

@@ -269,14 +269,14 @@ inline Float8 Sigmoid(Float8 x) {
 }  // namespace avx

 namespace std {
-inline avx::Float8 max(const avx::Float8& a, const avx::Float8& b) {
+inline avx::Float8 max(const avx::Float8& a, const avx::Float8& b) {  // NOLINT
  avx::Float8 max;
  for (int i = 0; i < 8; i++) {
    max.x[i] = std::max(a.x[i], b.x[i]);
  }
  return max;
 }
-inline avx::Float8 min(const avx::Float8& a, const avx::Float8& b) {
+inline avx::Float8 min(const avx::Float8& a, const avx::Float8& b) {  // NOLINT
  avx::Float8 min;
  for (int i = 0; i < 8; i++) {
    min.x[i] = std::min(a.x[i], b.x[i]);
--- a/src/common/bitmap.h
+++ b/src/common/bitmap.h
@@ -42,7 +42,7 @@ struct BitMap {
  inline void InitFromBool(const std::vector<int>& vec) {
    this->Resize(vec.size());
    // parallel over the full cases
-    bst_omp_uint nsize = static_cast<bst_omp_uint>(vec.size() / 32);
+    auto nsize = static_cast<bst_omp_uint>(vec.size() / 32);
    #pragma omp parallel for schedule(static)
    for (bst_omp_uint i = 0; i < nsize; ++i) {
      uint32_t res = 0;
--- a/src/common/column_matrix.h
+++ b/src/common/column_matrix.h
@@ -8,21 +8,27 @@
 #ifndef XGBOOST_COMMON_COLUMN_MATRIX_H_
 #define XGBOOST_COMMON_COLUMN_MATRIX_H_

-#define XGBOOST_TYPE_SWITCH(dtype, OP)        \
-switch (dtype) {                \
-  case xgboost::common::uint32 : {           \
-    typedef uint32_t DType;         \
-    OP; break;              \
-  }               \
-  case xgboost::common::uint16 : {           \
-    typedef uint16_t DType;         \
-    OP; break;              \
-  }               \
-  case xgboost::common::uint8 : {            \
-    typedef uint8_t DType;          \
-    OP; break;              \
-    default: LOG(FATAL) << "don't recognize type flag" << dtype;  \
-  }               \
+#define XGBOOST_TYPE_SWITCH(dtype, OP)                      \
+  \
+switch(dtype) {                                             \
+    case xgboost::common::uint32: {                         \
+      using DType = uint32_t;                               \
+      OP;                                                   \
+      break;                                                \
+    }                                                       \
+    case xgboost::common::uint16: {                         \
+      using DType = uint16_t;                               \
+      OP;                                                   \
+      break;                                                \
+    }                                                       \
+    case xgboost::common::uint8: {                          \
+      using DType = uint8_t;                                \
+      OP;                                                   \
+      break;                                                \
+      default:                                              \
+        LOG(FATAL) << "don't recognize type flag" << dtype; \
+    }                                                       \
+  \
 }

 #include <type_traits>
@@ -31,11 +37,12 @@ switch (dtype) {                \
 #include "hist_util.h"
 #include "../tree/fast_hist_param.h"

-using xgboost::tree::FastHistParam;

 namespace xgboost {
 namespace common {

+using tree::FastHistParam;
+
 /*! \brief indicator of data type used for storing bin id's in a column. */
 enum DataType {
  uint8 = 1,
@@ -78,7 +85,7 @@ class ColumnMatrix {
       slot of internal buffer. */
    packing_factor_ = sizeof(uint32_t) / static_cast<size_t>(this->dtype);

-    const bst_uint nfeature = static_cast<bst_uint>(gmat.cut->row_ptr.size() - 1);
+    const auto nfeature = static_cast<bst_uint>(gmat.cut->row_ptr.size() - 1);
    const size_t nrow = gmat.row_ptr.size() - 1;

    // identify type of each column
--- a/src/common/common.cc
+++ b/src/common/common.cc
@@ -14,7 +14,7 @@ struct RandomThreadLocalEntry {
  GlobalRandomEngine engine;
 };

-typedef dmlc::ThreadLocalStore<RandomThreadLocalEntry> RandomThreadLocalStore;
+using RandomThreadLocalStore = dmlc::ThreadLocalStore<RandomThreadLocalEntry>;

 GlobalRandomEngine& GlobalRandom() {
  return RandomThreadLocalStore::Get()->engine;
--- a/src/common/compressed_iterator.h
+++ b/src/common/compressed_iterator.h
@@ -11,20 +11,20 @@
 namespace xgboost {
 namespace common {

-typedef unsigned char compressed_byte_t;
+using CompressedByteT = unsigned char;

 namespace detail {
-inline void SetBit(compressed_byte_t *byte, int bit_idx) {
+inline void SetBit(CompressedByteT *byte, int bit_idx) {
  *byte |= 1 << bit_idx;
 }
 template <typename T>
 inline T CheckBit(const T &byte, int bit_idx) {
  return byte & (1 << bit_idx);
 }
-inline void ClearBit(compressed_byte_t *byte, int bit_idx) {
+inline void ClearBit(CompressedByteT *byte, int bit_idx) {
  *byte &= ~(1 << bit_idx);
 }
-static const int padding = 4;  // Assign padding so we can read slightly off
+static const int kPadding = 4;  // Assign padding so we can read slightly off
                               // the beginning of the array

 // The number of bits required to represent a given unsigned range
@@ -76,16 +76,16 @@ class CompressedBufferWriter {
    size_t compressed_size = static_cast<size_t>(std::ceil(
        static_cast<double>(detail::SymbolBits(num_symbols) * num_elements) /
        bits_per_byte));
-    return compressed_size + detail::padding;
+    return compressed_size + detail::kPadding;
  }

  template <typename T>
-  void WriteSymbol(compressed_byte_t *buffer, T symbol, size_t offset) {
+  void WriteSymbol(CompressedByteT *buffer, T symbol, size_t offset) {
    const int bits_per_byte = 8;

    for (size_t i = 0; i < symbol_bits_; i++) {
      size_t byte_idx = ((offset + 1) * symbol_bits_ - (i + 1)) / bits_per_byte;
-      byte_idx += detail::padding;
+      byte_idx += detail::kPadding;
      size_t bit_idx =
          ((bits_per_byte + i) - ((offset + 1) * symbol_bits_)) % bits_per_byte;

@@ -96,20 +96,20 @@ class CompressedBufferWriter {
      }
    }
  }
-  template <typename iter_t>
-  void Write(compressed_byte_t *buffer, iter_t input_begin, iter_t input_end) {
+  template <typename IterT>
+  void Write(CompressedByteT *buffer, IterT input_begin, IterT input_end) {
    uint64_t tmp = 0;
    size_t stored_bits = 0;
    const size_t max_stored_bits = 64 - symbol_bits_;
-    size_t buffer_position = detail::padding;
+    size_t buffer_position = detail::kPadding;
    const size_t num_symbols = input_end - input_begin;
    for (size_t i = 0; i < num_symbols; i++) {
-      typename std::iterator_traits<iter_t>::value_type symbol = input_begin[i];
+      typename std::iterator_traits<IterT>::value_type symbol = input_begin[i];
      if (stored_bits > max_stored_bits) {
        // Eject only full bytes
        size_t tmp_bytes = stored_bits / 8;
        for (size_t j = 0; j < tmp_bytes; j++) {
-          buffer[buffer_position] = static_cast<compressed_byte_t>(
+          buffer[buffer_position] = static_cast<CompressedByteT>(
              tmp >> (stored_bits - (j + 1) * 8));
          buffer_position++;
        }
@@ -129,10 +129,10 @@ class CompressedBufferWriter {
      int shift_bits = static_cast<int>(stored_bits) - (j + 1) * 8;
      if (shift_bits >= 0) {
        buffer[buffer_position] =
-            static_cast<compressed_byte_t>(tmp >> shift_bits);
+            static_cast<CompressedByteT>(tmp >> shift_bits);
      } else {
        buffer[buffer_position] =
-            static_cast<compressed_byte_t>(tmp << std::abs(shift_bits));
+            static_cast<CompressedByteT>(tmp << std::abs(shift_bits));
      }
      buffer_position++;
    }
@@ -153,23 +153,21 @@ template <typename T>

 class CompressedIterator {
 public:
-  typedef CompressedIterator<T> self_type;  ///< My own type
-  typedef ptrdiff_t
-      difference_type;   ///< Type to express the result of subtracting
-                         /// one iterator from another
-  typedef T value_type;  ///< The type of the element the iterator can point to
-  typedef value_type *pointer;   ///< The type of a pointer to an element the
-                                 /// iterator can point to
-  typedef value_type reference;  ///< The type of a reference to an element the
-                                 /// iterator can point to
+  // Type definitions for thrust
+  typedef CompressedIterator<T> self_type;  // NOLINT
+  typedef ptrdiff_t difference_type;        // NOLINT
+  typedef T value_type;                     // NOLINT
+  typedef value_type *pointer;              // NOLINT
+  typedef value_type reference;             // NOLINT
+
 private:
-  compressed_byte_t *buffer_;
+  CompressedByteT *buffer_;
  size_t symbol_bits_;
  size_t offset_;

 public:
  CompressedIterator() : buffer_(nullptr), symbol_bits_(0), offset_(0) {}
-  CompressedIterator(compressed_byte_t *buffer, int num_symbols)
+  CompressedIterator(CompressedByteT *buffer, int num_symbols)
      : buffer_(buffer), offset_(0) {
    symbol_bits_ = detail::SymbolBits(num_symbols);
  }
@@ -178,7 +176,7 @@ class CompressedIterator {
    const int bits_per_byte = 8;
    size_t start_bit_idx = ((offset_ + 1) * symbol_bits_ - 1);
    size_t start_byte_idx = start_bit_idx / bits_per_byte;
-    start_byte_idx += detail::padding;
+    start_byte_idx += detail::kPadding;

    // Read 5 bytes - the maximum we will need
    uint64_t tmp = static_cast<uint64_t>(buffer_[start_byte_idx - 4]) << 32 |
--- a/src/common/config.h
+++ b/src/common/config.h
@@ -24,33 +24,33 @@ class ConfigReaderBase {
   * \brief get current name, called after Next returns true
   * \return current parameter name
   */
-  inline const char *name(void) const {
-    return s_name.c_str();
+  inline const char *Name() const {
+    return s_name_.c_str();
  }
  /*!
   * \brief get current value, called after Next returns true
   * \return current parameter value
   */
-  inline const char *val(void) const {
-    return s_val.c_str();
+  inline const char *Val() const {
+    return s_val_.c_str();
  }
  /*!
   * \brief move iterator to next position
   * \return true if there is value in next position
   */
-  inline bool Next(void) {
+  inline bool Next() {
    while (!this->IsEnd()) {
-      GetNextToken(&s_name);
-      if (s_name == "=") return false;
-      if (GetNextToken(&s_buf) || s_buf != "=")  return false;
-      if (GetNextToken(&s_val) || s_val == "=")  return false;
+      GetNextToken(&s_name_);
+      if (s_name_ == "=") return false;
+      if (GetNextToken(&s_buf_) || s_buf_ != "=")  return false;
+      if (GetNextToken(&s_val_) || s_val_ == "=")  return false;
      return true;
    }
    return false;
  }
  // called before usage
-  inline void Init(void) {
-    ch_buf = this->GetChar();
+  inline void Init() {
+    ch_buf_ = this->GetChar();
  }

 protected:
@@ -58,38 +58,38 @@ class ConfigReaderBase {
   * \brief to be implemented by subclass,
   * get next token, return EOF if end of file
   */
-  virtual char GetChar(void) = 0;
+  virtual char GetChar() = 0;
  /*! \brief to be implemented by child, check if end of stream */
-  virtual bool IsEnd(void) = 0;
+  virtual bool IsEnd() = 0;

 private:
-  char ch_buf;
-  std::string s_name, s_val, s_buf;
+  char ch_buf_;
+  std::string s_name_, s_val_, s_buf_;

-  inline void SkipLine(void) {
+  inline void SkipLine() {
    do {
-      ch_buf = this->GetChar();
-    } while (ch_buf != EOF && ch_buf != '\n' && ch_buf != '\r');
+      ch_buf_ = this->GetChar();
+    } while (ch_buf_ != EOF && ch_buf_ != '\n' && ch_buf_ != '\r');
  }

  inline void ParseStr(std::string *tok) {
-    while ((ch_buf = this->GetChar()) != EOF) {
-      switch (ch_buf) {
+    while ((ch_buf_ = this->GetChar()) != EOF) {
+      switch (ch_buf_) {
        case '\\': *tok += this->GetChar(); break;
        case '\"': return;
        case '\r':
        case '\n': LOG(FATAL)<< "ConfigReader: unterminated string";
-        default: *tok += ch_buf;
+        default: *tok += ch_buf_;
      }
    }
    LOG(FATAL) << "ConfigReader: unterminated string";
  }
  inline void ParseStrML(std::string *tok) {
-    while ((ch_buf = this->GetChar()) != EOF) {
-      switch (ch_buf) {
+    while ((ch_buf_ = this->GetChar()) != EOF) {
+      switch (ch_buf_) {
        case '\\': *tok += this->GetChar(); break;
        case '\'': return;
-        default: *tok += ch_buf;
+        default: *tok += ch_buf_;
      }
    }
    LOG(FATAL) << "unterminated string";
@@ -98,24 +98,24 @@ class ConfigReaderBase {
  inline bool GetNextToken(std::string *tok) {
    tok->clear();
    bool new_line = false;
-    while (ch_buf != EOF) {
-      switch (ch_buf) {
+    while (ch_buf_ != EOF) {
+      switch (ch_buf_) {
        case '#' : SkipLine(); new_line = true; break;
        case '\"':
          if (tok->length() == 0) {
-            ParseStr(tok); ch_buf = this->GetChar(); return new_line;
+            ParseStr(tok); ch_buf_ = this->GetChar(); return new_line;
          } else {
            LOG(FATAL) << "ConfigReader: token followed directly by string";
          }
        case '\'':
          if (tok->length() == 0) {
-            ParseStrML(tok); ch_buf = this->GetChar(); return new_line;
+            ParseStrML(tok); ch_buf_ = this->GetChar(); return new_line;
          } else {
            LOG(FATAL) << "ConfigReader: token followed directly by string";
          }
        case '=':
          if (tok->length() == 0) {
-            ch_buf = this->GetChar();
+            ch_buf_ = this->GetChar();
            *tok = '=';
          }
          return new_line;
@@ -124,12 +124,12 @@ class ConfigReaderBase {
          if (tok->length() == 0) new_line = true;
        case '\t':
        case ' ' :
-          ch_buf = this->GetChar();
+          ch_buf_ = this->GetChar();
          if (tok->length() != 0) return new_line;
          break;
        default:
-          *tok += ch_buf;
-          ch_buf = this->GetChar();
+          *tok += ch_buf_;
+          ch_buf_ = this->GetChar();
          break;
      }
    }
@@ -149,19 +149,19 @@ class ConfigStreamReader: public ConfigReaderBase {
   * \brief constructor
   * \param fin istream input stream
   */
-  explicit ConfigStreamReader(std::istream &fin) : fin(fin) {}
+  explicit ConfigStreamReader(std::istream &fin) : fin_(fin) {}

 protected:
-  virtual char GetChar(void) {
-    return fin.get();
+  char GetChar() override {
+    return fin_.get();
  }
  /*! \brief to be implemented by child, check if end of stream */
-  virtual bool IsEnd(void) {
-    return fin.eof();
+  bool IsEnd() override {
+    return fin_.eof();
  }

 private:
-  std::istream &fin;
+  std::istream &fin_;
 };

 /*!
@@ -173,20 +173,20 @@ class ConfigIterator: public ConfigStreamReader {
   * \brief constructor
   * \param fname name of configure file
   */
-  explicit ConfigIterator(const char *fname) : ConfigStreamReader(fi) {
-    fi.open(fname);
-    if (fi.fail()) {
+  explicit ConfigIterator(const char *fname) : ConfigStreamReader(fi_) {
+    fi_.open(fname);
+    if (fi_.fail()) {
      LOG(FATAL) << "cannot open file " << fname;
    }
    ConfigReaderBase::Init();
  }
  /*! \brief destructor */
-  ~ConfigIterator(void) {
-    fi.close();
+  ~ConfigIterator() {
+    fi_.close();
  }

 private:
-  std::ifstream fi;
+  std::ifstream fi_;
 };
 }  // namespace common
 }  // namespace xgboost
--- a/src/common/device_helpers.cuh
+++ b/src/common/device_helpers.cuh
@@ -25,16 +25,16 @@

 namespace dh {

-#define HOST_DEV_INLINE __host__ __device__ __forceinline__
+#define HOST_DEV_INLINE XGBOOST_DEVICE __forceinline__
 #define DEV_INLINE __device__ __forceinline__

 /*
 * Error handling  functions
 */

-#define safe_cuda(ans) throw_on_cuda_error((ans), __FILE__, __LINE__)
+#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)

-inline cudaError_t throw_on_cuda_error(cudaError_t code, const char *file,
+inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file,
                                       int line) {
  if (code != cudaSuccess) {
    std::stringstream ss;
@@ -48,9 +48,9 @@ inline cudaError_t throw_on_cuda_error(cudaError_t code, const char *file,
 }

 #ifdef XGBOOST_USE_NCCL
-#define safe_nccl(ans) throw_on_nccl_error((ans), __FILE__, __LINE__)
+#define safe_nccl(ans) ThrowOnNcclError((ans), __FILE__, __LINE__)

-inline ncclResult_t throw_on_nccl_error(ncclResult_t code, const char *file,
+inline ncclResult_t ThrowOnNcclError(ncclResult_t code, const char *file,
                                        int line) {
  if (code != ncclSuccess) {
    std::stringstream ss;
@@ -64,16 +64,16 @@ inline ncclResult_t throw_on_nccl_error(ncclResult_t code, const char *file,
 #endif

 template <typename T>
-T *raw(thrust::device_vector<T> &v) {  //  NOLINT
+T *Raw(thrust::device_vector<T> &v) {  //  NOLINT
  return raw_pointer_cast(v.data());
 }

 template <typename T>
-const T *raw(const thrust::device_vector<T> &v) {  //  NOLINT
+const T *Raw(const thrust::device_vector<T> &v) {  //  NOLINT
  return raw_pointer_cast(v.data());
 }

-inline int n_visible_devices() {
+inline int NVisibleDevices() {
  int n_visgpus = 0;

  dh::safe_cuda(cudaGetDeviceCount(&n_visgpus));
@@ -81,40 +81,40 @@ inline int n_visible_devices() {
  return n_visgpus;
 }

-inline int n_devices_all(int n_gpus) {
-  int n_devices_visible = dh::n_visible_devices();
+inline int NDevicesAll(int n_gpus) {
+  int n_devices_visible = dh::NVisibleDevices();
  int n_devices = n_gpus < 0 ? n_devices_visible : n_gpus;
  return (n_devices);
 }
-inline int n_devices(int n_gpus, int num_rows) {
-  int n_devices = dh::n_devices_all(n_gpus);
+inline int NDevices(int n_gpus, int num_rows) {
+  int n_devices = dh::NDevicesAll(n_gpus);
  // fix-up device number to be limited by number of rows
  n_devices = n_devices > num_rows ? num_rows : n_devices;
  return (n_devices);
 }

 // if n_devices=-1, then use all visible devices
-inline void synchronize_n_devices(int n_devices, std::vector<int> dList) {
+inline void SynchronizeNDevices(int n_devices, std::vector<int> dList) {
  for (int d_idx = 0; d_idx < n_devices; d_idx++) {
    int device_idx = dList[d_idx];
    safe_cuda(cudaSetDevice(device_idx));
    safe_cuda(cudaDeviceSynchronize());
  }
 }
-inline void synchronize_all() {
-  for (int device_idx = 0; device_idx < n_visible_devices(); device_idx++) {
+inline void SynchronizeAll() {
+  for (int device_idx = 0; device_idx < NVisibleDevices(); device_idx++) {
    safe_cuda(cudaSetDevice(device_idx));
    safe_cuda(cudaDeviceSynchronize());
  }
 }

-inline std::string device_name(int device_idx) {
+inline std::string DeviceName(int device_idx) {
  cudaDeviceProp prop;
  dh::safe_cuda(cudaGetDeviceProperties(&prop, device_idx));
  return std::string(prop.name);
 }

-inline size_t available_memory(int device_idx) {
+inline size_t AvailableMemory(int device_idx) {
  size_t device_free = 0;
  size_t device_total = 0;
  safe_cuda(cudaSetDevice(device_idx));
@@ -130,20 +130,20 @@ inline size_t available_memory(int device_idx) {
 * \param device_idx  Zero-based index of the device.
 */

-inline size_t max_shared_memory(int device_idx) {
+inline size_t MaxSharedMemory(int device_idx) {
  cudaDeviceProp prop;
  dh::safe_cuda(cudaGetDeviceProperties(&prop, device_idx));
  return prop.sharedMemPerBlock;
 }

 // ensure gpu_id is correct, so not dependent upon user knowing details
-inline int get_device_idx(int gpu_id) {
+inline int GetDeviceIdx(int gpu_id) {
  // protect against overrun for gpu_id
-  return (std::abs(gpu_id) + 0) % dh::n_visible_devices();
+  return (std::abs(gpu_id) + 0) % dh::NVisibleDevices();
 }

-inline void check_compute_capability() {
-  int n_devices = n_visible_devices();
+inline void CheckComputeCapability() {
+  int n_devices = NVisibleDevices();
  for (int d_idx = 0; d_idx < n_devices; ++d_idx) {
    cudaDeviceProp prop;
    safe_cuda(cudaGetDeviceProperties(&prop, d_idx));
@@ -159,72 +159,72 @@ inline void check_compute_capability() {
 * Range iterator
 */

-class range {
+class Range {
 public:
-  class iterator {
-    friend class range;
+  class Iterator {
+    friend class Range;

   public:
-    __host__ __device__ int64_t operator*() const { return i_; }
-    __host__ __device__ const iterator &operator++() {
+    XGBOOST_DEVICE int64_t operator*() const { return i_; }
+    XGBOOST_DEVICE const Iterator &operator++() {
      i_ += step_;
      return *this;
    }
-    __host__ __device__ iterator operator++(int) {
-      iterator copy(*this);
+    XGBOOST_DEVICE Iterator operator++(int) {
+      Iterator copy(*this);
      i_ += step_;
      return copy;
    }

-    __host__ __device__ bool operator==(const iterator &other) const {
+    XGBOOST_DEVICE bool operator==(const Iterator &other) const {
      return i_ >= other.i_;
    }
-    __host__ __device__ bool operator!=(const iterator &other) const {
+    XGBOOST_DEVICE bool operator!=(const Iterator &other) const {
      return i_ < other.i_;
    }

-    __host__ __device__ void step(int s) { step_ = s; }
+    XGBOOST_DEVICE void Step(int s) { step_ = s; }

   protected:
-    __host__ __device__ explicit iterator(int64_t start) : i_(start) {}
+    XGBOOST_DEVICE explicit Iterator(int64_t start) : i_(start) {}

   public:
    uint64_t i_;
    int step_ = 1;
  };

-  __host__ __device__ iterator begin() const { return begin_; }
-  __host__ __device__ iterator end() const { return end_; }
-  __host__ __device__ range(int64_t begin, int64_t end)
+  XGBOOST_DEVICE Iterator begin() const { return begin_; }  // NOLINT
+  XGBOOST_DEVICE Iterator end() const { return end_; }      // NOLINT
+  XGBOOST_DEVICE Range(int64_t begin, int64_t end)
      : begin_(begin), end_(end) {}
-  __host__ __device__ void step(int s) { begin_.step(s); }
+  XGBOOST_DEVICE void Step(int s) { begin_.Step(s); }

 private:
-  iterator begin_;
-  iterator end_;
+  Iterator begin_;
+  Iterator end_;
 };

 template <typename T>
-__device__ range grid_stride_range(T begin, T end) {
+__device__ Range GridStrideRange(T begin, T end) {
  begin += blockDim.x * blockIdx.x + threadIdx.x;
-  range r(begin, end);
-  r.step(gridDim.x * blockDim.x);
+  Range r(begin, end);
+  r.Step(gridDim.x * blockDim.x);
  return r;
 }

 template <typename T>
-__device__ range block_stride_range(T begin, T end) {
+__device__ Range BlockStrideRange(T begin, T end) {
  begin += threadIdx.x;
-  range r(begin, end);
-  r.step(blockDim.x);
+  Range r(begin, end);
+  r.Step(blockDim.x);
  return r;
 }

 // Threadblock iterates over range, filling with value. Requires all threads in
 // block to be active.
 template <typename IterT, typename ValueT>
-__device__ void block_fill(IterT begin, size_t n, ValueT value) {
-  for (auto i : block_stride_range(static_cast<size_t>(0), n)) {
+__device__ void BlockFill(IterT begin, size_t n, ValueT value) {
+  for (auto i : BlockStrideRange(static_cast<size_t>(0), n)) {
    begin[i] = value;
  }
 }
@@ -234,34 +234,34 @@ __device__ void block_fill(IterT begin, size_t n, ValueT value) {
 */

 template <typename T1, typename T2>
-T1 div_round_up(const T1 a, const T2 b) {
+T1 DivRoundUp(const T1 a, const T2 b) {
  return static_cast<T1>(ceil(static_cast<double>(a) / b));
 }

 template <typename L>
-__global__ void launch_n_kernel(size_t begin, size_t end, L lambda) {
-  for (auto i : grid_stride_range(begin, end)) {
+__global__ void LaunchNKernel(size_t begin, size_t end, L lambda) {
+  for (auto i : GridStrideRange(begin, end)) {
    lambda(i);
  }
 }
 template <typename L>
-__global__ void launch_n_kernel(int device_idx, size_t begin, size_t end,
+__global__ void LaunchNKernel(int device_idx, size_t begin, size_t end,
                                L lambda) {
-  for (auto i : grid_stride_range(begin, end)) {
+  for (auto i : GridStrideRange(begin, end)) {
    lambda(i, device_idx);
  }
 }

 template <int ITEMS_PER_THREAD = 8, int BLOCK_THREADS = 256, typename L>
-inline void launch_n(int device_idx, size_t n, L lambda) {
+inline void LaunchN(int device_idx, size_t n, L lambda) {
  if (n == 0) {
    return;
  }

  safe_cuda(cudaSetDevice(device_idx));
  const int GRID_SIZE =
-      static_cast<int>(div_round_up(n, ITEMS_PER_THREAD * BLOCK_THREADS));
-  launch_n_kernel<<<GRID_SIZE, BLOCK_THREADS>>>(static_cast<size_t>(0), n,
+      static_cast<int>(DivRoundUp(n, ITEMS_PER_THREAD * BLOCK_THREADS));
+  LaunchNKernel<<<GRID_SIZE, BLOCK_THREADS>>>(static_cast<size_t>(0), n,
                                                lambda);
 }

@@ -269,91 +269,91 @@ inline void launch_n(int device_idx, size_t n, L lambda) {
 * Memory
 */

-enum memory_type { DEVICE, DEVICE_MANAGED };
+enum MemoryType { kDevice, kDeviceManaged };

-template <memory_type MemoryT>
-class bulk_allocator;
+template <MemoryType MemoryT>
+class BulkAllocator;
 template <typename T>
-class dvec2;
+class DVec2;

 template <typename T>
-class dvec {
-  friend class dvec2<T>;
+class DVec {
+  friend class DVec2<T>;

 private:
-  T *_ptr;
-  size_t _size;
-  int _device_idx;
+  T *ptr_;
+  size_t size_;
+  int device_idx_;

 public:
-  void external_allocate(int device_idx, void *ptr, size_t size) {
-    if (!empty()) {
-      throw std::runtime_error("Tried to allocate dvec but already allocated");
+  void ExternalAllocate(int device_idx, void *ptr, size_t size) {
+    if (!Empty()) {
+      throw std::runtime_error("Tried to allocate DVec but already allocated");
    }
-    _ptr = static_cast<T *>(ptr);
-    _size = size;
-    _device_idx = device_idx;
-    safe_cuda(cudaSetDevice(_device_idx));
+    ptr_ = static_cast<T *>(ptr);
+    size_ = size;
+    device_idx_ = device_idx;
+    safe_cuda(cudaSetDevice(device_idx_));
  }

-  dvec() : _ptr(NULL), _size(0), _device_idx(-1) {}
-  size_t size() const { return _size; }
-  int device_idx() const { return _device_idx; }
-  bool empty() const { return _ptr == NULL || _size == 0; }
+  DVec() : ptr_(NULL), size_(0), device_idx_(-1) {}
+  size_t Size() const { return size_; }
+  int DeviceIdx() const { return device_idx_; }
+  bool Empty() const { return ptr_ == NULL || size_ == 0; }

-  T *data() { return _ptr; }
+  T *Data() { return ptr_; }

-  const T *data() const { return _ptr; }
+  const T *Data() const { return ptr_; }

-  std::vector<T> as_vector() const {
-    std::vector<T> h_vector(size());
-    safe_cuda(cudaSetDevice(_device_idx));
-    safe_cuda(cudaMemcpy(h_vector.data(), _ptr, size() * sizeof(T),
+  std::vector<T> AsVector() const {
+    std::vector<T> h_vector(Size());
+    safe_cuda(cudaSetDevice(device_idx_));
+    safe_cuda(cudaMemcpy(h_vector.data(), ptr_, Size() * sizeof(T),
                         cudaMemcpyDeviceToHost));
    return h_vector;
  }

-  void fill(T value) {
-    auto d_ptr = _ptr;
-    launch_n(_device_idx, size(),
+  void Fill(T value) {
+    auto d_ptr = ptr_;
+    LaunchN(device_idx_, Size(),
             [=] __device__(size_t idx) { d_ptr[idx] = value; });
  }

-  void print() {
-    auto h_vector = this->as_vector();
+  void Print() {
+    auto h_vector = this->AsVector();
    for (auto e : h_vector) {
      std::cout << e << " ";
    }
    std::cout << "\n";
  }

-  thrust::device_ptr<T> tbegin() { return thrust::device_pointer_cast(_ptr); }
+  thrust::device_ptr<T> tbegin() { return thrust::device_pointer_cast(ptr_); }

  thrust::device_ptr<T> tend() {
-    return thrust::device_pointer_cast(_ptr + size());
+    return thrust::device_pointer_cast(ptr_ + Size());
  }

  template <typename T2>
-  dvec &operator=(const std::vector<T2> &other) {
+  DVec &operator=(const std::vector<T2> &other) {
    this->copy(other.begin(), other.end());
    return *this;
  }

-  dvec &operator=(dvec<T> &other) {
-    if (other.size() != size()) {
+  DVec &operator=(DVec<T> &other) {
+    if (other.Size() != Size()) {
      throw std::runtime_error(
-          "Cannot copy assign dvec to dvec, sizes are different");
+          "Cannot copy assign DVec to DVec, sizes are different");
    }
-    safe_cuda(cudaSetDevice(this->device_idx()));
-    if (other.device_idx() == this->device_idx()) {
-      dh::safe_cuda(cudaMemcpy(this->data(), other.data(),
-                               other.size() * sizeof(T),
+    safe_cuda(cudaSetDevice(this->DeviceIdx()));
+    if (other.DeviceIdx() == this->DeviceIdx()) {
+      dh::safe_cuda(cudaMemcpy(this->Data(), other.Data(),
+                               other.Size() * sizeof(T),
                               cudaMemcpyDeviceToDevice));
    } else {
-      std::cout << "deviceother: " << other.device_idx()
-                << " devicethis: " << this->device_idx() << std::endl;
-      std::cout << "size deviceother: " << other.size()
-                << " devicethis: " << this->device_idx() << std::endl;
+      std::cout << "deviceother: " << other.DeviceIdx()
+                << " devicethis: " << this->DeviceIdx() << std::endl;
+      std::cout << "size deviceother: " << other.Size()
+                << " devicethis: " << this->DeviceIdx() << std::endl;
      throw std::runtime_error("Cannot copy to/from different devices");
    }

@@ -362,177 +362,178 @@ class dvec {

  template <typename IterT>
  void copy(IterT begin, IterT end) {
-    safe_cuda(cudaSetDevice(this->device_idx()));
-    if (end - begin != size()) {
+    safe_cuda(cudaSetDevice(this->DeviceIdx()));
+    if (end - begin != Size()) {
      throw std::runtime_error(
-          "Cannot copy assign vector to dvec, sizes are different");
+          "Cannot copy assign vector to DVec, sizes are different");
    }
    thrust::copy(begin, end, this->tbegin());
  }

  void copy(thrust::device_ptr<T> begin, thrust::device_ptr<T> end) {
-    safe_cuda(cudaSetDevice(this->device_idx()));
-    if (end - begin != size()) {
+    safe_cuda(cudaSetDevice(this->DeviceIdx()));
+    if (end - begin != Size()) {
      throw std::runtime_error(
-                               "Cannot copy assign vector to dvec, sizes are different");
+                               "Cannot copy assign vector to DVec, sizes are different");
    }
-    safe_cuda(cudaMemcpy(this->data(), begin.get(),
-                         size() * sizeof(T), cudaMemcpyDefault));
+    safe_cuda(cudaMemcpy(this->Data(), begin.get(),
+                         Size() * sizeof(T), cudaMemcpyDefault));
  }
 };

 /**
- * @class dvec2 device_helpers.cuh
- * @brief wrapper for storing 2 dvec's which are needed for cub::DoubleBuffer
+ * @class DVec2 device_helpers.cuh
+ * @brief wrapper for storing 2 DVec's which are needed for cub::DoubleBuffer
 */
 template <typename T>
-class dvec2 {
+class DVec2 {
 private:
-  dvec<T> _d1, _d2;
-  cub::DoubleBuffer<T> _buff;
-  int _device_idx;
+  DVec<T> d1_, d2_;
+  cub::DoubleBuffer<T> buff_;
+  int device_idx_;

 public:
-  void external_allocate(int device_idx, void *ptr1, void *ptr2, size_t size) {
-    if (!empty()) {
-      throw std::runtime_error("Tried to allocate dvec2 but already allocated");
+  void ExternalAllocate(int device_idx, void *ptr1, void *ptr2, size_t size) {
+    if (!Empty()) {
+      throw std::runtime_error("Tried to allocate DVec2 but already allocated");
    }
-    _device_idx = device_idx;
-    _d1.external_allocate(_device_idx, ptr1, size);
-    _d2.external_allocate(_device_idx, ptr2, size);
-    _buff.d_buffers[0] = static_cast<T *>(ptr1);
-    _buff.d_buffers[1] = static_cast<T *>(ptr2);
-    _buff.selector = 0;
+    device_idx_ = device_idx;
+    d1_.ExternalAllocate(device_idx_, ptr1, size);
+    d2_.ExternalAllocate(device_idx_, ptr2, size);
+    buff_.d_buffers[0] = static_cast<T *>(ptr1);
+    buff_.d_buffers[1] = static_cast<T *>(ptr2);
+    buff_.selector = 0;
  }
-  dvec2() : _d1(), _d2(), _buff(), _device_idx(-1) {}
+  DVec2() : d1_(), d2_(), buff_(), device_idx_(-1) {}

-  size_t size() const { return _d1.size(); }
-  int device_idx() const { return _device_idx; }
-  bool empty() const { return _d1.empty() || _d2.empty(); }
+  size_t Size() const { return d1_.Size(); }
+  int DeviceIdx() const { return device_idx_; }
+  bool Empty() const { return d1_.Empty() || d2_.Empty(); }

-  cub::DoubleBuffer<T> &buff() { return _buff; }
+  cub::DoubleBuffer<T> &buff() { return buff_; }

-  dvec<T> &d1() { return _d1; }
-  dvec<T> &d2() { return _d2; }
+  DVec<T> &D1() { return d1_; }

-  T *current() { return _buff.Current(); }
+  DVec<T> &D2() { return d2_; }

-  dvec<T> &current_dvec() { return _buff.selector == 0 ? d1() : d2(); }
+  T *Current() { return buff_.Current(); }

-  T *other() { return _buff.Alternate(); }
+  DVec<T> &CurrentDVec() { return buff_.selector == 0 ? D1() : D2(); }
+
+  T *other() { return buff_.Alternate(); }
 };

-template <memory_type MemoryT>
-class bulk_allocator {
-  std::vector<char *> d_ptr;
-  std::vector<size_t> _size;
-  std::vector<int> _device_idx;
+template <MemoryType MemoryT>
+class BulkAllocator {
+  std::vector<char *> d_ptr_;
+  std::vector<size_t> size_;
+  std::vector<int> device_idx_;

-  const int align = 256;
+  static const int kAlign = 256;

-  size_t align_round_up(size_t n) const {
-    n = (n + align - 1) / align;
-    return n * align;
+  size_t AlignRoundUp(size_t n) const {
+    n = (n + kAlign - 1) / kAlign;
+    return n * kAlign;
  }

  template <typename T>
-  size_t get_size_bytes(dvec<T> *first_vec, size_t first_size) {
-    return align_round_up(first_size * sizeof(T));
+  size_t GetSizeBytes(DVec<T> *first_vec, size_t first_size) {
+    return AlignRoundUp(first_size * sizeof(T));
  }

  template <typename T, typename... Args>
-  size_t get_size_bytes(dvec<T> *first_vec, size_t first_size, Args... args) {
-    return get_size_bytes<T>(first_vec, first_size) + get_size_bytes(args...);
+  size_t GetSizeBytes(DVec<T> *first_vec, size_t first_size, Args... args) {
+    return GetSizeBytes<T>(first_vec, first_size) + GetSizeBytes(args...);
  }

  template <typename T>
-  void allocate_dvec(int device_idx, char *ptr, dvec<T> *first_vec,
+  void AllocateDVec(int device_idx, char *ptr, DVec<T> *first_vec,
                     size_t first_size) {
-    first_vec->external_allocate(device_idx, static_cast<void *>(ptr),
+    first_vec->ExternalAllocate(device_idx, static_cast<void *>(ptr),
                                 first_size);
  }

  template <typename T, typename... Args>
-  void allocate_dvec(int device_idx, char *ptr, dvec<T> *first_vec,
+  void AllocateDVec(int device_idx, char *ptr, DVec<T> *first_vec,
                     size_t first_size, Args... args) {
-    allocate_dvec<T>(device_idx, ptr, first_vec, first_size);
-    ptr += align_round_up(first_size * sizeof(T));
-    allocate_dvec(device_idx, ptr, args...);
+    AllocateDVec<T>(device_idx, ptr, first_vec, first_size);
+    ptr += AlignRoundUp(first_size * sizeof(T));
+    AllocateDVec(device_idx, ptr, args...);
  }

-  char *allocate_device(int device_idx, size_t bytes, memory_type t) {
+  char *AllocateDevice(int device_idx, size_t bytes, MemoryType t) {
    char *ptr;
    safe_cuda(cudaSetDevice(device_idx));
    safe_cuda(cudaMalloc(&ptr, bytes));
    return ptr;
  }
  template <typename T>
-  size_t get_size_bytes(dvec2<T> *first_vec, size_t first_size) {
-    return 2 * align_round_up(first_size * sizeof(T));
+  size_t GetSizeBytes(DVec2<T> *first_vec, size_t first_size) {
+    return 2 * AlignRoundUp(first_size * sizeof(T));
  }

  template <typename T, typename... Args>
-  size_t get_size_bytes(dvec2<T> *first_vec, size_t first_size, Args... args) {
-    return get_size_bytes<T>(first_vec, first_size) + get_size_bytes(args...);
+  size_t GetSizeBytes(DVec2<T> *first_vec, size_t first_size, Args... args) {
+    return GetSizeBytes<T>(first_vec, first_size) + GetSizeBytes(args...);
  }

  template <typename T>
-  void allocate_dvec(int device_idx, char *ptr, dvec2<T> *first_vec,
+  void AllocateDVec(int device_idx, char *ptr, DVec2<T> *first_vec,
                     size_t first_size) {
-    first_vec->external_allocate(
+    first_vec->ExternalAllocate(
        device_idx, static_cast<void *>(ptr),
-        static_cast<void *>(ptr + align_round_up(first_size * sizeof(T))),
+        static_cast<void *>(ptr + AlignRoundUp(first_size * sizeof(T))),
        first_size);
  }

  template <typename T, typename... Args>
-  void allocate_dvec(int device_idx, char *ptr, dvec2<T> *first_vec,
+  void AllocateDVec(int device_idx, char *ptr, DVec2<T> *first_vec,
                     size_t first_size, Args... args) {
-    allocate_dvec<T>(device_idx, ptr, first_vec, first_size);
-    ptr += (align_round_up(first_size * sizeof(T)) * 2);
-    allocate_dvec(device_idx, ptr, args...);
+    AllocateDVec<T>(device_idx, ptr, first_vec, first_size);
+    ptr += (AlignRoundUp(first_size * sizeof(T)) * 2);
+    AllocateDVec(device_idx, ptr, args...);
  }

 public:
-  bulk_allocator() {}
+   BulkAllocator() = default;
  // prevent accidental copying, moving or assignment of this object
-  bulk_allocator(const bulk_allocator<MemoryT>&) = delete;
-  bulk_allocator(bulk_allocator<MemoryT>&&) = delete;
-  void operator=(const bulk_allocator<MemoryT>&) = delete;
-  void operator=(bulk_allocator<MemoryT>&&) = delete;
+  BulkAllocator(const BulkAllocator<MemoryT>&) = delete;
+  BulkAllocator(BulkAllocator<MemoryT>&&) = delete;
+  void operator=(const BulkAllocator<MemoryT>&) = delete;
+  void operator=(BulkAllocator<MemoryT>&&) = delete;
  
-  ~bulk_allocator() {
-    for (size_t i = 0; i < d_ptr.size(); i++) {
-      if (!(d_ptr[i] == nullptr)) {
-        safe_cuda(cudaSetDevice(_device_idx[i]));
-        safe_cuda(cudaFree(d_ptr[i]));
-        d_ptr[i] = nullptr;
+  ~BulkAllocator() {
+    for (size_t i = 0; i < d_ptr_.size(); i++) {
+      if (!(d_ptr_[i] == nullptr)) {
+        safe_cuda(cudaSetDevice(device_idx_[i]));
+        safe_cuda(cudaFree(d_ptr_[i]));
+        d_ptr_[i] = nullptr;
      }
    }
  }

  // returns sum of bytes for all allocations
-  size_t size() {
-    return std::accumulate(_size.begin(), _size.end(), static_cast<size_t>(0));
+  size_t Size() {
+    return std::accumulate(size_.begin(), size_.end(), static_cast<size_t>(0));
  }

  template <typename... Args>
-  void allocate(int device_idx, bool silent, Args... args) {
-    size_t size = get_size_bytes(args...);
+  void Allocate(int device_idx, bool silent, Args... args) {
+    size_t size = GetSizeBytes(args...);

-    char *ptr = allocate_device(device_idx, size, MemoryT);
+    char *ptr = AllocateDevice(device_idx, size, MemoryT);

-    allocate_dvec(device_idx, ptr, args...);
+    AllocateDVec(device_idx, ptr, args...);

-    d_ptr.push_back(ptr);
-    _size.push_back(size);
-    _device_idx.push_back(device_idx);
+    d_ptr_.push_back(ptr);
+    size_.push_back(size);
+    device_idx_.push_back(device_idx);

    if (!silent) {
      const int mb_size = 1048576;
      LOG(CONSOLE) << "Allocated " << size / mb_size << "MB on [" << device_idx
-                   << "] " << device_name(device_idx) << ", "
-                   << available_memory(device_idx) / mb_size << "MB remaining.";
+                   << "] " << DeviceName(device_idx) << ", "
+                   << AvailableMemory(device_idx) / mb_size << "MB remaining.";
    }
  }
 };
@@ -543,7 +544,7 @@ struct CubMemory {
  size_t temp_storage_bytes;

  // Thrust
-  typedef char value_type;
+   using ValueT = char;

  CubMemory() : d_temp_storage(nullptr), temp_storage_bytes(0) {}

@@ -568,17 +569,18 @@ struct CubMemory {
    }
  }
  // Thrust
-  char *allocate(std::ptrdiff_t num_bytes) {
+  char *allocate(std::ptrdiff_t num_bytes) {  // NOLINT
    LazyAllocate(num_bytes);
    return reinterpret_cast<char *>(d_temp_storage);
  }

  // Thrust
-  void deallocate(char *ptr, size_t n) {
+  void deallocate(char *ptr, size_t n) {  // NOLINT
+
    // Do nothing
  }

-  bool IsAllocated() { return d_temp_storage != NULL; }
+  bool IsAllocated() { return d_temp_storage != nullptr; }
 };

 /*
@@ -586,7 +588,7 @@ struct CubMemory {
 */

 template <typename T>
-void print(const dvec<T> &v, size_t max_items = 10) {
+void Print(const DVec<T> &v, size_t max_items = 10) {
  std::vector<T> h = v.as_vector();
  for (size_t i = 0; i < std::min(max_items, h.size()); i++) {
    std::cout << " " << h[i];
@@ -609,14 +611,14 @@ void print(const dvec<T> &v, size_t max_items = 10) {

 // Load balancing search

-template <typename coordinate_t, typename segments_t, typename offset_t>
-void FindMergePartitions(int device_idx, coordinate_t *d_tile_coordinates,
-                         size_t num_tiles, int tile_size, segments_t segments,
-                         offset_t num_rows, offset_t num_elements) {
-  dh::launch_n(device_idx, num_tiles + 1, [=] __device__(int idx) {
-    offset_t diagonal = idx * tile_size;
-    coordinate_t tile_coordinate;
-    cub::CountingInputIterator<offset_t> nonzero_indices(0);
+template <typename CoordinateT, typename SegmentT, typename OffsetT>
+void FindMergePartitions(int device_idx, CoordinateT *d_tile_coordinates,
+                         size_t num_tiles, int tile_size, SegmentT segments,
+                         OffsetT num_rows, OffsetT num_elements) {
+  dh::LaunchN(device_idx, num_tiles + 1, [=] __device__(int idx) {
+    OffsetT diagonal = idx * tile_size;
+    CoordinateT tile_coordinate;
+    cub::CountingInputIterator<OffsetT> nonzero_indices(0);

    // Search the merge path
    // Cast to signed integer as this function can have negatives
@@ -630,27 +632,27 @@ void FindMergePartitions(int device_idx, coordinate_t *d_tile_coordinates,
 }

 template <int TILE_SIZE, int ITEMS_PER_THREAD, int BLOCK_THREADS,
-          typename offset_t, typename coordinate_t, typename func_t,
-          typename segments_iter>
-__global__ void LbsKernel(coordinate_t *d_coordinates,
-                          segments_iter segment_end_offsets, func_t f,
-                          offset_t num_segments) {
+          typename OffsetT, typename CoordinateT, typename FunctionT,
+          typename SegmentIterT>
+__global__ void LbsKernel(CoordinateT *d_coordinates,
+                          SegmentIterT segment_end_offsets, FunctionT f,
+                          OffsetT num_segments) {
  int tile = blockIdx.x;
-  coordinate_t tile_start_coord = d_coordinates[tile];
-  coordinate_t tile_end_coord = d_coordinates[tile + 1];
+  CoordinateT tile_start_coord = d_coordinates[tile];
+  CoordinateT tile_end_coord = d_coordinates[tile + 1];
  int64_t tile_num_rows = tile_end_coord.x - tile_start_coord.x;
  int64_t tile_num_elements = tile_end_coord.y - tile_start_coord.y;

-  cub::CountingInputIterator<offset_t> tile_element_indices(tile_start_coord.y);
-  coordinate_t thread_start_coord;
+  cub::CountingInputIterator<OffsetT> tile_element_indices(tile_start_coord.y);
+  CoordinateT thread_start_coord;

-  typedef typename std::iterator_traits<segments_iter>::value_type segment_t;
+  typedef typename std::iterator_traits<SegmentIterT>::value_type SegmentT;
  __shared__ struct {
-    segment_t tile_segment_end_offsets[TILE_SIZE + 1];
-    segment_t output_segment[TILE_SIZE];
+    SegmentT tile_segment_end_offsets[TILE_SIZE + 1];
+    SegmentT output_segment[TILE_SIZE];
  } temp_storage;

-  for (auto item : dh::block_stride_range(int(0), int(tile_num_rows + 1))) {
+  for (auto item : dh::BlockStrideRange(int(0), int(tile_num_rows + 1))) {
    temp_storage.tile_segment_end_offsets[item] =
        segment_end_offsets[min(static_cast<size_t>(tile_start_coord.x + item),
                                static_cast<size_t>(num_segments - 1))];
@@ -665,7 +667,7 @@ __global__ void LbsKernel(coordinate_t *d_coordinates,
                       tile_element_indices,                   // List B
                       tile_num_rows, tile_num_elements, thread_start_coord);

-  coordinate_t thread_current_coord = thread_start_coord;
+  CoordinateT thread_current_coord = thread_start_coord;
 #pragma unroll
  for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) {
    if (tile_element_indices[thread_current_coord.y] <
@@ -679,50 +681,50 @@ __global__ void LbsKernel(coordinate_t *d_coordinates,
  }
  __syncthreads();

-  for (auto item : dh::block_stride_range(int(0), int(tile_num_elements))) {
+  for (auto item : dh::BlockStrideRange(int(0), int(tile_num_elements))) {
    f(tile_start_coord.y + item, temp_storage.output_segment[item]);
  }
 }

-template <typename func_t, typename segments_iter, typename offset_t>
+template <typename FunctionT, typename SegmentIterT, typename OffsetT>
 void SparseTransformLbs(int device_idx, dh::CubMemory *temp_memory,
-                        offset_t count, segments_iter segments,
-                        offset_t num_segments, func_t f) {
-  typedef typename cub::CubVector<offset_t, 2>::Type coordinate_t;
+                        OffsetT count, SegmentIterT segments,
+                        OffsetT num_segments, FunctionT f) {
+  typedef typename cub::CubVector<OffsetT, 2>::Type CoordinateT;
  dh::safe_cuda(cudaSetDevice(device_idx));
  const int BLOCK_THREADS = 256;
  const int ITEMS_PER_THREAD = 1;
  const int TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD;
-  auto num_tiles = dh::div_round_up(count + num_segments, BLOCK_THREADS);
+  auto num_tiles = dh::DivRoundUp(count + num_segments, BLOCK_THREADS);
  CHECK(num_tiles < std::numeric_limits<unsigned int>::max());

-  temp_memory->LazyAllocate(sizeof(coordinate_t) * (num_tiles + 1));
-  coordinate_t *tmp_tile_coordinates =
-      reinterpret_cast<coordinate_t *>(temp_memory->d_temp_storage);
+  temp_memory->LazyAllocate(sizeof(CoordinateT) * (num_tiles + 1));
+  CoordinateT *tmp_tile_coordinates =
+      reinterpret_cast<CoordinateT *>(temp_memory->d_temp_storage);

  FindMergePartitions(device_idx, tmp_tile_coordinates, num_tiles,
                      BLOCK_THREADS, segments, num_segments, count);

-  LbsKernel<TILE_SIZE, ITEMS_PER_THREAD, BLOCK_THREADS, offset_t>
+  LbsKernel<TILE_SIZE, ITEMS_PER_THREAD, BLOCK_THREADS, OffsetT>
      <<<uint32_t(num_tiles), BLOCK_THREADS>>>(tmp_tile_coordinates,
                                               segments + 1, f, num_segments);
 }

-template <typename func_t, typename offset_t>
-void DenseTransformLbs(int device_idx, offset_t count, offset_t num_segments,
-                       func_t f) {
+template <typename FunctionT, typename OffsetT>
+void DenseTransformLbs(int device_idx, OffsetT count, OffsetT num_segments,
+                       FunctionT f) {
  CHECK(count % num_segments == 0) << "Data is not dense.";

-  launch_n(device_idx, count, [=] __device__(offset_t idx) {
-    offset_t segment = idx / (count / num_segments);
+  LaunchN(device_idx, count, [=] __device__(OffsetT idx) {
+    OffsetT segment = idx / (count / num_segments);
    f(idx, segment);
  });
 }

 /**
- * \fn  template <typename func_t, typename segments_iter, typename offset_t>
- * void TransformLbs(int device_idx, dh::CubMemory *temp_memory, offset_t count,
- * segments_iter segments, offset_t num_segments, bool is_dense, func_t f)
+ * \fn  template <typename FunctionT, typename SegmentIterT, typename OffsetT>
+ * void TransformLbs(int device_idx, dh::CubMemory *temp_memory, OffsetT count,
+ * SegmentIterT segments, OffsetT num_segments, bool is_dense, FunctionT f)
 *
 * \brief Load balancing search function. Reads a CSR type matrix description
 * and allows a function to be executed on each element. Search 'modern GPU load
@@ -731,9 +733,9 @@ void DenseTransformLbs(int device_idx, offset_t count, offset_t num_segments,
 * \author  Rory
 * \date  7/9/2017
 *
- * \tparam  func_t        Type of the function t.
- * \tparam  segments_iter Type of the segments iterator.
- * \tparam  offset_t      Type of the offset.
+ * \tparam  FunctionT        Type of the function t.
+ * \tparam  SegmentIterT Type of the segments iterator.
+ * \tparam  OffsetT      Type of the offset.
 * \param           device_idx    Zero-based index of the device.
 * \param [in,out]  temp_memory   Temporary memory allocator.
 * \param           count         Number of elements.
@@ -743,10 +745,10 @@ void DenseTransformLbs(int device_idx, offset_t count, offset_t num_segments,
 * \param           f             Lambda to be executed on matrix elements.
 */

-template <typename func_t, typename segments_iter, typename offset_t>
-void TransformLbs(int device_idx, dh::CubMemory *temp_memory, offset_t count,
-                  segments_iter segments, offset_t num_segments, bool is_dense,
-                  func_t f) {
+template <typename FunctionT, typename SegmentIterT, typename OffsetT>
+void TransformLbs(int device_idx, dh::CubMemory *temp_memory, OffsetT count,
+                  SegmentIterT segments, OffsetT num_segments, bool is_dense,
+                  FunctionT f) {
  if (is_dense) {
    DenseTransformLbs(device_idx, count, num_segments, f);
  } else {
@@ -765,18 +767,18 @@ void TransformLbs(int device_idx, dh::CubMemory *temp_memory, offset_t count,
 * @param offsets the segments
 */
 template <typename T1, typename T2>
-void segmentedSort(dh::CubMemory *tmp_mem, dh::dvec2<T1> *keys,
-                   dh::dvec2<T2> *vals, int nVals, int nSegs,
-                   const dh::dvec<int> &offsets, int start = 0,
+void SegmentedSort(dh::CubMemory *tmp_mem, dh::DVec2<T1> *keys,
+                   dh::DVec2<T2> *vals, int nVals, int nSegs,
+                   const dh::DVec<int> &offsets, int start = 0,
                   int end = sizeof(T1) * 8) {
  size_t tmpSize;
  dh::safe_cuda(cub::DeviceSegmentedRadixSort::SortPairs(
-      NULL, tmpSize, keys->buff(), vals->buff(), nVals, nSegs, offsets.data(),
-      offsets.data() + 1, start, end));
+      NULL, tmpSize, keys->buff(), vals->buff(), nVals, nSegs, offsets.Data(),
+      offsets.Data() + 1, start, end));
  tmp_mem->LazyAllocate(tmpSize);
  dh::safe_cuda(cub::DeviceSegmentedRadixSort::SortPairs(
      tmp_mem->d_temp_storage, tmpSize, keys->buff(), vals->buff(), nVals,
-      nSegs, offsets.data(), offsets.data() + 1, start, end));
+      nSegs, offsets.Data(), offsets.Data() + 1, start, end));
 }

 /**
@@ -787,14 +789,14 @@ void segmentedSort(dh::CubMemory *tmp_mem, dh::dvec2<T1> *keys,
 * @param nVals number of elements in the input array
 */
 template <typename T>
-void sumReduction(dh::CubMemory &tmp_mem, dh::dvec<T> &in, dh::dvec<T> &out,
+void SumReduction(dh::CubMemory &tmp_mem, dh::DVec<T> &in, dh::DVec<T> &out,
                  int nVals) {
  size_t tmpSize;
  dh::safe_cuda(
-      cub::DeviceReduce::Sum(NULL, tmpSize, in.data(), out.data(), nVals));
+      cub::DeviceReduce::Sum(NULL, tmpSize, in.Data(), out.Data(), nVals));
  tmp_mem.LazyAllocate(tmpSize);
  dh::safe_cuda(cub::DeviceReduce::Sum(tmp_mem.d_temp_storage, tmpSize,
-                                       in.data(), out.data(), nVals));
+                                       in.Data(), out.Data(), nVals));
 }

 /**
@@ -805,7 +807,7 @@ void sumReduction(dh::CubMemory &tmp_mem, dh::dvec<T> &in, dh::dvec<T> &out,
 * @param nVals number of elements in the input array
 */
 template <typename T>
-T sumReduction(dh::CubMemory &tmp_mem, T *in, int nVals) {
+T SumReduction(dh::CubMemory &tmp_mem, T *in, int nVals) {
  size_t tmpSize;
  dh::safe_cuda(cub::DeviceReduce::Sum(nullptr, tmpSize, in, in, nVals));
  // Allocate small extra memory for the return value
@@ -827,8 +829,8 @@ T sumReduction(dh::CubMemory &tmp_mem, T *in, int nVals) {
 * @param def default value to be filled
 */
 template <typename T, int BlkDim = 256, int ItemsPerThread = 4>
-void fillConst(int device_idx, T *out, int len, T def) {
-  dh::launch_n<ItemsPerThread, BlkDim>(device_idx, len,
+void FillConst(int device_idx, T *out, int len, T def) {
+  dh::LaunchN<ItemsPerThread, BlkDim>(device_idx, len,
                                       [=] __device__(int i) { out[i] = def; });
 }

@@ -842,9 +844,9 @@ void fillConst(int device_idx, T *out, int len, T def) {
 * @param nVals length of the buffers
 */
 template <typename T1, typename T2, int BlkDim = 256, int ItemsPerThread = 4>
-void gather(int device_idx, T1 *out1, const T1 *in1, T2 *out2, const T2 *in2,
+void Gather(int device_idx, T1 *out1, const T1 *in1, T2 *out2, const T2 *in2,
            const int *instId, int nVals) {
-  dh::launch_n<ItemsPerThread, BlkDim>(device_idx, nVals,
+  dh::LaunchN<ItemsPerThread, BlkDim>(device_idx, nVals,
                                       [=] __device__(int i) {
                                         int iid = instId[i];
                                         T1 v1 = in1[iid];
@@ -862,8 +864,8 @@ void gather(int device_idx, T1 *out1, const T1 *in1, T2 *out2, const T2 *in2,
 * @param nVals length of the buffers
 */
 template <typename T, int BlkDim = 256, int ItemsPerThread = 4>
-void gather(int device_idx, T *out, const T *in, const int *instId, int nVals) {
-  dh::launch_n<ItemsPerThread, BlkDim>(device_idx, nVals,
+void Gather(int device_idx, T *out, const T *in, const int *instId, int nVals) {
+  dh::LaunchN<ItemsPerThread, BlkDim>(device_idx, nVals,
                                       [=] __device__(int i) {
                                         int iid = instId[i];
                                         out[i] = in[iid];
--- a/src/common/group_data.h
+++ b/src/common/group_data.h
@@ -29,12 +29,12 @@ struct ParallelGroupBuilder {
  // parallel group builder of data
  ParallelGroupBuilder(std::vector<SizeType> *p_rptr,
                       std::vector<ValueType> *p_data)
-      : rptr(*p_rptr), data(*p_data), thread_rptr(tmp_thread_rptr) {
+      : rptr_(*p_rptr), data_(*p_data), thread_rptr_(tmp_thread_rptr_) {
  }
  ParallelGroupBuilder(std::vector<SizeType> *p_rptr,
                       std::vector<ValueType> *p_data,
                       std::vector< std::vector<SizeType> > *p_thread_rptr)
-      : rptr(*p_rptr), data(*p_data), thread_rptr(*p_thread_rptr) {
+      : rptr_(*p_rptr), data_(*p_data), thread_rptr_(*p_thread_rptr) {
  }

 public:
@@ -45,10 +45,10 @@ struct ParallelGroupBuilder {
   * \param nthread number of thread that will be used in construction
   */
  inline void InitBudget(size_t nkeys, int nthread) {
-    thread_rptr.resize(nthread);
-    for (size_t i = 0;  i < thread_rptr.size(); ++i) {
-      thread_rptr[i].resize(nkeys);
-      std::fill(thread_rptr[i].begin(), thread_rptr[i].end(), 0);
+    thread_rptr_.resize(nthread);
+    for (size_t i = 0;  i < thread_rptr_.size(); ++i) {
+      thread_rptr_[i].resize(nkeys);
+      std::fill(thread_rptr_[i].begin(), thread_rptr_[i].end(), 0);
    }
  }
  /*!
@@ -58,34 +58,34 @@ struct ParallelGroupBuilder {
   * \param nelem number of element budget add to this row
   */
  inline void AddBudget(size_t key, int threadid, SizeType nelem = 1) {
-    std::vector<SizeType> &trptr = thread_rptr[threadid];
+    std::vector<SizeType> &trptr = thread_rptr_[threadid];
    if (trptr.size() < key + 1) {
      trptr.resize(key + 1, 0);
    }
    trptr[key] += nelem;
  }
  /*! \brief step 3: initialize the necessary storage */
-  inline void InitStorage(void) {
+  inline void InitStorage() {
    // set rptr to correct size
-    for (size_t tid = 0; tid < thread_rptr.size(); ++tid) {
-      if (rptr.size() <= thread_rptr[tid].size()) {
-        rptr.resize(thread_rptr[tid].size() + 1);
+    for (size_t tid = 0; tid < thread_rptr_.size(); ++tid) {
+      if (rptr_.size() <= thread_rptr_[tid].size()) {
+        rptr_.resize(thread_rptr_[tid].size() + 1);
      }
    }
    // initialize rptr to be beginning of each segment
    size_t start = 0;
-    for (size_t i = 0; i + 1 < rptr.size(); ++i) {
-      for (size_t tid = 0; tid < thread_rptr.size(); ++tid) {
-        std::vector<SizeType> &trptr = thread_rptr[tid];
+    for (size_t i = 0; i + 1 < rptr_.size(); ++i) {
+      for (size_t tid = 0; tid < thread_rptr_.size(); ++tid) {
+        std::vector<SizeType> &trptr = thread_rptr_[tid];
        if (i < trptr.size()) {
          size_t ncnt = trptr[i];
          trptr[i] = start;
          start += ncnt;
        }
      }
-      rptr[i + 1] = start;
+      rptr_[i + 1] = start;
    }
-    data.resize(start);
+    data_.resize(start);
  }
  /*!
   * \brief step 4: add data to the allocated space,
@@ -96,19 +96,19 @@ struct ParallelGroupBuilder {
   * \param threadid the id of thread that calls this function
   */
  inline void Push(size_t key, ValueType value, int threadid) {
-    SizeType &rp = thread_rptr[threadid][key];
-    data[rp++] = value;
+    SizeType &rp = thread_rptr_[threadid][key];
+    data_[rp++] = value;
  }

 private:
  /*! \brief pointer to the beginning and end of each continuous key */
-  std::vector<SizeType> &rptr;
+  std::vector<SizeType> &rptr_;
  /*! \brief index of nonzero entries in each row */
-  std::vector<ValueType> &data;
+  std::vector<ValueType> &data_;
  /*! \brief thread local data structure */
-  std::vector<std::vector<SizeType> > &thread_rptr;
+  std::vector<std::vector<SizeType> > &thread_rptr_;
  /*! \brief local temp thread ptr, use this if not specified by the constructor */
-  std::vector<std::vector<SizeType> > tmp_thread_rptr;
+  std::vector<std::vector<SizeType> > tmp_thread_rptr_;
 };
 }  // namespace common
 }  // namespace xgboost
--- a/src/common/hist_util.cc
+++ b/src/common/hist_util.cc
@@ -17,20 +17,20 @@ namespace xgboost {
 namespace common {

 void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
-  typedef common::WXQuantileSketch<bst_float, bst_float> WXQSketch;
-  const MetaInfo& info = p_fmat->info();
+  using WXQSketch = common::WXQuantileSketch<bst_float, bst_float>;
+  const MetaInfo& info = p_fmat->Info();

  // safe factor for better accuracy
-  const int kFactor = 8;
+  constexpr int kFactor = 8;
  std::vector<WXQSketch> sketchs;

  const int nthread = omp_get_max_threads();

-  unsigned nstep = static_cast<unsigned>((info.num_col + nthread - 1) / nthread);
-  unsigned ncol = static_cast<unsigned>(info.num_col);
-  sketchs.resize(info.num_col);
+  auto nstep = static_cast<unsigned>((info.num_col_ + nthread - 1) / nthread);
+  auto ncol = static_cast<unsigned>(info.num_col_);
+  sketchs.resize(info.num_col_);
  for (auto& s : sketchs) {
-    s.Init(info.num_row, 1.0 / (max_num_bins * kFactor));
+    s.Init(info.num_row_, 1.0 / (max_num_bins * kFactor));
  }

  dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator();
@@ -40,7 +40,7 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
    #pragma omp parallel num_threads(nthread)
    {
      CHECK_EQ(nthread, omp_get_num_threads());
-      unsigned tid = static_cast<unsigned>(omp_get_thread_num());
+      auto tid = static_cast<unsigned>(omp_get_thread_num());
      unsigned begin = std::min(nstep * tid, ncol);
      unsigned end = std::min(nstep * (tid + 1), ncol);
      for (size_t i = 0; i < batch.size; ++i) { // NOLINT(*)
@@ -68,7 +68,7 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
  size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_num_bins * kFactor);
  sreducer.Allreduce(dmlc::BeginPtr(summary_array), nbytes, summary_array.size());

-  this->min_val.resize(info.num_col);
+  this->min_val.resize(info.num_col_);
  row_ptr.push_back(0);
  for (size_t fid = 0; fid < summary_array.size(); ++fid) {
    WXQSketch::SummaryContainer a;
@@ -105,7 +105,7 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
 }

 void GHistIndexMatrix::Init(DMatrix* p_fmat) {
-  CHECK(cut != nullptr);
+  CHECK(cut != nullptr);  // NOLINT
  dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator();

  const int nthread = omp_get_max_threads();
@@ -126,7 +126,7 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat) {
    CHECK_GT(cut->cut.size(), 0U);
    CHECK_EQ(cut->row_ptr.back(), cut->cut.size());

-    omp_ulong bsize = static_cast<omp_ulong>(batch.size);
+    auto bsize = static_cast<omp_ulong>(batch.size);
    #pragma omp parallel for num_threads(nthread) schedule(static)
    for (omp_ulong i = 0; i < bsize; ++i) { // NOLINT(*)
      const int tid = omp_get_thread_num();
@@ -217,7 +217,7 @@ FindGroups_(const std::vector<unsigned>& feature_list,
  std::vector<std::vector<bool>> conflict_marks;
  std::vector<size_t> group_nnz;
  std::vector<size_t> group_conflict_cnt;
-  const size_t max_conflict_cnt
+  const auto max_conflict_cnt
    = static_cast<size_t>(param.max_conflict_rate * nrow);

  for (auto fid : feature_list) {
@@ -336,14 +336,14 @@ FastFeatureGrouping(const GHistIndexMatrix& gmat,
 void GHistIndexBlockMatrix::Init(const GHistIndexMatrix& gmat,
                                 const ColumnMatrix& colmat,
                                 const FastHistParam& param) {
-  cut = gmat.cut;
+  cut_ = gmat.cut;

  const size_t nrow = gmat.row_ptr.size() - 1;
  const uint32_t nbins = gmat.cut->row_ptr.back();

  /* step 1: form feature groups */
  auto groups = FastFeatureGrouping(gmat, colmat, param);
-  const uint32_t nblock = static_cast<uint32_t>(groups.size());
+  const auto nblock = static_cast<uint32_t>(groups.size());

  /* step 2: build a new CSR matrix for each feature group */
  std::vector<uint32_t> bin2block(nbins);  // lookup table [bin id] => [block id]
@@ -380,24 +380,24 @@ void GHistIndexBlockMatrix::Init(const GHistIndexMatrix& gmat,
  index_blk_ptr.push_back(0);
  row_ptr_blk_ptr.push_back(0);
  for (uint32_t block_id = 0; block_id < nblock; ++block_id) {
-    index.insert(index.end(), index_temp[block_id].begin(), index_temp[block_id].end());
-    row_ptr.insert(row_ptr.end(), row_ptr_temp[block_id].begin(), row_ptr_temp[block_id].end());
-    index_blk_ptr.push_back(index.size());
-    row_ptr_blk_ptr.push_back(row_ptr.size());
+    index_.insert(index_.end(), index_temp[block_id].begin(), index_temp[block_id].end());
+    row_ptr_.insert(row_ptr_.end(), row_ptr_temp[block_id].begin(), row_ptr_temp[block_id].end());
+    index_blk_ptr.push_back(index_.size());
+    row_ptr_blk_ptr.push_back(row_ptr_.size());
  }

  // save shortcut for each block
  for (uint32_t block_id = 0; block_id < nblock; ++block_id) {
    Block blk;
-    blk.index_begin = &index[index_blk_ptr[block_id]];
-    blk.row_ptr_begin = &row_ptr[row_ptr_blk_ptr[block_id]];
-    blk.index_end = &index[index_blk_ptr[block_id + 1]];
-    blk.row_ptr_end = &row_ptr[row_ptr_blk_ptr[block_id + 1]];
-    blocks.push_back(blk);
+    blk.index_begin = &index_[index_blk_ptr[block_id]];
+    blk.row_ptr_begin = &row_ptr_[row_ptr_blk_ptr[block_id]];
+    blk.index_end = &index_[index_blk_ptr[block_id + 1]];
+    blk.row_ptr_end = &row_ptr_[row_ptr_blk_ptr[block_id + 1]];
+    blocks_.push_back(blk);
  }
 }

-void GHistBuilder::BuildHist(const std::vector<bst_gpair>& gpair,
+void GHistBuilder::BuildHist(const std::vector<GradientPair>& gpair,
                             const RowSetCollection::Elem row_indices,
                             const GHistIndexMatrix& gmat,
                             const std::vector<bst_uint>& feat_set,
@@ -405,30 +405,30 @@ void GHistBuilder::BuildHist(const std::vector<bst_gpair>& gpair,
  data_.resize(nbins_ * nthread_, GHistEntry());
  std::fill(data_.begin(), data_.end(), GHistEntry());

-  const int K = 8;  // loop unrolling factor
-  const bst_omp_uint nthread = static_cast<bst_omp_uint>(this->nthread_);
+  constexpr int kUnroll = 8;  // loop unrolling factor
+  const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
  const size_t nrows = row_indices.end - row_indices.begin;
-  const size_t rest = nrows % K;
+  const size_t rest = nrows % kUnroll;

  #pragma omp parallel for num_threads(nthread) schedule(guided)
-  for (bst_omp_uint i = 0; i < nrows - rest; i += K) {
+  for (bst_omp_uint i = 0; i < nrows - rest; i += kUnroll) {
    const bst_omp_uint tid = omp_get_thread_num();
    const size_t off = tid * nbins_;
-    size_t rid[K];
-    size_t ibegin[K];
-    size_t iend[K];
-    bst_gpair stat[K];
-    for (int k = 0; k < K; ++k) {
+    size_t rid[kUnroll];
+    size_t ibegin[kUnroll];
+    size_t iend[kUnroll];
+    GradientPair stat[kUnroll];
+    for (int k = 0; k < kUnroll; ++k) {
      rid[k] = row_indices.begin[i + k];
    }
-    for (int k = 0; k < K; ++k) {
+    for (int k = 0; k < kUnroll; ++k) {
      ibegin[k] = gmat.row_ptr[rid[k]];
      iend[k] = gmat.row_ptr[rid[k] + 1];
    }
-    for (int k = 0; k < K; ++k) {
+    for (int k = 0; k < kUnroll; ++k) {
      stat[k] = gpair[rid[k]];
    }
-    for (int k = 0; k < K; ++k) {
+    for (int k = 0; k < kUnroll; ++k) {
      for (size_t j = ibegin[k]; j < iend[k]; ++j) {
        const uint32_t bin = gmat.index[j];
        data_[off + bin].Add(stat[k]);
@@ -439,7 +439,7 @@ void GHistBuilder::BuildHist(const std::vector<bst_gpair>& gpair,
    const size_t rid = row_indices.begin[i];
    const size_t ibegin = gmat.row_ptr[rid];
    const size_t iend = gmat.row_ptr[rid + 1];
-    const bst_gpair stat = gpair[rid];
+    const GradientPair stat = gpair[rid];
    for (size_t j = ibegin; j < iend; ++j) {
      const uint32_t bin = gmat.index[j];
      data_[bin].Add(stat);
@@ -456,37 +456,40 @@ void GHistBuilder::BuildHist(const std::vector<bst_gpair>& gpair,
  }
 }

-void GHistBuilder::BuildBlockHist(const std::vector<bst_gpair>& gpair,
+void GHistBuilder::BuildBlockHist(const std::vector<GradientPair>& gpair,
                                  const RowSetCollection::Elem row_indices,
                                  const GHistIndexBlockMatrix& gmatb,
                                  const std::vector<bst_uint>& feat_set,
                                  GHistRow hist) {
-  const int K = 8;  // loop unrolling factor
-  const bst_omp_uint nthread = static_cast<bst_omp_uint>(this->nthread_);
+  constexpr int kUnroll = 8;  // loop unrolling factor
  const size_t nblock = gmatb.GetNumBlock();
  const size_t nrows = row_indices.end - row_indices.begin;
-  const size_t rest = nrows % K;
+  const size_t rest = nrows % kUnroll;
+
+#if defined(_OPENMP)
+  const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
+#endif

  #pragma omp parallel for num_threads(nthread) schedule(guided)
  for (bst_omp_uint bid = 0; bid < nblock; ++bid) {
    auto gmat = gmatb[bid];

-    for (size_t i = 0; i < nrows - rest; i += K) {
-      size_t rid[K];
-      size_t ibegin[K];
-      size_t iend[K];
-      bst_gpair stat[K];
-      for (int k = 0; k < K; ++k) {
+    for (size_t i = 0; i < nrows - rest; i += kUnroll) {
+      size_t rid[kUnroll];
+      size_t ibegin[kUnroll];
+      size_t iend[kUnroll];
+      GradientPair stat[kUnroll];
+      for (int k = 0; k < kUnroll; ++k) {
        rid[k] = row_indices.begin[i + k];
      }
-      for (int k = 0; k < K; ++k) {
+      for (int k = 0; k < kUnroll; ++k) {
        ibegin[k] = gmat.row_ptr[rid[k]];
        iend[k] = gmat.row_ptr[rid[k] + 1];
      }
-      for (int k = 0; k < K; ++k) {
+      for (int k = 0; k < kUnroll; ++k) {
        stat[k] = gpair[rid[k]];
      }
-      for (int k = 0; k < K; ++k) {
+      for (int k = 0; k < kUnroll; ++k) {
        for (size_t j = ibegin[k]; j < iend[k]; ++j) {
          const uint32_t bin = gmat.index[j];
          hist.begin[bin].Add(stat[k]);
@@ -497,7 +500,7 @@ void GHistBuilder::BuildBlockHist(const std::vector<bst_gpair>& gpair,
      const size_t rid = row_indices.begin[i];
      const size_t ibegin = gmat.row_ptr[rid];
      const size_t iend = gmat.row_ptr[rid + 1];
-      const bst_gpair stat = gpair[rid];
+      const GradientPair stat = gpair[rid];
      for (size_t j = ibegin; j < iend; ++j) {
        const uint32_t bin = gmat.index[j];
        hist.begin[bin].Add(stat);
@@ -507,21 +510,26 @@ void GHistBuilder::BuildBlockHist(const std::vector<bst_gpair>& gpair,
 }

 void GHistBuilder::SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow parent) {
-  const bst_omp_uint nthread = static_cast<bst_omp_uint>(this->nthread_);
  const uint32_t nbins = static_cast<bst_omp_uint>(nbins_);
-  const int K = 8;  // loop unrolling factor
-  const uint32_t rest = nbins % K;
+  constexpr int kUnroll = 8;  // loop unrolling factor
+  const uint32_t rest = nbins % kUnroll;
+
+#if defined(_OPENMP)
+  const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
+#endif
+
  #pragma omp parallel for num_threads(nthread) schedule(static)
-  for (bst_omp_uint bin_id = 0; bin_id < static_cast<bst_omp_uint>(nbins - rest); bin_id += K) {
-    GHistEntry pb[K];
-    GHistEntry sb[K];
-    for (int k = 0; k < K; ++k) {
+  for (bst_omp_uint bin_id = 0;
+       bin_id < static_cast<bst_omp_uint>(nbins - rest); bin_id += kUnroll) {
+    GHistEntry pb[kUnroll];
+    GHistEntry sb[kUnroll];
+    for (int k = 0; k < kUnroll; ++k) {
      pb[k] = parent.begin[bin_id + k];
    }
-    for (int k = 0; k < K; ++k) {
+    for (int k = 0; k < kUnroll; ++k) {
      sb[k] = sibling.begin[bin_id + k];
    }
-    for (int k = 0; k < K; ++k) {
+    for (int k = 0; k < kUnroll; ++k) {
      self.begin[bin_id + k].SetSubtract(pb[k], sb[k]);
    }
  }
--- a/src/common/hist_util.h
+++ b/src/common/hist_util.h
@@ -13,26 +13,26 @@
 #include "row_set.h"
 #include "../tree/fast_hist_param.h"

-using xgboost::tree::FastHistParam;
-
 namespace xgboost {
 namespace common {

+using tree::FastHistParam;
+
 /*! \brief sums of gradient statistics corresponding to a histogram bin */
 struct GHistEntry {
  /*! \brief sum of first-order gradient statistics */
-  double sum_grad;
+  double sum_grad{0};
  /*! \brief sum of second-order gradient statistics */
-  double sum_hess;
+  double sum_hess{0};

-  GHistEntry() : sum_grad(0), sum_hess(0) {}
+  GHistEntry() = default;

  inline void Clear() {
    sum_grad = sum_hess = 0;
  }

-  /*! \brief add a bst_gpair to the sum */
-  inline void Add(const bst_gpair& e) {
+  /*! \brief add a GradientPair to the sum */
+  inline void Add(const GradientPair& e) {
    sum_grad += e.GetGrad();
    sum_hess += e.GetHess();
  }
@@ -58,7 +58,7 @@ struct HistCutUnit {
  /*! \brief number of cutting point, containing the maximum point */
  uint32_t size;
  // default constructor
-  HistCutUnit() {}
+  HistCutUnit() = default;
  // constructor
  HistCutUnit(const bst_float* cut, uint32_t size)
      : cut(cut), size(size) {}
@@ -74,8 +74,8 @@ struct HistCutMatrix {
  std::vector<bst_float> cut;
  /*! \brief Get histogram bound for fid */
  inline HistCutUnit operator[](bst_uint fid) const {
-    return HistCutUnit(dmlc::BeginPtr(cut) + row_ptr[fid],
-                       row_ptr[fid + 1] - row_ptr[fid]);
+    return {dmlc::BeginPtr(cut) + row_ptr[fid],
+            row_ptr[fid + 1] - row_ptr[fid]};
  }
  // create histogram cut matrix given statistics from data
  // using approximate quantile sketch approach
@@ -92,7 +92,7 @@ struct GHistIndexRow {
  const uint32_t* index;
  /*! \brief The size of the histogram */
  size_t size;
-  GHistIndexRow() {}
+  GHistIndexRow() = default;
  GHistIndexRow(const uint32_t* index, size_t size)
      : index(index), size(size) {}
 };
@@ -115,7 +115,7 @@ struct GHistIndexMatrix {
  void Init(DMatrix* p_fmat);
  // get i-th row
  inline GHistIndexRow operator[](size_t i) const {
-    return GHistIndexRow(&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]);
+    return {&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]};
  }
  inline void GetFeatureCounts(size_t* counts) const {
    auto nfeature = cut->row_ptr.size() - 1;
@@ -141,7 +141,7 @@ struct GHistIndexBlock {

  // get i-th row
  inline GHistIndexRow operator[](size_t i) const {
-    return GHistIndexRow(&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]);
+    return {&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]};
  }
 };

@@ -154,24 +154,24 @@ class GHistIndexBlockMatrix {
            const FastHistParam& param);

  inline GHistIndexBlock operator[](size_t i) const {
-    return GHistIndexBlock(blocks[i].row_ptr_begin, blocks[i].index_begin);
+    return {blocks_[i].row_ptr_begin, blocks_[i].index_begin};
  }

  inline size_t GetNumBlock() const {
-    return blocks.size();
+    return blocks_.size();
  }

 private:
-  std::vector<size_t> row_ptr;
-  std::vector<uint32_t> index;
-  const HistCutMatrix* cut;
+  std::vector<size_t> row_ptr_;
+  std::vector<uint32_t> index_;
+  const HistCutMatrix* cut_;
  struct Block {
    const size_t* row_ptr_begin;
    const size_t* row_ptr_end;
    const uint32_t* index_begin;
    const uint32_t* index_end;
  };
-  std::vector<Block> blocks;
+  std::vector<Block> blocks_;
 };

 /*!
@@ -186,7 +186,7 @@ struct GHistRow {
  /*! \brief number of entries */
  uint32_t size;

-  GHistRow() {}
+  GHistRow() = default;
  GHistRow(GHistEntry* begin, uint32_t size)
    : begin(begin), size(size) {}
 };
@@ -198,15 +198,15 @@ class HistCollection {
 public:
  // access histogram for i-th node
  inline GHistRow operator[](bst_uint nid) const {
-    const uint32_t kMax = std::numeric_limits<uint32_t>::max();
+    constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
    CHECK_NE(row_ptr_[nid], kMax);
-    return GHistRow(const_cast<GHistEntry*>(dmlc::BeginPtr(data_) + row_ptr_[nid]), nbins_);
+    return {const_cast<GHistEntry*>(dmlc::BeginPtr(data_) + row_ptr_[nid]), nbins_};
  }

  // have we computed a histogram for i-th node?
  inline bool RowExists(bst_uint nid) const {
-    const uint32_t kMax = std::numeric_limits<uint32_t>::max();
+    constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
    return (nid < row_ptr_.size() && row_ptr_[nid] != kMax);
  }

  // initialize histogram collection
@@ -218,7 +218,7 @@ class HistCollection {

  // create an empty histogram for i-th node
  inline void AddHistRow(bst_uint nid) {
-    const uint32_t kMax = std::numeric_limits<uint32_t>::max();
+    constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
    if (nid >= row_ptr_.size()) {
      row_ptr_.resize(nid + 1, kMax);
    }
@@ -250,13 +250,13 @@ class GHistBuilder {
  }

  // construct a histogram via histogram aggregation
-  void BuildHist(const std::vector<bst_gpair>& gpair,
+  void BuildHist(const std::vector<GradientPair>& gpair,
                 const RowSetCollection::Elem row_indices,
                 const GHistIndexMatrix& gmat,
                 const std::vector<bst_uint>& feat_set,
                 GHistRow hist);
  // same, with feature grouping
-  void BuildBlockHist(const std::vector<bst_gpair>& gpair,
+  void BuildBlockHist(const std::vector<GradientPair>& gpair,
                      const RowSetCollection::Elem row_indices,
                      const GHistIndexBlockMatrix& gmatb,
                      const std::vector<bst_uint>& feat_set,
--- a/src/common/host_device_vector.cc
+++ b/src/common/host_device_vector.cc
@@ -6,6 +6,8 @@
 // dummy implementation of HostDeviceVector in case CUDA is not used

 #include <xgboost/base.h>
+
+#include <utility>
 #include "./host_device_vector.h"

 namespace xgboost {
@@ -13,8 +15,8 @@ namespace xgboost {
 template <typename T>
 struct HostDeviceVectorImpl {
  explicit HostDeviceVectorImpl(size_t size, T v) : data_h_(size, v) {}
-  explicit HostDeviceVectorImpl(std::initializer_list<T> init) : data_h_(init) {}
-  explicit HostDeviceVectorImpl(const std::vector<T>& init) : data_h_(init) {}
+  HostDeviceVectorImpl(std::initializer_list<T> init) : data_h_(init) {}
+  explicit HostDeviceVectorImpl(std::vector<T> init) : data_h_(std::move(init)) {}
  std::vector<T> data_h_;
 };

@@ -43,25 +45,25 @@ HostDeviceVector<T>::~HostDeviceVector() {
 }

 template <typename T>
-size_t HostDeviceVector<T>::size() const { return impl_->data_h_.size(); }
+size_t HostDeviceVector<T>::Size() const { return impl_->data_h_.size(); }

 template <typename T>
-int HostDeviceVector<T>::device() const { return -1; }
+int HostDeviceVector<T>::DeviceIdx() const { return -1; }

 template <typename T>
-T* HostDeviceVector<T>::ptr_d(int device) { return nullptr; }
+T* HostDeviceVector<T>::DevicePointer(int device) { return nullptr; }

 template <typename T>
-std::vector<T>& HostDeviceVector<T>::data_h() { return impl_->data_h_; }
+std::vector<T>& HostDeviceVector<T>::HostVector() { return impl_->data_h_; }

 template <typename T>
-void HostDeviceVector<T>::resize(size_t new_size, T v, int new_device) {
+void HostDeviceVector<T>::Resize(size_t new_size, T v, int new_device) {
  impl_->data_h_.resize(new_size, v);
 }

 // explicit instantiations are required, as HostDeviceVector isn't header-only
 template class HostDeviceVector<bst_float>;
-template class HostDeviceVector<bst_gpair>;
+template class HostDeviceVector<GradientPair>;

 }  // namespace xgboost

--- a/src/common/host_device_vector.cu
+++ b/src/common/host_device_vector.cu
@@ -35,27 +35,27 @@ struct HostDeviceVectorImpl {
  void operator=(const HostDeviceVectorImpl<T>&) = delete;
  void operator=(HostDeviceVectorImpl<T>&&) = delete;

-  size_t size() const { return on_d_ ? data_d_.size() : data_h_.size(); }
+  size_t Size() const { return on_d_ ? data_d_.size() : data_h_.size(); }

-  int device() const { return device_; }
+  int DeviceIdx() const { return device_; }

-  T* ptr_d(int device) {
-    lazy_sync_device(device);
+  T* DevicePointer(int device) {
+    LazySyncDevice(device);
    return data_d_.data().get();
  }
-  thrust::device_ptr<T> tbegin(int device) {
-    return thrust::device_ptr<T>(ptr_d(device));
+  thrust::device_ptr<T> tbegin(int device) {  // NOLINT
+    return thrust::device_ptr<T>(DevicePointer(device));
  }
-  thrust::device_ptr<T> tend(int device) {
+  thrust::device_ptr<T> tend(int device) {  // NOLINT
    auto begin = tbegin(device);
-    return begin + size();
+    return begin + Size();
  }
-  std::vector<T>& data_h() {
-    lazy_sync_host();
+  std::vector<T>& HostVector() {
+    LazySyncHost();
    return data_h_;
  }
-  void resize(size_t new_size, T v, int new_device) {
-    if (new_size == this->size() && new_device == device_)
+  void Resize(size_t new_size, T v, int new_device) {
+    if (new_size == this->Size() && new_device == device_)
      return;
    if (new_device != -1)
      device_ = new_device;
@@ -70,26 +70,26 @@ struct HostDeviceVectorImpl {
    }
  }

-  void lazy_sync_host() {
+  void LazySyncHost() {
    if (!on_d_)
      return;
-    if (data_h_.size() != this->size())
-      data_h_.resize(this->size());
+    if (data_h_.size() != this->Size())
+      data_h_.resize(this->Size());
    dh::safe_cuda(cudaSetDevice(device_));
    thrust::copy(data_d_.begin(), data_d_.end(), data_h_.begin());
    on_d_ = false;
  }

-  void lazy_sync_device(int device) {
+  void LazySyncDevice(int device) {
    if (on_d_)
      return;
    if (device != device_) {
      CHECK_EQ(device_, -1);
      device_ = device;
    }
-    if (data_d_.size() != this->size()) {
+    if (data_d_.size() != this->Size()) {
      dh::safe_cuda(cudaSetDevice(device_));
-      data_d_.resize(this->size());
+      data_d_.resize(this->Size());
    }
    dh::safe_cuda(cudaSetDevice(device_));
    thrust::copy(data_h_.begin(), data_h_.end(), data_d_.begin());
@@ -128,34 +128,34 @@ HostDeviceVector<T>::~HostDeviceVector() {
 }

 template <typename T>
-size_t HostDeviceVector<T>::size() const { return impl_->size(); }
+size_t HostDeviceVector<T>::Size() const { return impl_->Size(); }

 template <typename T>
-int HostDeviceVector<T>::device() const { return impl_->device(); }
+int HostDeviceVector<T>::DeviceIdx() const { return impl_->DeviceIdx(); }

 template <typename T>
-T* HostDeviceVector<T>::ptr_d(int device) { return impl_->ptr_d(device); }
+T* HostDeviceVector<T>::DevicePointer(int device) { return impl_->DevicePointer(device); }

 template <typename T>
-thrust::device_ptr<T> HostDeviceVector<T>::tbegin(int device) {
+thrust::device_ptr<T> HostDeviceVector<T>::tbegin(int device) {  // NOLINT
  return impl_->tbegin(device);
 }

 template <typename T>
-thrust::device_ptr<T> HostDeviceVector<T>::tend(int device) {
+thrust::device_ptr<T> HostDeviceVector<T>::tend(int device) {  // NOLINT
  return impl_->tend(device);
 }

 template <typename T>
-std::vector<T>& HostDeviceVector<T>::data_h() { return impl_->data_h(); }
+std::vector<T>& HostDeviceVector<T>::HostVector() { return impl_->HostVector(); }

 template <typename T>
-void HostDeviceVector<T>::resize(size_t new_size, T v, int new_device) {
-  impl_->resize(new_size, v, new_device);
+void HostDeviceVector<T>::Resize(size_t new_size, T v, int new_device) {
+  impl_->Resize(new_size, v, new_device);
 }

 // explicit instantiations are required, as HostDeviceVector isn't header-only
 template class HostDeviceVector<bst_float>;
-template class HostDeviceVector<bst_gpair>;
+template class HostDeviceVector<GradientPair>;

 }  // namespace xgboost
--- a/src/common/host_device_vector.h
+++ b/src/common/host_device_vector.h
@@ -70,10 +70,10 @@ class HostDeviceVector {
  HostDeviceVector(HostDeviceVector<T>&&) = delete;
  void operator=(const HostDeviceVector<T>&) = delete;
  void operator=(HostDeviceVector<T>&&) = delete;
-  size_t size() const;
-  int device() const;
-  T* ptr_d(int device);
-  T* ptr_h() { return data_h().data(); }
+  size_t Size() const;
+  int DeviceIdx() const;
+  T* DevicePointer(int device);
+  T* HostPointer() { return HostVector().data(); }

  // only define functions returning device_ptr
  // if HostDeviceVector.h is included from a .cu file
@@ -82,10 +82,10 @@ class HostDeviceVector {
  thrust::device_ptr<T> tend(int device);
 #endif

-  std::vector<T>& data_h();
+  std::vector<T>& HostVector();

  // passing in new_device == -1 keeps the device as is
-  void resize(size_t new_size, T v = T(), int new_device = -1);
+  void Resize(size_t new_size, T v = T(), int new_device = -1);

 private:
  HostDeviceVectorImpl<T>* impl_;
--- a/src/common/io.h
+++ b/src/common/io.h
@@ -15,8 +15,8 @@

 namespace xgboost {
 namespace common {
-typedef rabit::utils::MemoryFixSizeBuffer MemoryFixSizeBuffer;
-typedef rabit::utils::MemoryBufferStream MemoryBufferStream;
+using MemoryFixSizeBuffer = rabit::utils::MemoryFixSizeBuffer;
+using MemoryBufferStream = rabit::utils::MemoryBufferStream;

 /*!
 * \brief Input stream that support additional PeekRead
--- a/src/common/math.h
+++ b/src/common/math.h
@@ -39,12 +39,12 @@ inline void Softmax(std::vector<float>* p_rec) {
    wmax = std::max(rec[i], wmax);
  }
  double wsum = 0.0f;
-  for (size_t i = 0; i < rec.size(); ++i) {
-    rec[i] = std::exp(rec[i] - wmax);
-    wsum += rec[i];
+  for (float & elem : rec) {
+    elem = std::exp(elem - wmax);
+    wsum += elem;
  }
-  for (size_t i = 0; i < rec.size(); ++i) {
-    rec[i] /= static_cast<float>(wsum);
+  for (float & elem : rec) {
+    elem /= static_cast<float>(wsum);
  }
 }

--- a/src/common/quantile.h
+++ b/src/common/quantile.h
@@ -35,7 +35,7 @@ struct WQSummary {
    /*! \brief the value of data */
    DType value;
    // constructor
-    Entry() {}
+    Entry() = default;
    // constructor
    Entry(RType rmin, RType rmax, RType wmin, DType value)
        : rmin(rmin), rmax(rmax), wmin(wmin), value(value) {}
@@ -48,11 +48,11 @@ struct WQSummary {
      CHECK(rmax- rmin - wmin > -eps) <<  "relation constraint: min/max";
    }
    /*! \return rmin estimation for v strictly bigger than value */
-    inline RType rmin_next() const {
+    inline RType RMinNext() const {
      return rmin + wmin;
    }
    /*! \return rmax estimation for v strictly smaller than value */
-    inline RType rmax_prev() const {
+    inline RType RMaxPrev() const {
      return rmax - wmin;
    }
  };
@@ -65,7 +65,7 @@ struct WQSummary {
      // weight of instance
      RType weight;
      // default constructor
-      QEntry() {}
+      QEntry() = default;
      // constructor
      QEntry(DType value, RType weight)
          : value(value), weight(weight) {}
@@ -116,7 +116,7 @@ struct WQSummary {
  inline RType MaxError() const {
    RType res = data[0].rmax - data[0].rmin - data[0].wmin;
    for (size_t i = 1; i < size; ++i) {
-      res = std::max(data[i].rmax_prev() - data[i - 1].rmin_next(), res);
+      res = std::max(data[i].RMaxPrev() - data[i - 1].RMinNext(), res);
      res = std::max(data[i].rmax - data[i].rmin - data[i].wmin, res);
    }
    return res;
@@ -140,8 +140,8 @@ struct WQSummary {
      if (istart == 0) {
        return Entry(0.0f, 0.0f, 0.0f, qvalue);
      } else {
-        return Entry(data[istart - 1].rmin_next(),
-                     data[istart].rmax_prev(),
+        return Entry(data[istart - 1].RMinNext(),
+                     data[istart].RMaxPrev(),
                     0.0f, qvalue);
      }
    }
@@ -197,7 +197,7 @@ struct WQSummary {
      while (i < src.size - 1
             && dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i;
      CHECK(i != src.size - 1);
-      if (dx2 < src.data[i].rmin_next() + src.data[i + 1].rmax_prev()) {
+      if (dx2 < src.data[i].RMinNext() + src.data[i + 1].RMaxPrev()) {
        if (i != lastidx) {
          data[size++] = src.data[i]; lastidx = i;
        }
@@ -236,20 +236,20 @@ struct WQSummary {
        *dst = Entry(a->rmin + b->rmin,
                     a->rmax + b->rmax,
                     a->wmin + b->wmin, a->value);
-        aprev_rmin = a->rmin_next();
-        bprev_rmin = b->rmin_next();
+        aprev_rmin = a->RMinNext();
+        bprev_rmin = b->RMinNext();
        ++dst; ++a; ++b;
      } else if (a->value < b->value) {
        *dst = Entry(a->rmin + bprev_rmin,
-                     a->rmax + b->rmax_prev(),
+                     a->rmax + b->RMaxPrev(),
                     a->wmin, a->value);
-        aprev_rmin = a->rmin_next();
+        aprev_rmin = a->RMinNext();
        ++dst; ++a;
      } else {
        *dst = Entry(b->rmin + aprev_rmin,
-                     b->rmax + a->rmax_prev(),
+                     b->rmax + a->RMaxPrev(),
                     b->wmin, b->value);
-        bprev_rmin = b->rmin_next();
+        bprev_rmin = b->RMinNext();
        ++dst; ++b;
      }
    }
@@ -307,7 +307,7 @@ struct WQSummary {
        data[i].rmax = prev_rmax;
        *err_maxgap = std::max(*err_maxgap, prev_rmax - data[i].rmax);
      }
-      RType rmin_next = data[i].rmin_next();
+      RType rmin_next = data[i].RMinNext();
      if (data[i].rmax < rmin_next) {
        data[i].rmax = rmin_next;
        *err_wgap = std::max(*err_wgap, data[i].rmax - rmin_next);
@@ -334,13 +334,13 @@ struct WQSummary {
 template<typename DType, typename RType>
 struct WXQSummary : public WQSummary<DType, RType> {
  // redefine entry type
-  typedef typename WQSummary<DType, RType>::Entry Entry;
+  using Entry = typename WQSummary<DType, RType>::Entry;
  // constructor
  WXQSummary(Entry *data, size_t size)
      : WQSummary<DType, RType>(data, size) {}
  // check if the block is large chunk
  inline static bool CheckLarge(const Entry &e, RType chunk) {
-    return  e.rmin_next() > e.rmax_prev() + chunk;
+    return  e.RMinNext() > e.RMaxPrev() + chunk;
  }
  // set prune
  inline void SetPrune(const WQSummary<DType, RType> &src, size_t maxsize) {
@@ -377,13 +377,13 @@ struct WXQSummary : public WQSummary<DType, RType> {
        if (CheckLarge(src.data[i], chunk)) {
          if (bid != i - 1) {
            // accumulate the range of the rest points
-            mrange += src.data[i].rmax_prev() - src.data[bid].rmin_next();
+            mrange += src.data[i].RMaxPrev() - src.data[bid].RMinNext();
          }
          bid = i; ++nbig;
        }
      }
      if (bid != src.size - 2) {
-        mrange += src.data[src.size-1].rmax_prev() - src.data[bid].rmin_next();
+        mrange += src.data[src.size-1].RMaxPrev() - src.data[bid].RMinNext();
      }
    }
    // assert: there cannot be more than n big data points
@@ -405,14 +405,14 @@ struct WXQSummary : public WQSummary<DType, RType> {
      if (end == src.size - 1 || CheckLarge(src.data[end], chunk)) {
        if (bid != end - 1) {
          size_t i = bid;
-          RType maxdx2 = src.data[end].rmax_prev() * 2;
+          RType maxdx2 = src.data[end].RMaxPrev() * 2;
          for (; k < n; ++k) {
            RType dx2 =  2 * ((k * mrange) / n + begin);
            if (dx2 >= maxdx2) break;
            while (i < end &&
                   dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i;
            if (i == end) break;
-            if (dx2 < src.data[i].rmin_next() + src.data[i + 1].rmax_prev()) {
+            if (dx2 < src.data[i].RMinNext() + src.data[i + 1].RMaxPrev()) {
              if (i != lastidx) {
                this->data[this->size++] = src.data[i]; lastidx = i;
              }
@@ -429,7 +429,7 @@ struct WXQSummary : public WQSummary<DType, RType> {
        }
        bid = end;
        // shift base by the gap
-        begin += src.data[bid].rmin_next() - src.data[bid].rmax_prev();
+        begin += src.data[bid].RMinNext() - src.data[bid].RMaxPrev();
      }
    }
  }
@@ -448,7 +448,7 @@ struct GKSummary {
    /*! \brief the value of data */
    DType value;
    // constructor
-    Entry() {}
+    Entry() = default;
    // constructor
    Entry(RType rmin, RType rmax, DType value)
        : rmin(rmin), rmax(rmax), value(value) {}
@@ -591,17 +591,17 @@ template<typename DType, typename RType, class TSummary>
 class QuantileSketchTemplate {
 public:
  /*! \brief type of summary type */
-  typedef TSummary Summary;
+  using Summary = TSummary;
  /*! \brief the entry type */
-  typedef typename Summary::Entry Entry;
+  using Entry = typename Summary::Entry;
  /*! \brief same as summary, but use STL to backup the space */
  struct SummaryContainer : public Summary {
    std::vector<Entry> space;
-    SummaryContainer(const SummaryContainer &src) : Summary(NULL, src.size) {
+    SummaryContainer(const SummaryContainer &src) : Summary(nullptr, src.size) {
      this->space = src.space;
      this->data = dmlc::BeginPtr(this->space);
    }
-    SummaryContainer() : Summary(NULL, 0) {
+    SummaryContainer() : Summary(nullptr, 0) {
    }
    /*! \brief reserve space for summary */
    inline void Reserve(size_t size) {
@@ -775,7 +775,7 @@ class QuantileSketchTemplate {
  inline void InitLevel(size_t nlevel) {
    if (level.size() >= nlevel) return;
    data.resize(limit_size * nlevel);
-    level.resize(nlevel, Summary(NULL, 0));
+    level.resize(nlevel, Summary(nullptr, 0));
    for (size_t l = 0; l < level.size(); ++l) {
      level[l].data = dmlc::BeginPtr(data) + l * limit_size;
    }
--- a/src/common/random.h
+++ b/src/common/random.h
@@ -15,7 +15,7 @@ namespace common {
 /*!
 * \brief Define mt19937 as default type Random Engine.
 */
-typedef std::mt19937 RandomEngine;
+using RandomEngine = std::mt19937;

 #if XGBOOST_CUSTOMIZE_GLOBAL_PRNG
 /*!
@@ -56,7 +56,7 @@ typedef CustomGlobalRandomEngine GlobalRandomEngine;
 /*!
 * \brief global random engine
 */
-typedef RandomEngine GlobalRandomEngine;
+using GlobalRandomEngine = RandomEngine;
 #endif

 /*!
--- a/src/common/row_set.h
+++ b/src/common/row_set.h
@@ -21,18 +21,18 @@ class RowSetCollection {
   *  rows (instances) associated with a particular node in a decision
   *  tree. */
  struct Elem {
-    const size_t* begin;
-    const size_t* end;
-    int node_id;
+    const size_t* begin{nullptr};
+    const size_t* end{nullptr};
+    int node_id{-1};
      // id of node associated with this instance set; -1 means uninitialized
-    Elem(void)
-        : begin(nullptr), end(nullptr), node_id(-1) {}
+    // default state: begin == end == nullptr, node_id == -1
+    Elem() = default;
    Elem(const size_t* begin,
         const size_t* end,
         int node_id)
        : begin(begin), end(end), node_id(node_id) {}

-    inline size_t size() const {
+    inline size_t Size() const {
      return end - begin;
    }
  };
@@ -42,11 +42,11 @@ class RowSetCollection {
    std::vector<size_t> right;
  };

-  inline std::vector<Elem>::const_iterator begin() const {
+  inline std::vector<Elem>::const_iterator begin() const {  // NOLINT
    return elem_of_each_node_.begin();
  }

-  inline std::vector<Elem>::const_iterator end() const {
+  inline std::vector<Elem>::const_iterator end() const {  // NOLINT
    return elem_of_each_node_.end();
  }

@@ -88,7 +88,7 @@ class RowSetCollection {
                       unsigned left_node_id,
                       unsigned right_node_id) {
    const Elem e = elem_of_each_node_[node_id];
-    const bst_omp_uint nthread = static_cast<bst_omp_uint>(row_split_tloc.size());
+    const auto nthread = static_cast<bst_omp_uint>(row_split_tloc.size());
    CHECK(e.begin != nullptr);
    size_t* all_begin = dmlc::BeginPtr(row_indices_);
    size_t* begin = all_begin + (e.begin - all_begin);
--- a/src/common/timer.h
+++ b/src/common/timer.h
@@ -12,10 +12,10 @@
 namespace xgboost {
 namespace common {
 struct Timer {
-  typedef std::chrono::high_resolution_clock ClockT;
-  typedef std::chrono::high_resolution_clock::time_point TimePointT;
-  typedef std::chrono::high_resolution_clock::duration DurationT;
-  typedef std::chrono::duration<double> SecondsT;
+  using ClockT = std::chrono::high_resolution_clock;
+  using TimePointT = std::chrono::high_resolution_clock::time_point;
+  using DurationT = std::chrono::high_resolution_clock::duration;
+  using SecondsT = std::chrono::duration<double>;

  TimePointT start;
  DurationT elapsed;
@@ -70,7 +70,7 @@ struct Monitor {
    if (debug_verbose) {
 #ifdef __CUDACC__
 #include "device_helpers.cuh"
-      dh::synchronize_n_devices(dList.size(), dList);
+      dh::SynchronizeNDevices(dList.size(), dList);
 #endif
    }
    timer_map[name].Start();
@@ -80,7 +80,7 @@ struct Monitor {
    if (debug_verbose) {
 #ifdef __CUDACC__
 #include "device_helpers.cuh"
-      dh::synchronize_n_devices(dList.size(), dList);
+      dh::SynchronizeNDevices(dList.size(), dList);
 #endif
    }
    timer_map[name].Stop();