Clang-tidy static analysis (#3222)
* Clang-tidy static analysis
* Modernise checks
* Google coding standard checks
* Identifier renaming according to Google style
This commit is contained in:
parent 3242b0a378
commit ccf80703ef

.clang-tidy (new file, 22 lines)
@@ -0,0 +1,22 @@
Checks: 'modernize-*,-modernize-make-*,-modernize-raw-string-literal,google-*,-google-default-arguments,-clang-diagnostic-#pragma-messages,readability-identifier-naming'
CheckOptions:
  - { key: readability-identifier-naming.ClassCase, value: CamelCase }
  - { key: readability-identifier-naming.StructCase, value: CamelCase }
  - { key: readability-identifier-naming.TypeAliasCase, value: CamelCase }
  - { key: readability-identifier-naming.TypedefCase, value: CamelCase }
  - { key: readability-identifier-naming.TypeTemplateParameterCase, value: CamelCase }
  - { key: readability-identifier-naming.LocalVariableCase, value: lower_case }
  - { key: readability-identifier-naming.MemberCase, value: lower_case }
  - { key: readability-identifier-naming.PrivateMemberSuffix, value: '_' }
  - { key: readability-identifier-naming.ProtectedMemberSuffix, value: '_' }
  - { key: readability-identifier-naming.EnumCase, value: CamelCase }
  - { key: readability-identifier-naming.EnumConstant, value: CamelCase }
  - { key: readability-identifier-naming.EnumConstantPrefix, value: k }
  - { key: readability-identifier-naming.GlobalConstantCase, value: CamelCase }
  - { key: readability-identifier-naming.GlobalConstantPrefix, value: k }
  - { key: readability-identifier-naming.StaticConstantCase, value: CamelCase }
  - { key: readability-identifier-naming.StaticConstantPrefix, value: k }
  - { key: readability-identifier-naming.ConstexprVariableCase, value: CamelCase }
  - { key: readability-identifier-naming.ConstexprVariablePrefix, value: k }
  - { key: readability-identifier-naming.FunctionCase, value: CamelCase }
  - { key: readability-identifier-naming.NamespaceCase, value: lower_case }
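Note: the options above encode the Google C++ naming scheme that the renames in the rest of this diff follow. A minimal, hypothetical snippet (the identifiers below are invented for illustration, not taken from the codebase) that satisfies these checks:

// Hypothetical example of identifiers accepted by the options above.
namespace tree_util {                     // NamespaceCase: lower_case

constexpr float kDefaultLearningRate = 0.3f;  // ConstexprVariablePrefix: k + CamelCase

enum TreeMethod {                         // EnumCase: CamelCase
  kAuto,                                  // EnumConstantPrefix: k + CamelCase
  kExact,
  kHist
};

class SplitEvaluator {                    // ClassCase: CamelCase
 public:
  float ComputeGain(float left_sum, float right_sum);  // FunctionCase: CamelCase
 private:
  float min_split_loss_;                  // PrivateMemberSuffix: '_'
};

}  // namespace tree_util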
@@ -44,10 +44,12 @@ matrix:
    addons:
      apt:
        sources:
          - llvm-toolchain-trusty-5.0
          - ubuntu-toolchain-r-test
          - george-edison55-precise-backports
        packages:
          - cmake
          - clang
          - clang-tidy-5.0
          - cmake-data
          - doxygen
          - wget
@@ -81,20 +81,19 @@ namespace xgboost {
 * \brief unsigned integer type used in boost,
 *  used for feature index and row index.
 */
typedef uint32_t bst_uint;
typedef int32_t bst_int;
using bst_uint = uint32_t;  // NOLINT
using bst_int = int32_t;    // NOLINT
/*! \brief long integers */
typedef uint64_t bst_ulong;  // NOLINT(*)
/*! \brief float type, used for storing statistics */
typedef float bst_float;

using bst_float = float;  // NOLINT

namespace detail {
/*! \brief Implementation of gradient statistics pair. Template specialisation
 * may be used to overload different gradients types e.g. low precision, high
 * precision, integer, floating point. */
template <typename T>
class bst_gpair_internal {
class GradientPairInternal {
  /*! \brief gradient statistics */
  T grad_;
  /*! \brief second order gradient statistics */

@@ -104,23 +103,23 @@ class bst_gpair_internal {
  XGBOOST_DEVICE void SetHess(float h) { hess_ = h; }

 public:
  typedef T value_t;
  using ValueT = T;

  XGBOOST_DEVICE bst_gpair_internal() : grad_(0), hess_(0) {}
  XGBOOST_DEVICE GradientPairInternal() : grad_(0), hess_(0) {}

  XGBOOST_DEVICE bst_gpair_internal(float grad, float hess) {
  XGBOOST_DEVICE GradientPairInternal(float grad, float hess) {
    SetGrad(grad);
    SetHess(hess);
  }

  // Copy constructor if of same value type
  XGBOOST_DEVICE bst_gpair_internal(const bst_gpair_internal<T> &g)
      : grad_(g.grad_), hess_(g.hess_) {}
  XGBOOST_DEVICE GradientPairInternal(const GradientPairInternal<T> &g)
      : grad_(g.grad_), hess_(g.hess_) {}  // NOLINT

  // Copy constructor if different value type - use getters and setters to
  // perform conversion
  template <typename T2>
  XGBOOST_DEVICE bst_gpair_internal(const bst_gpair_internal<T2> &g) {
  XGBOOST_DEVICE explicit GradientPairInternal(const GradientPairInternal<T2> &g) {
    SetGrad(g.GetGrad());
    SetHess(g.GetHess());
  }

@@ -128,85 +127,85 @@ class bst_gpair_internal {
  XGBOOST_DEVICE float GetGrad() const { return grad_; }
  XGBOOST_DEVICE float GetHess() const { return hess_; }

  XGBOOST_DEVICE bst_gpair_internal<T> &operator+=(
      const bst_gpair_internal<T> &rhs) {
  XGBOOST_DEVICE GradientPairInternal<T> &operator+=(
      const GradientPairInternal<T> &rhs) {
    grad_ += rhs.grad_;
    hess_ += rhs.hess_;
    return *this;
  }

  XGBOOST_DEVICE bst_gpair_internal<T> operator+(
      const bst_gpair_internal<T> &rhs) const {
    bst_gpair_internal<T> g;
  XGBOOST_DEVICE GradientPairInternal<T> operator+(
      const GradientPairInternal<T> &rhs) const {
    GradientPairInternal<T> g;
    g.grad_ = grad_ + rhs.grad_;
    g.hess_ = hess_ + rhs.hess_;
    return g;
  }

  XGBOOST_DEVICE bst_gpair_internal<T> &operator-=(
      const bst_gpair_internal<T> &rhs) {
  XGBOOST_DEVICE GradientPairInternal<T> &operator-=(
      const GradientPairInternal<T> &rhs) {
    grad_ -= rhs.grad_;
    hess_ -= rhs.hess_;
    return *this;
  }

  XGBOOST_DEVICE bst_gpair_internal<T> operator-(
      const bst_gpair_internal<T> &rhs) const {
    bst_gpair_internal<T> g;
  XGBOOST_DEVICE GradientPairInternal<T> operator-(
      const GradientPairInternal<T> &rhs) const {
    GradientPairInternal<T> g;
    g.grad_ = grad_ - rhs.grad_;
    g.hess_ = hess_ - rhs.hess_;
    return g;
  }

  XGBOOST_DEVICE bst_gpair_internal(int value) {
    *this = bst_gpair_internal<T>(static_cast<float>(value),
  XGBOOST_DEVICE explicit GradientPairInternal(int value) {
    *this = GradientPairInternal<T>(static_cast<float>(value),
                                    static_cast<float>(value));
  }

  friend std::ostream &operator<<(std::ostream &os,
                                  const bst_gpair_internal<T> &g) {
                                  const GradientPairInternal<T> &g) {
    os << g.GetGrad() << "/" << g.GetHess();
    return os;
  }
};

template<>
inline XGBOOST_DEVICE float bst_gpair_internal<int64_t>::GetGrad() const {
inline XGBOOST_DEVICE float GradientPairInternal<int64_t>::GetGrad() const {
  return grad_ * 1e-4f;
}
template<>
inline XGBOOST_DEVICE float bst_gpair_internal<int64_t>::GetHess() const {
inline XGBOOST_DEVICE float GradientPairInternal<int64_t>::GetHess() const {
  return hess_ * 1e-4f;
}
template<>
inline XGBOOST_DEVICE void bst_gpair_internal<int64_t>::SetGrad(float g) {
inline XGBOOST_DEVICE void GradientPairInternal<int64_t>::SetGrad(float g) {
  grad_ = static_cast<int64_t>(std::round(g * 1e4));
}
template<>
inline XGBOOST_DEVICE void bst_gpair_internal<int64_t>::SetHess(float h) {
inline XGBOOST_DEVICE void GradientPairInternal<int64_t>::SetHess(float h) {
  hess_ = static_cast<int64_t>(std::round(h * 1e4));
}

}  // namespace detail

/*! \brief gradient statistics pair usually needed in gradient boosting */
typedef detail::bst_gpair_internal<float> bst_gpair;
using GradientPair = detail::GradientPairInternal<float>;

/*! \brief High precision gradient statistics pair */
typedef detail::bst_gpair_internal<double> bst_gpair_precise;
using GradientPairPrecise = detail::GradientPairInternal<double>;

/*! \brief High precision gradient statistics pair with integer backed
 * storage. Operators are associative where floating point versions are not
 * associative. */
typedef detail::bst_gpair_internal<int64_t> bst_gpair_integer;
using GradientPairInteger = detail::GradientPairInternal<int64_t>;

/*! \brief small eps gap for minimum split decision. */
const bst_float rt_eps = 1e-6f;
const bst_float kRtEps = 1e-6f;

/*! \brief define unsigned long for openmp loop */
typedef dmlc::omp_ulong omp_ulong;
using omp_ulong = dmlc::omp_ulong;  // NOLINT
/*! \brief define unsigned int for openmp loop */
typedef dmlc::omp_uint bst_omp_uint;
using bst_omp_uint = dmlc::omp_uint;  // NOLINT

/*!
 * \brief define compatible keywords in g++
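Note: the int64_t specialisation above stores gradients as fixed-point integers scaled by 1e4, which is why GradientPairInteger sums are order-independent while float sums are not. A standalone sketch of the round-trip (illustration only, not part of this change):

// Fixed-point round-trip as implemented by the int64_t specialisation:
// scale by 1e4 on write, by 1e-4 on read.
#include <cmath>
#include <cstdint>
#include <iostream>

int main() {
  float g = 0.12345f;
  // SetGrad: quantise to four decimal places.
  int64_t stored = static_cast<int64_t>(std::round(g * 1e4));  // 1235
  // GetGrad: convert back to float, precision capped at 1e-4.
  float recovered = stored * 1e-4f;  // ~0.1235
  std::cout << stored << " " << recovered << "\n";
  // Because the accumulator is an integer, sums of many such pairs are
  // bit-identical regardless of summation order, which floating point
  // accumulation cannot guarantee.
}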
@@ -30,16 +30,16 @@ typedef uint64_t bst_ulong;  // NOLINT(*)

/*! \brief handle to DMatrix */
typedef void *DMatrixHandle;
typedef void *DMatrixHandle;  // NOLINT(*)
/*! \brief handle to Booster */
typedef void *BoosterHandle;
typedef void *BoosterHandle;  // NOLINT(*)
/*! \brief handle to a data iterator */
typedef void *DataIterHandle;
typedef void *DataIterHandle;  // NOLINT(*)
/*! \brief handle to a internal data holder. */
typedef void *DataHolderHandle;
typedef void *DataHolderHandle;  // NOLINT(*)

/*! \brief Mini batch used in XGBoost Data Iteration */
typedef struct {
typedef struct {  // NOLINT(*)
  /*! \brief number of rows in the minibatch */
  size_t size;
  /*! \brief row pointer to the rows in the data */

@@ -66,7 +66,7 @@ typedef struct {
 * \param handle The handle to the callback.
 * \param batch The data content to be set.
 */
XGB_EXTERN_C typedef int XGBCallbackSetData(
XGB_EXTERN_C typedef int XGBCallbackSetData(  // NOLINT(*)
    DataHolderHandle handle, XGBoostBatchCSR batch);

/*!

@@ -80,9 +80,8 @@ XGB_EXTERN_C typedef int XGBCallbackSetData(
 * \param set_function_handle The handle to be passed to set function.
 * \return 0 if we are reaching the end and batch is not returned.
 */
XGB_EXTERN_C typedef int XGBCallbackDataIterNext(
    DataIterHandle data_handle,
    XGBCallbackSetData* set_function,
XGB_EXTERN_C typedef int XGBCallbackDataIterNext(  // NOLINT(*)
    DataIterHandle data_handle, XGBCallbackSetData *set_function,
    DataHolderHandle set_function_handle);

/*!

@@ -216,11 +215,9 @@ XGB_DLL int XGDMatrixCreateFromMat(const float *data,
 * \param nthread number of threads (up to maximum cores available, if <=0 use all cores)
 * \return 0 when success, -1 when failure happens
 */
XGB_DLL int XGDMatrixCreateFromMat_omp(const float *data,
                                       bst_ulong nrow,
                                       bst_ulong ncol,
                                       float missing,
                                       DMatrixHandle *out,
XGB_DLL int XGDMatrixCreateFromMat_omp(const float *data,  // NOLINT
                                       bst_ulong nrow, bst_ulong ncol,
                                       float missing, DMatrixHandle *out,
                                       int nthread);
/*!
 * \brief create a new dmatrix from sliced content of existing matrix
@@ -30,44 +30,45 @@ enum DataType {
/*!
 * \brief Meta information about dataset, always sit in memory.
 */
struct MetaInfo {
class MetaInfo {
 public:
  /*! \brief number of rows in the data */
  uint64_t num_row;
  uint64_t num_row_{0};
  /*! \brief number of columns in the data */
  uint64_t num_col;
  uint64_t num_col_{0};
  /*! \brief number of nonzero entries in the data */
  uint64_t num_nonzero;
  uint64_t num_nonzero_{0};
  /*! \brief label of each instance */
  std::vector<bst_float> labels;
  std::vector<bst_float> labels_;
  /*!
   * \brief specified root index of each instance,
   *  can be used for multi task setting
   */
  std::vector<bst_uint> root_index;
  std::vector<bst_uint> root_index_;
  /*!
   * \brief the index of begin and end of a group
   *  needed when the learning task is ranking.
   */
  std::vector<bst_uint> group_ptr;
  std::vector<bst_uint> group_ptr_;
  /*! \brief weights of each instance, optional */
  std::vector<bst_float> weights;
  std::vector<bst_float> weights_;
  /*!
   * \brief initialized margins,
   *  if specified, xgboost will start from this init margin
   *  can be used to specify initial prediction to boost from.
   */
  std::vector<bst_float> base_margin;
  std::vector<bst_float> base_margin_;
  /*! \brief version flag, used to check version of this info */
  static const int kVersion = 1;
  /*! \brief default constructor */
  MetaInfo() : num_row(0), num_col(0), num_nonzero(0) {}
  MetaInfo() = default;
  /*!
   * \brief Get weight of each instances.
   * \param i Instance index.
   * \return The weight.
   */
  inline bst_float GetWeight(size_t i) const {
    return weights.size() != 0 ? weights[i] : 1.0f;
    return weights_.size() != 0 ? weights_[i] : 1.0f;
  }
  /*!
   * \brief Get the root index of i-th instance.

@@ -75,20 +76,20 @@ struct MetaInfo {
   * \return The pre-defined root index of i-th instance.
   */
  inline unsigned GetRoot(size_t i) const {
    return root_index.size() != 0 ? root_index[i] : 0U;
    return root_index_.size() != 0 ? root_index_[i] : 0U;
  }
  /*! \brief get sorted indexes (argsort) of labels by absolute value (used by cox loss) */
  inline const std::vector<size_t>& LabelAbsSort() const {
    if (label_order_cache.size() == labels.size()) {
      return label_order_cache;
    if (label_order_cache_.size() == labels_.size()) {
      return label_order_cache_;
    }
    label_order_cache.resize(labels.size());
    std::iota(label_order_cache.begin(), label_order_cache.end(), 0);
    const auto l = labels;
    XGBOOST_PARALLEL_SORT(label_order_cache.begin(), label_order_cache.end(),
    label_order_cache_.resize(labels_.size());
    std::iota(label_order_cache_.begin(), label_order_cache_.end(), 0);
    const auto l = labels_;
    XGBOOST_PARALLEL_SORT(label_order_cache_.begin(), label_order_cache_.end(),
              [&l](size_t i1, size_t i2) {return std::abs(l[i1]) < std::abs(l[i2]);});

    return label_order_cache;
    return label_order_cache_;
  }
  /*! \brief clear all the information */
  void Clear();
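Note: the cached argsort above can be read in isolation. A serial sketch, with std::sort standing in for XGBOOST_PARALLEL_SORT (assumed to take the same comparator):

// Indices of the labels, ordered by absolute label value.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <numeric>
#include <vector>

std::vector<size_t> ArgsortByAbs(const std::vector<float>& labels) {
  std::vector<size_t> order(labels.size());
  std::iota(order.begin(), order.end(), 0);  // 0, 1, 2, ...
  std::sort(order.begin(), order.end(), [&labels](size_t i1, size_t i2) {
    return std::abs(labels[i1]) < std::abs(labels[i2]);
  });
  return order;  // e.g. labels {-3, 1, -2} -> order {1, 2, 0}
}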
@@ -113,7 +114,7 @@ struct MetaInfo {

 private:
  /*! \brief argsort of labels */
  mutable std::vector<size_t> label_order_cache;
  mutable std::vector<size_t> label_order_cache_;
};

/*! \brief read-only sparse instance batch in CSR format */
@@ -125,7 +126,7 @@ struct SparseBatch {
    /*! \brief feature value */
    bst_float fvalue;
    /*! \brief default constructor */
    Entry() {}
    Entry() = default;
    /*!
     * \brief constructor with index and value
     * \param index The feature or row index.

@@ -141,11 +142,11 @@ struct SparseBatch {
  /*! \brief an instance of sparse vector in the batch */
  struct Inst {
    /*! \brief pointer to the elements*/
    const Entry *data;
    const Entry *data{nullptr};
    /*! \brief length of the instance */
    bst_uint length;
    bst_uint length{0};
    /*! \brief constructor */
    Inst() : data(0), length(0) {}
    Inst() = default;
    Inst(const Entry *data, bst_uint length) : data(data), length(length) {}
    /*! \brief get i-th pair in the sparse vector*/
    inline const Entry& operator[](size_t i) const {

@@ -167,7 +168,7 @@ struct RowBatch : public SparseBatch {
  const Entry *data_ptr;
  /*! \brief get i-th row from the batch */
  inline Inst operator[](size_t i) const {
    return Inst(data_ptr + ind_ptr[i], static_cast<bst_uint>(ind_ptr[i + 1] - ind_ptr[i]));
    return {data_ptr + ind_ptr[i], static_cast<bst_uint>(ind_ptr[i + 1] - ind_ptr[i])};
  }
};
@@ -206,16 +207,16 @@ class DataSource : public dmlc::DataIter<RowBatch> {
 * \brief A vector-like structure to represent set of rows.
 *  But saves the memory when all rows are in the set (common case in xgb)
 */
struct RowSet {
class RowSet {
 public:
  /*! \return i-th row index */
  inline bst_uint operator[](size_t i) const;
  /*! \return the size of the set. */
  inline size_t size() const;
  inline size_t Size() const;
  /*! \brief push the index back to the set */
  inline void push_back(bst_uint i);
  inline void PushBack(bst_uint i);
  /*! \brief clear the set */
  inline void clear();
  inline void Clear();
  /*!
   * \brief save rowset to file.
   * \param fo The file to be saved.

@@ -228,11 +229,11 @@ struct RowSet {
   */
  inline bool Load(dmlc::Stream* fi);
  /*! \brief constructor */
  RowSet() : size_(0) {}
  RowSet() = default;

 private:
  /*! \brief The internal data structure of size */
  uint64_t size_;
  uint64_t size_{0};
  /*! \brief The internal data structure of row set if not all*/
  std::vector<bst_uint> rows_;
};

@@ -250,11 +251,11 @@ struct RowSet {
class DMatrix {
 public:
  /*! \brief default constructor */
  DMatrix() : cache_learner_ptr_(nullptr) {}
  DMatrix() = default;
  /*! \brief meta information of the dataset */
  virtual MetaInfo& info() = 0;
  virtual MetaInfo& Info() = 0;
  /*! \brief meta information of the dataset */
  virtual const MetaInfo& info() const = 0;
  virtual const MetaInfo& Info() const = 0;
  /*!
   * \brief get the row iterator, reset to beginning position
   * \note Only either RowIterator or column Iterator can be active.

@@ -291,9 +292,9 @@ class DMatrix {
  /*! \brief get column density */
  virtual float GetColDensity(size_t cidx) const = 0;
  /*! \return reference of buffered rowset, in column access */
  virtual const RowSet& buffered_rowset() const = 0;
  virtual const RowSet& BufferedRowset() const = 0;
  /*! \brief virtual destructor */
  virtual ~DMatrix() {}
  virtual ~DMatrix() = default;
  /*!
   * \brief Save DMatrix to local file.
   *  The saved file only works for non-sharded dataset(single machine training).

@@ -343,7 +344,7 @@ class DMatrix {
  // allow learner class to access this field.
  friend class LearnerImpl;
  /*! \brief public field to back ref cached matrix. */
  LearnerImpl* cache_learner_ptr_;
  LearnerImpl* cache_learner_ptr_{nullptr};
};

// implementation of inline functions

@@ -351,15 +352,15 @@ inline bst_uint RowSet::operator[](size_t i) const {
  return rows_.size() == 0 ? static_cast<bst_uint>(i) : rows_[i];
}

inline size_t RowSet::size() const {
inline size_t RowSet::Size() const {
  return size_;
}

inline void RowSet::clear() {
inline void RowSet::Clear() {
  rows_.clear(); size_ = 0;
}

inline void RowSet::push_back(bst_uint i) {
inline void RowSet::PushBack(bst_uint i) {
  if (rows_.size() == 0) {
    if (i == size_) {
      ++size_; return;
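Note: RowSet's space trick is visible in the inline implementations above — while indices are pushed in dense order (0, 1, 2, ...), no vector is materialised and only size_ grows, so operator[] is the identity. A behavioural sketch (illustration only; the fallback branch for an out-of-order push is truncated in this diff, so the materialisation below is an assumption):

#include <cassert>
#include <cstdint>
#include <vector>

class DenseRowSet {  // hypothetical stand-in for RowSet
 public:
  uint32_t operator[](size_t i) const {
    return rows_.empty() ? static_cast<uint32_t>(i) : rows_[i];
  }
  size_t Size() const { return size_; }
  void PushBack(uint32_t i) {
    if (rows_.empty() && i == size_) {  // still the dense prefix 0..size_-1
      ++size_;
      return;
    }
    // Out-of-order push: materialise the explicit list (assumed fallback).
    if (rows_.empty()) {
      for (uint32_t k = 0; k < size_; ++k) rows_.push_back(k);
    }
    rows_.push_back(i);
    ++size_;
  }

 private:
  uint64_t size_{0};
  std::vector<uint32_t> rows_;
};

int main() {
  DenseRowSet s;
  for (uint32_t i = 0; i < 1000; ++i) s.PushBack(i);  // no heap allocation
  assert(s.Size() == 1000 && s[42] == 42);
}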
@@ -45,7 +45,7 @@ class FeatureMap {
   */
  inline void PushBack(int fid, const char *fname, const char *ftype) {
    CHECK_EQ(fid, static_cast<int>(names_.size()));
    names_.push_back(std::string(fname));
    names_.emplace_back(fname);
    types_.push_back(GetType(ftype));
  }
  /*! \brief clear the feature map */

@@ -54,11 +54,11 @@ class FeatureMap {
    types_.clear();
  }
  /*! \return number of known features */
  inline size_t size() const {
  inline size_t Size() const {
    return names_.size();
  }
  /*! \return name of specific feature */
  inline const char* name(size_t idx) const {
  inline const char* Name(size_t idx) const {
    CHECK_LT(idx, names_.size()) << "FeatureMap feature index exceed bound";
    return names_[idx].c_str();
  }

@@ -75,7 +75,7 @@ class FeatureMap {
   * \return The translated type.
   */
  inline static Type GetType(const char* tname) {
    using namespace std;
    using std::strcmp;
    if (!strcmp("i", tname)) return kIndicator;
    if (!strcmp("q", tname)) return kQuantitive;
    if (!strcmp("int", tname)) return kInteger;
@@ -27,7 +27,7 @@ namespace xgboost {
class GradientBooster {
 public:
  /*! \brief virtual destructor */
  virtual ~GradientBooster() {}
  virtual ~GradientBooster() = default;
  /*!
   * \brief set configuration from pair iterators.
   * \param begin The beginning iterator.

@@ -69,7 +69,7 @@ class GradientBooster {
   *  the booster may change content of gpair
   */
  virtual void DoBoost(DMatrix* p_fmat,
                       HostDeviceVector<bst_gpair>* in_gpair,
                       HostDeviceVector<GradientPair>* in_gpair,
                       ObjFunction* obj = nullptr) = 0;

  /*!

@@ -37,7 +37,7 @@ namespace xgboost {
class Learner : public rabit::Serializable {
 public:
  /*! \brief virtual destructor */
  virtual ~Learner() {}
  ~Learner() override = default;
  /*!
   * \brief set configuration from pair iterators.
   * \param begin The beginning iterator.

@@ -62,12 +62,12 @@ class Learner : public rabit::Serializable {
   * \brief load model from stream
   * \param fi input stream.
   */
  virtual void Load(dmlc::Stream* fi) = 0;
  void Load(dmlc::Stream* fi) override = 0;
  /*!
   * \brief save model to stream.
   * \param fo output stream
   */
  virtual void Save(dmlc::Stream* fo) const = 0;
  void Save(dmlc::Stream* fo) const override = 0;
  /*!
   * \brief update the model for one iteration
   *  With the specified objective function.

@@ -84,7 +84,7 @@ class Learner : public rabit::Serializable {
   */
  virtual void BoostOneIter(int iter,
                            DMatrix* train,
                            HostDeviceVector<bst_gpair>* in_gpair) = 0;
                            HostDeviceVector<GradientPair>* in_gpair) = 0;
  /*!
   * \brief evaluate the model for specific iteration using the configured metrics.
   * \param iter iteration number

@@ -194,7 +194,7 @@ inline void Learner::Predict(const SparseBatch::Inst& inst,
                             bool output_margin,
                             HostDeviceVector<bst_float>* out_preds,
                             unsigned ntree_limit) const {
  gbm_->PredictInstance(inst, &out_preds->data_h(), ntree_limit);
  gbm_->PredictInstance(inst, &out_preds->HostVector(), ntree_limit);
  if (!output_margin) {
    obj_->PredTransform(out_preds);
  }

@@ -19,7 +19,7 @@ namespace xgboost {
class LinearUpdater {
 public:
  /*! \brief virtual destructor */
  virtual ~LinearUpdater() {}
  virtual ~LinearUpdater() = default;
  /*!
   * \brief Initialize the updater with given arguments.
   * \param args arguments to the objective function.

@@ -36,7 +36,7 @@ class LinearUpdater {
   * \param sum_instance_weight The sum instance weights, used to normalise l1/l2 penalty.
   */

  virtual void Update(std::vector<bst_gpair>* in_gpair, DMatrix* data,
  virtual void Update(std::vector<GradientPair>* in_gpair, DMatrix* data,
                      gbm::GBLinearModel* model,
                      double sum_instance_weight) = 0;

@@ -21,7 +21,7 @@ class BaseLogger {
    log_stream_ << "[" << dmlc::DateLogger().HumanDate() << "] ";
#endif
  }
  std::ostream& stream() { return log_stream_; }
  std::ostream& stream() { return log_stream_; }  // NOLINT

 protected:
  std::ostringstream log_stream_;

@@ -35,7 +35,7 @@ class Metric {
  /*! \return name of metric */
  virtual const char* Name() const = 0;
  /*! \brief virtual destructor */
  virtual ~Metric() {}
  virtual ~Metric() = default;
  /*!
   * \brief create a metric according to name.
   * \param name name of the metric.

@@ -23,7 +23,7 @@ namespace xgboost {
class ObjFunction {
 public:
  /*! \brief virtual destructor */
  virtual ~ObjFunction() {}
  virtual ~ObjFunction() = default;
  /*!
   * \brief set configuration from pair iterators.
   * \param begin The beginning iterator.

@@ -47,7 +47,7 @@ class ObjFunction {
  virtual void GetGradient(HostDeviceVector<bst_float>* preds,
                           const MetaInfo& info,
                           int iteration,
                           HostDeviceVector<bst_gpair>* out_gpair) = 0;
                           HostDeviceVector<GradientPair>* out_gpair) = 0;

  /*! \return the default evaluation metric for the objective */
  virtual const char* DefaultEvalMetric() const = 0;

@@ -36,7 +36,7 @@ namespace xgboost {

class Predictor {
 public:
  virtual ~Predictor() {}
  virtual ~Predictor() = default;

  /**
   * \fn virtual void Predictor::Init(const std::vector<std::pair<std::string,
@@ -71,70 +71,70 @@ template<typename TSplitCond, typename TNodeStat>
class TreeModel {
 public:
  /*! \brief data type to indicate split condition */
  typedef TNodeStat NodeStat;
  using NodeStat = TNodeStat;
  /*! \brief auxiliary statistics of node to help tree building */
  typedef TSplitCond SplitCond;
  using SplitCond = TSplitCond;
  /*! \brief tree node */
  class Node {
   public:
    Node() : sindex_(0) {
    Node() {
      // assert compact alignment
      static_assert(sizeof(Node) == 4 * sizeof(int) + sizeof(Info),
                    "Node: 64 bit align");
    }
    /*! \brief index of left child */
    inline int cleft() const {
    inline int LeftChild() const {
      return this->cleft_;
    }
    /*! \brief index of right child */
    inline int cright() const {
    inline int RightChild() const {
      return this->cright_;
    }
    /*! \brief index of default child when feature is missing */
    inline int cdefault() const {
      return this->default_left() ? this->cleft() : this->cright();
    inline int DefaultChild() const {
      return this->DefaultLeft() ? this->LeftChild() : this->RightChild();
    }
    /*! \brief feature index of split condition */
    inline unsigned split_index() const {
    inline unsigned SplitIndex() const {
      return sindex_ & ((1U << 31) - 1U);
    }
    /*! \brief when feature is unknown, whether goes to left child */
    inline bool default_left() const {
    inline bool DefaultLeft() const {
      return (sindex_ >> 31) != 0;
    }
    /*! \brief whether current node is leaf node */
    inline bool is_leaf() const {
    inline bool IsLeaf() const {
      return cleft_ == -1;
    }
    /*! \return get leaf value of leaf node */
    inline bst_float leaf_value() const {
    inline bst_float LeafValue() const {
      return (this->info_).leaf_value;
    }
    /*! \return get split condition of the node */
    inline TSplitCond split_cond() const {
    inline TSplitCond SplitCond() const {
      return (this->info_).split_cond;
    }
    /*! \brief get parent of the node */
    inline int parent() const {
    inline int Parent() const {
      return parent_ & ((1U << 31) - 1);
    }
    /*! \brief whether current node is left child */
    inline bool is_left_child() const {
    inline bool IsLeftChild() const {
      return (parent_ & (1U << 31)) != 0;
    }
    /*! \brief whether this node is deleted */
    inline bool is_deleted() const {
    inline bool IsDeleted() const {
      return sindex_ == std::numeric_limits<unsigned>::max();
    }
    /*! \brief whether current node is root */
    inline bool is_root() const {
    inline bool IsRoot() const {
      return parent_ == -1;
    }
    /*!
     * \brief set the right child
     * \param nid node id to right child
     */
    inline void set_right_child(int nid) {
    inline void SetRightChild(int nid) {
      this->cright_ = nid;
    }
    /*!
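Note: the Node accessors above pack a flag into the top bit of two 32-bit fields — bit 31 of sindex_ carries the default-left flag next to the split feature index, and bit 31 of parent_ carries whether the node is a left child. A standalone sketch of that encoding:

// The top bit of a 32-bit word carries a flag, the low 31 bits an index.
#include <cassert>

int main() {
  unsigned split_index = 7;
  bool default_left = true;
  unsigned sindex = split_index;
  if (default_left) sindex |= (1U << 31);     // as in SetSplit below
  assert((sindex & ((1U << 31) - 1U)) == 7);  // SplitIndex(): mask off the flag
  assert((sindex >> 31) != 0);                // DefaultLeft(): test the top bit
}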
@@ -143,7 +143,7 @@ class TreeModel {
     * \param split_cond split condition
     * \param default_left the default direction when feature is unknown
     */
    inline void set_split(unsigned split_index, TSplitCond split_cond,
    inline void SetSplit(unsigned split_index, TSplitCond split_cond,
                          bool default_left = false) {
      if (default_left) split_index |= (1U << 31);
      this->sindex_ = split_index;

@@ -155,13 +155,13 @@ class TreeModel {
     * \param right right index, could be used to store
     *  additional information
     */
    inline void set_leaf(bst_float value, int right = -1) {
    inline void SetLeaf(bst_float value, int right = -1) {
      (this->info_).leaf_value = value;
      this->cleft_ = -1;
      this->cright_ = right;
    }
    /*! \brief mark that this node is deleted */
    inline void mark_delete() {
    inline void MarkDelete() {
      this->sindex_ = std::numeric_limits<unsigned>::max();
    }

@@ -181,11 +181,11 @@ class TreeModel {
    // pointer to left, right
    int cleft_, cright_;
    // split feature index, left split or right split depends on the highest bit
    unsigned sindex_;
    unsigned sindex_{0};
    // extra info
    Info info_;
    // set parent
    inline void set_parent(int pidx, bool is_left_child = true) {
    inline void SetParent(int pidx, bool is_left_child = true) {
      if (is_left_child) pidx |= (1U << 31);
      this->parent_ = pidx;
    }

@@ -193,35 +193,35 @@ class TreeModel {

 protected:
  // vector of nodes
  std::vector<Node> nodes;
  std::vector<Node> nodes_;
  // free node space, used during training process
  std::vector<int> deleted_nodes;
  std::vector<int> deleted_nodes_;
  // stats of nodes
  std::vector<TNodeStat> stats;
  std::vector<TNodeStat> stats_;
  // leaf vector, that is used to store additional information
  std::vector<bst_float> leaf_vector;
  std::vector<bst_float> leaf_vector_;
  // allocate a new node,
  // !!!!!! NOTE: may cause BUG here, nodes.resize
  inline int AllocNode() {
    if (param.num_deleted != 0) {
      int nd = deleted_nodes.back();
      deleted_nodes.pop_back();
      int nd = deleted_nodes_.back();
      deleted_nodes_.pop_back();
      --param.num_deleted;
      return nd;
    }
    int nd = param.num_nodes++;
    CHECK_LT(param.num_nodes, std::numeric_limits<int>::max())
        << "number of nodes in the tree exceed 2^31";
    nodes.resize(param.num_nodes);
    stats.resize(param.num_nodes);
    leaf_vector.resize(param.num_nodes * param.size_leaf_vector);
    nodes_.resize(param.num_nodes);
    stats_.resize(param.num_nodes);
    leaf_vector_.resize(param.num_nodes * param.size_leaf_vector);
    return nd;
  }
  // delete a tree node, keep the parent field to allow trace back
  inline void DeleteNode(int nid) {
    CHECK_GE(nid, param.num_roots);
    deleted_nodes.push_back(nid);
    nodes[nid].mark_delete();
    deleted_nodes_.push_back(nid);
    nodes_[nid].MarkDelete();
    ++param.num_deleted;
  }

@@ -232,11 +232,11 @@ class TreeModel {
   * \param value new leaf value
   */
  inline void ChangeToLeaf(int rid, bst_float value) {
    CHECK(nodes[nodes[rid].cleft() ].is_leaf());
    CHECK(nodes[nodes[rid].cright()].is_leaf());
    this->DeleteNode(nodes[rid].cleft());
    this->DeleteNode(nodes[rid].cright());
    nodes[rid].set_leaf(value);
    CHECK(nodes_[nodes_[rid].LeftChild() ].IsLeaf());
    CHECK(nodes_[nodes_[rid].RightChild()].IsLeaf());
    this->DeleteNode(nodes_[rid].LeftChild());
    this->DeleteNode(nodes_[rid].RightChild());
    nodes_[rid].SetLeaf(value);
  }
  /*!
   * \brief collapse a non leaf node to a leaf node, delete its children

@@ -244,12 +244,12 @@ class TreeModel {
   * \param value new leaf value
   */
  inline void CollapseToLeaf(int rid, bst_float value) {
    if (nodes[rid].is_leaf()) return;
    if (!nodes[nodes[rid].cleft() ].is_leaf()) {
      CollapseToLeaf(nodes[rid].cleft(), 0.0f);
    if (nodes_[rid].IsLeaf()) return;
    if (!nodes_[nodes_[rid].LeftChild() ].IsLeaf()) {
      CollapseToLeaf(nodes_[rid].LeftChild(), 0.0f);
    }
    if (!nodes[nodes[rid].cright() ].is_leaf()) {
      CollapseToLeaf(nodes[rid].cright(), 0.0f);
    if (!nodes_[nodes_[rid].RightChild() ].IsLeaf()) {
      CollapseToLeaf(nodes_[rid].RightChild(), 0.0f);
    }
    this->ChangeToLeaf(rid, value);
  }

@@ -262,47 +262,47 @@ class TreeModel {
    param.num_nodes = 1;
    param.num_roots = 1;
    param.num_deleted = 0;
    nodes.resize(1);
    nodes_.resize(1);
  }
  /*! \brief get node given nid */
  inline Node& operator[](int nid) {
    return nodes[nid];
    return nodes_[nid];
  }
  /*! \brief get node given nid */
  inline const Node& operator[](int nid) const {
    return nodes[nid];
    return nodes_[nid];
  }

  /*! \brief get const reference to nodes */
  inline const std::vector<Node>& GetNodes() const { return nodes; }
  inline const std::vector<Node>& GetNodes() const { return nodes_; }

  /*! \brief get node statistics given nid */
  inline NodeStat& stat(int nid) {
    return stats[nid];
  inline NodeStat& Stat(int nid) {
    return stats_[nid];
  }
  /*! \brief get node statistics given nid */
  inline const NodeStat& stat(int nid) const {
    return stats[nid];
  inline const NodeStat& Stat(int nid) const {
    return stats_[nid];
  }
  /*! \brief get leaf vector given nid */
  inline bst_float* leafvec(int nid) {
    if (leaf_vector.size() == 0) return nullptr;
    return &leaf_vector[nid * param.size_leaf_vector];
  inline bst_float* Leafvec(int nid) {
    if (leaf_vector_.size() == 0) return nullptr;
    return &leaf_vector_[nid * param.size_leaf_vector];
  }
  /*! \brief get leaf vector given nid */
  inline const bst_float* leafvec(int nid) const {
    if (leaf_vector.size() == 0) return nullptr;
    return &leaf_vector[nid * param.size_leaf_vector];
  inline const bst_float* Leafvec(int nid) const {
    if (leaf_vector_.size() == 0) return nullptr;
    return &leaf_vector_[nid * param.size_leaf_vector];
  }
  /*! \brief initialize the model */
  inline void InitModel() {
    param.num_nodes = param.num_roots;
    nodes.resize(param.num_nodes);
    stats.resize(param.num_nodes);
    leaf_vector.resize(param.num_nodes * param.size_leaf_vector, 0.0f);
    nodes_.resize(param.num_nodes);
    stats_.resize(param.num_nodes);
    leaf_vector_.resize(param.num_nodes * param.size_leaf_vector, 0.0f);
    for (int i = 0; i < param.num_nodes; i ++) {
      nodes[i].set_leaf(0.0f);
      nodes[i].set_parent(-1);
      nodes_[i].SetLeaf(0.0f);
      nodes_[i].SetParent(-1);
    }
  }
  /*!

@@ -311,35 +311,35 @@ class TreeModel {
   */
  inline void Load(dmlc::Stream* fi) {
    CHECK_EQ(fi->Read(&param, sizeof(TreeParam)), sizeof(TreeParam));
    nodes.resize(param.num_nodes);
    stats.resize(param.num_nodes);
    nodes_.resize(param.num_nodes);
    stats_.resize(param.num_nodes);
    CHECK_NE(param.num_nodes, 0);
    CHECK_EQ(fi->Read(dmlc::BeginPtr(nodes), sizeof(Node) * nodes.size()),
             sizeof(Node) * nodes.size());
    CHECK_EQ(fi->Read(dmlc::BeginPtr(stats), sizeof(NodeStat) * stats.size()),
             sizeof(NodeStat) * stats.size());
    CHECK_EQ(fi->Read(dmlc::BeginPtr(nodes_), sizeof(Node) * nodes_.size()),
             sizeof(Node) * nodes_.size());
    CHECK_EQ(fi->Read(dmlc::BeginPtr(stats_), sizeof(NodeStat) * stats_.size()),
             sizeof(NodeStat) * stats_.size());
    if (param.size_leaf_vector != 0) {
      CHECK(fi->Read(&leaf_vector));
      CHECK(fi->Read(&leaf_vector_));
    }
    // chg deleted nodes
    deleted_nodes.resize(0);
    deleted_nodes_.resize(0);
    for (int i = param.num_roots; i < param.num_nodes; ++i) {
      if (nodes[i].is_deleted()) deleted_nodes.push_back(i);
      if (nodes_[i].IsDeleted()) deleted_nodes_.push_back(i);
    }
    CHECK_EQ(static_cast<int>(deleted_nodes.size()), param.num_deleted);
    CHECK_EQ(static_cast<int>(deleted_nodes_.size()), param.num_deleted);
  }
  /*!
   * \brief save model to stream
   * \param fo output stream
   */
  inline void Save(dmlc::Stream* fo) const {
    CHECK_EQ(param.num_nodes, static_cast<int>(nodes.size()));
    CHECK_EQ(param.num_nodes, static_cast<int>(stats.size()));
    CHECK_EQ(param.num_nodes, static_cast<int>(nodes_.size()));
    CHECK_EQ(param.num_nodes, static_cast<int>(stats_.size()));
    fo->Write(&param, sizeof(TreeParam));
    CHECK_NE(param.num_nodes, 0);
    fo->Write(dmlc::BeginPtr(nodes), sizeof(Node) * nodes.size());
    fo->Write(dmlc::BeginPtr(stats), sizeof(NodeStat) * nodes.size());
    if (param.size_leaf_vector != 0) fo->Write(leaf_vector);
    fo->Write(dmlc::BeginPtr(nodes_), sizeof(Node) * nodes_.size());
    fo->Write(dmlc::BeginPtr(stats_), sizeof(NodeStat) * nodes_.size());
    if (param.size_leaf_vector != 0) fo->Write(leaf_vector_);
  }
  /*!
   * \brief add child nodes to node

@@ -348,10 +348,10 @@ class TreeModel {
  inline void AddChilds(int nid) {
    int pleft = this->AllocNode();
    int pright = this->AllocNode();
    nodes[nid].cleft_ = pleft;
    nodes[nid].cright_ = pright;
    nodes[nodes[nid].cleft() ].set_parent(nid, true);
    nodes[nodes[nid].cright()].set_parent(nid, false);
    nodes_[nid].cleft_ = pleft;
    nodes_[nid].cright_ = pright;
    nodes_[nodes_[nid].LeftChild() ].SetParent(nid, true);
    nodes_[nodes_[nid].RightChild()].SetParent(nid, false);
  }
  /*!
   * \brief only add a right child to a leaf node

@@ -359,8 +359,8 @@ class TreeModel {
   */
  inline void AddRightChild(int nid) {
    int pright = this->AllocNode();
    nodes[nid].right = pright;
    nodes[nodes[nid].right].set_parent(nid, false);
    nodes_[nid].right = pright;
    nodes_[nodes_[nid].right].SetParent(nid, false);
  }
  /*!
   * \brief get current depth

@@ -369,9 +369,9 @@ class TreeModel {
   */
  inline int GetDepth(int nid, bool pass_rchild = false) const {
    int depth = 0;
    while (!nodes[nid].is_root()) {
      if (!pass_rchild || nodes[nid].is_left_child()) ++depth;
      nid = nodes[nid].parent();
    while (!nodes_[nid].IsRoot()) {
      if (!pass_rchild || nodes_[nid].IsLeftChild()) ++depth;
      nid = nodes_[nid].Parent();
    }
    return depth;
  }

@@ -380,9 +380,9 @@ class TreeModel {
   * \param nid node id
   */
  inline int MaxDepth(int nid) const {
    if (nodes[nid].is_leaf()) return 0;
    return std::max(MaxDepth(nodes[nid].cleft())+1,
                    MaxDepth(nodes[nid].cright())+1);
    if (nodes_[nid].IsLeaf()) return 0;
    return std::max(MaxDepth(nodes_[nid].LeftChild())+1,
                    MaxDepth(nodes_[nid].RightChild())+1);
  }
  /*!
   * \brief get maximum depth

@@ -395,7 +395,7 @@ class TreeModel {
    return maxd;
  }
  /*! \brief number of extra nodes besides the root */
  inline int num_extra_nodes() const {
  inline int NumExtraNodes() const {
    return param.num_nodes - param.num_roots - param.num_deleted;
  }
};
@@ -421,7 +421,7 @@ struct PathElement {
  bst_float zero_fraction;
  bst_float one_fraction;
  bst_float pweight;
  PathElement() {}
  PathElement() = default;
  PathElement(int i, bst_float z, bst_float o, bst_float w) :
    feature_index(i), zero_fraction(z), one_fraction(o), pweight(w) {}
};

@@ -457,19 +457,19 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat> {
   * \brief returns the size of the feature vector
   * \return the size of the feature vector
   */
  inline size_t size() const;
  inline size_t Size() const;
  /*!
   * \brief get ith value
   * \param i feature index.
   * \return the i-th feature value
   */
  inline bst_float fvalue(size_t i) const;
  inline bst_float Fvalue(size_t i) const;
  /*!
   * \brief check whether i-th entry is missing
   * \param i feature index.
   * \return whether i-th value is missing.
   */
  inline bool is_missing(size_t i) const;
  inline bool IsMissing(size_t i) const;

 private:
  /*!

@@ -480,7 +480,7 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat> {
    bst_float fvalue;
    int flag;
  };
  std::vector<Entry> data;
  std::vector<Entry> data_;
};
/*!
 * \brief get the leaf index

@@ -562,63 +562,63 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat> {
 private:
  inline bst_float FillNodeMeanValue(int nid);

  std::vector<bst_float> node_mean_values;
  std::vector<bst_float> node_mean_values_;
};

// implementations of inline functions
// do not need to read if only use the model
inline void RegTree::FVec::Init(size_t size) {
  Entry e; e.flag = -1;
  data.resize(size);
  std::fill(data.begin(), data.end(), e);
  data_.resize(size);
  std::fill(data_.begin(), data_.end(), e);
}

inline void RegTree::FVec::Fill(const RowBatch::Inst& inst) {
  for (bst_uint i = 0; i < inst.length; ++i) {
    if (inst[i].index >= data.size()) continue;
    data[inst[i].index].fvalue = inst[i].fvalue;
    if (inst[i].index >= data_.size()) continue;
    data_[inst[i].index].fvalue = inst[i].fvalue;
  }
}

inline void RegTree::FVec::Drop(const RowBatch::Inst& inst) {
  for (bst_uint i = 0; i < inst.length; ++i) {
    if (inst[i].index >= data.size()) continue;
    data[inst[i].index].flag = -1;
    if (inst[i].index >= data_.size()) continue;
    data_[inst[i].index].flag = -1;
  }
}

inline size_t RegTree::FVec::size() const {
  return data.size();
inline size_t RegTree::FVec::Size() const {
  return data_.size();
}

inline bst_float RegTree::FVec::fvalue(size_t i) const {
  return data[i].fvalue;
inline bst_float RegTree::FVec::Fvalue(size_t i) const {
  return data_[i].fvalue;
}

inline bool RegTree::FVec::is_missing(size_t i) const {
  return data[i].flag == -1;
inline bool RegTree::FVec::IsMissing(size_t i) const {
  return data_[i].flag == -1;
}

inline int RegTree::GetLeafIndex(const RegTree::FVec& feat, unsigned root_id) const {
  int pid = static_cast<int>(root_id);
  while (!(*this)[pid].is_leaf()) {
    unsigned split_index = (*this)[pid].split_index();
    pid = this->GetNext(pid, feat.fvalue(split_index), feat.is_missing(split_index));
  auto pid = static_cast<int>(root_id);
  while (!(*this)[pid].IsLeaf()) {
    unsigned split_index = (*this)[pid].SplitIndex();
    pid = this->GetNext(pid, feat.Fvalue(split_index), feat.IsMissing(split_index));
  }
  return pid;
}

inline bst_float RegTree::Predict(const RegTree::FVec& feat, unsigned root_id) const {
  int pid = this->GetLeafIndex(feat, root_id);
  return (*this)[pid].leaf_value();
  return (*this)[pid].LeafValue();
}

inline void RegTree::FillNodeMeanValues() {
  size_t num_nodes = this->param.num_nodes;
  if (this->node_mean_values.size() == num_nodes) {
  if (this->node_mean_values_.size() == num_nodes) {
    return;
  }
  this->node_mean_values.resize(num_nodes);
  this->node_mean_values_.resize(num_nodes);
  for (int root_id = 0; root_id < param.num_roots; ++root_id) {
    this->FillNodeMeanValue(root_id);
  }
@@ -627,40 +627,39 @@ inline void RegTree::FillNodeMeanValues() {
inline bst_float RegTree::FillNodeMeanValue(int nid) {
  bst_float result;
  auto& node = (*this)[nid];
  if (node.is_leaf()) {
    result = node.leaf_value();
  if (node.IsLeaf()) {
    result = node.LeafValue();
  } else {
    result  = this->FillNodeMeanValue(node.cleft()) * this->stat(node.cleft()).sum_hess;
    result += this->FillNodeMeanValue(node.cright()) * this->stat(node.cright()).sum_hess;
    result /= this->stat(nid).sum_hess;
    result  = this->FillNodeMeanValue(node.LeftChild()) * this->Stat(node.LeftChild()).sum_hess;
    result += this->FillNodeMeanValue(node.RightChild()) * this->Stat(node.RightChild()).sum_hess;
    result /= this->Stat(nid).sum_hess;
  }
  this->node_mean_values[nid] = result;
  this->node_mean_values_[nid] = result;
  return result;
}

inline void RegTree::CalculateContributionsApprox(const RegTree::FVec& feat, unsigned root_id,
                                                  bst_float *out_contribs) const {
  CHECK_GT(this->node_mean_values.size(), 0U);
  CHECK_GT(this->node_mean_values_.size(), 0U);
  // this follows the idea of http://blog.datadive.net/interpreting-random-forests/
  bst_float node_value;
  unsigned split_index;
  int pid = static_cast<int>(root_id);
  unsigned split_index = 0;
  auto pid = static_cast<int>(root_id);
  // update bias value
  node_value = this->node_mean_values[pid];
  out_contribs[feat.size()] += node_value;
  if ((*this)[pid].is_leaf()) {
  bst_float node_value = this->node_mean_values_[pid];
  out_contribs[feat.Size()] += node_value;
  if ((*this)[pid].IsLeaf()) {
    // nothing to do anymore
    return;
  }
  while (!(*this)[pid].is_leaf()) {
    split_index = (*this)[pid].split_index();
    pid = this->GetNext(pid, feat.fvalue(split_index), feat.is_missing(split_index));
    bst_float new_value = this->node_mean_values[pid];
  while (!(*this)[pid].IsLeaf()) {
    split_index = (*this)[pid].SplitIndex();
    pid = this->GetNext(pid, feat.Fvalue(split_index), feat.IsMissing(split_index));
    bst_float new_value = this->node_mean_values_[pid];
    // update feature weight
    out_contribs[split_index] += new_value - node_value;
    node_value = new_value;
  }
  bst_float leaf_value = (*this)[pid].leaf_value();
  bst_float leaf_value = (*this)[pid].LeafValue();
  // update leaf feature weight
  out_contribs[split_index] += leaf_value - node_value;
}

@@ -749,33 +748,33 @@ inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
    ExtendPath(unique_path, unique_depth, parent_zero_fraction,
               parent_one_fraction, parent_feature_index);
  }
  const unsigned split_index = node.split_index();
  const unsigned split_index = node.SplitIndex();

  // leaf node
  if (node.is_leaf()) {
  if (node.IsLeaf()) {
    for (unsigned i = 1; i <= unique_depth; ++i) {
      const bst_float w = UnwoundPathSum(unique_path, unique_depth, i);
      const PathElement &el = unique_path[i];
      phi[el.feature_index] += w * (el.one_fraction - el.zero_fraction)
                                 * node.leaf_value() * condition_fraction;
                                 * node.LeafValue() * condition_fraction;
    }

  // internal node
  } else {
    // find which branch is "hot" (meaning x would follow it)
    unsigned hot_index = 0;
    if (feat.is_missing(split_index)) {
      hot_index = node.cdefault();
    } else if (feat.fvalue(split_index) < node.split_cond()) {
      hot_index = node.cleft();
    if (feat.IsMissing(split_index)) {
      hot_index = node.DefaultChild();
    } else if (feat.Fvalue(split_index) < node.SplitCond()) {
      hot_index = node.LeftChild();
    } else {
      hot_index = node.cright();
      hot_index = node.RightChild();
    }
    const unsigned cold_index = (static_cast<int>(hot_index) == node.cleft() ?
                                 node.cright() : node.cleft());
    const bst_float w = this->stat(node_index).sum_hess;
    const bst_float hot_zero_fraction = this->stat(hot_index).sum_hess / w;
    const bst_float cold_zero_fraction = this->stat(cold_index).sum_hess / w;
    const unsigned cold_index = (static_cast<int>(hot_index) == node.LeftChild() ?
                                 node.RightChild() : node.LeftChild());
    const bst_float w = this->Stat(node_index).sum_hess;
    const bst_float hot_zero_fraction = this->Stat(hot_index).sum_hess / w;
    const bst_float cold_zero_fraction = this->Stat(cold_index).sum_hess / w;
    bst_float incoming_zero_fraction = 1;
    bst_float incoming_one_fraction = 1;

@@ -820,13 +819,13 @@ inline void RegTree::CalculateContributions(const RegTree::FVec& feat, unsigned
                                            unsigned condition_feature) const {
  // find the expected value of the tree's predictions
  if (condition == 0) {
    bst_float node_value = this->node_mean_values[static_cast<int>(root_id)];
    out_contribs[feat.size()] += node_value;
    bst_float node_value = this->node_mean_values_[static_cast<int>(root_id)];
    out_contribs[feat.Size()] += node_value;
  }

  // Preallocate space for the unique path data
  const int maxd = this->MaxDepth(root_id) + 2;
  PathElement *unique_path_data = new PathElement[(maxd * (maxd + 1)) / 2];
  auto *unique_path_data = new PathElement[(maxd * (maxd + 1)) / 2];

  TreeShap(feat, out_contribs, root_id, 0, unique_path_data,
           1, 1, -1, condition, condition_feature, 1);

@@ -835,14 +834,14 @@ inline void RegTree::CalculateContributions(const RegTree::FVec& feat, unsigned

/*! \brief get next position of the tree given current pid */
inline int RegTree::GetNext(int pid, bst_float fvalue, bool is_unknown) const {
  bst_float split_value = (*this)[pid].split_cond();
  bst_float split_value = (*this)[pid].SplitCond();
  if (is_unknown) {
    return (*this)[pid].cdefault();
    return (*this)[pid].DefaultChild();
  } else {
    if (fvalue < split_value) {
      return (*this)[pid].cleft();
      return (*this)[pid].LeftChild();
    } else {
      return (*this)[pid].cright();
      return (*this)[pid].RightChild();
    }
  }
}
@@ -25,7 +25,7 @@ namespace xgboost {
class TreeUpdater {
 public:
  /*! \brief virtual destructor */
  virtual ~TreeUpdater() {}
  virtual ~TreeUpdater() = default;
  /*!
   * \brief Initialize the updater with given arguments.
   * \param args arguments to the objective function.

@@ -40,7 +40,7 @@ class TreeUpdater {
   *  but maybe different random seeds, usually one tree is passed in at a time,
   *  there can be multiple trees when we train random forest style model
   */
  virtual void Update(HostDeviceVector<bst_gpair>* gpair,
  virtual void Update(HostDeviceVector<GradientPair>* gpair,
                      DMatrix* data,
                      const std::vector<RegTree*>& trees) = 0;
@@ -36,21 +36,21 @@ class MyLogistic : public ObjFunction {
  void GetGradient(HostDeviceVector<bst_float> *preds,
                   const MetaInfo &info,
                   int iter,
                   HostDeviceVector<bst_gpair> *out_gpair) override {
    out_gpair->resize(preds->size());
    std::vector<bst_float>& preds_h = preds->data_h();
    std::vector<bst_gpair>& out_gpair_h = out_gpair->data_h();
                   HostDeviceVector<GradientPair> *out_gpair) override {
    out_gpair->Resize(preds->Size());
    std::vector<bst_float>& preds_h = preds->HostVector();
    std::vector<GradientPair>& out_gpair_h = out_gpair->HostVector();
    for (size_t i = 0; i < preds_h.size(); ++i) {
      bst_float w = info.GetWeight(i);
      // scale the negative examples!
      if (info.labels[i] == 0.0f) w *= param_.scale_neg_weight;
      if (info.labels_[i] == 0.0f) w *= param_.scale_neg_weight;
      // logistic transformation
      bst_float p = 1.0f / (1.0f + std::exp(-preds_h[i]));
      // this is the gradient
      bst_float grad = (p - info.labels[i]) * w;
      bst_float grad = (p - info.labels_[i]) * w;
      // this is the second order gradient
      bst_float hess = p * (1.0f - p) * w;
      out_gpair_h.at(i) = bst_gpair(grad, hess);
      out_gpair_h.at(i) = GradientPair(grad, hess);
    }
  }
  const char* DefaultEvalMetric() const override {

@@ -58,7 +58,7 @@ class MyLogistic : public ObjFunction {
  }
  void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
    // transform margin value to probability.
    std::vector<bst_float> &preds = io_preds->data_h();
    std::vector<bst_float> &preds = io_preds->HostVector();
    for (size_t i = 0; i < preds.size(); ++i) {
      preds[i] = 1.0f / (1.0f + std::exp(-preds[i]));
    }
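Note: the gradient and hessian in the demo objective above follow from the log loss on the raw margin m. With p = \sigma(m) = 1/(1 + e^{-m}) and label y,

  L(m) = -y \log p - (1 - y) \log(1 - p),
  \frac{\partial L}{\partial m} = p - y,
  \frac{\partial^2 L}{\partial m^2} = p (1 - p),

which, multiplied by the instance weight w, matches the grad and hess values computed in GetGradient.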
@@ -27,7 +27,7 @@ class Booster {
        initialized_(false),
        learner_(Learner::Create(cache_mats)) {}

  inline Learner* learner() {
  inline Learner* learner() {  // NOLINT
    return learner_.get();
  }

@@ -40,7 +40,7 @@ class Booster {
        return x.first == name;
      });
    if (it == cfg_.end()) {
      cfg_.push_back(std::make_pair(name, val));
      cfg_.emplace_back(name, val);
    } else {
      (*it).second = val;
    }

@@ -193,11 +193,11 @@ struct XGBAPIThreadLocalEntry {
  /*! \brief returning float vector. */
  HostDeviceVector<bst_float> ret_vec_float;
  /*! \brief temp variable of gradient pairs. */
  HostDeviceVector<bst_gpair> tmp_gpair;
  HostDeviceVector<GradientPair> tmp_gpair;
};

// define the threadlocal store.
typedef dmlc::ThreadLocalStore<XGBAPIThreadLocalEntry> XGBAPIThreadLocalStore;
using XGBAPIThreadLocalStore = dmlc::ThreadLocalStore<XGBAPIThreadLocalEntry>;

int XGDMatrixCreateFromFile(const char *fname,
                            int silent,

@@ -254,14 +254,14 @@ XGB_DLL int XGDMatrixCreateFromCSREx(const size_t* indptr,
    mat.row_ptr_.push_back(mat.row_data_.size());
  }

  mat.info.num_col = num_column;
  mat.info.num_col_ = num_column;
  if (num_col > 0) {
    CHECK_LE(mat.info.num_col, num_col)
        << "num_col=" << num_col << " vs " << mat.info.num_col;
    mat.info.num_col = num_col;
    CHECK_LE(mat.info.num_col_, num_col)
        << "num_col=" << num_col << " vs " << mat.info.num_col_;
    mat.info.num_col_ = num_col;
  }
  mat.info.num_row = nindptr - 1;
  mat.info.num_nonzero = mat.row_data_.size();
  mat.info.num_row_ = nindptr - 1;
  mat.info.num_nonzero_ = mat.row_data_.size();
  *out = new std::shared_ptr<DMatrix>(DMatrix::Create(std::move(source)));
  API_END();
}

@@ -317,13 +317,13 @@ XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t* col_ptr,
      }
    }
  }
  mat.info.num_row = mat.row_ptr_.size() - 1;
  mat.info.num_row_ = mat.row_ptr_.size() - 1;
  if (num_row > 0) {
    CHECK_LE(mat.info.num_row, num_row);
    mat.info.num_row = num_row;
    CHECK_LE(mat.info.num_row_, num_row);
    mat.info.num_row_ = num_row;
  }
  mat.info.num_col = ncol;
  mat.info.num_nonzero = nelem;
  mat.info.num_col_ = ncol;
  mat.info.num_nonzero_ = nelem;
  *out = new std::shared_ptr<DMatrix>(DMatrix::Create(std::move(source)));
  API_END();
}

@@ -353,8 +353,8 @@ XGB_DLL int XGDMatrixCreateFromMat(const bst_float* data,
  data::SimpleCSRSource& mat = *source;
  mat.row_ptr_.resize(1+nrow);
  bool nan_missing = common::CheckNAN(missing);
  mat.info.num_row = nrow;
  mat.info.num_col = ncol;
  mat.info.num_row_ = nrow;
  mat.info.num_col_ = ncol;
  const bst_float* data0 = data;

  // count elements for sizing data

@@ -389,12 +389,12 @@ XGB_DLL int XGDMatrixCreateFromMat(const bst_float* data,
    }
  }

  mat.info.num_nonzero = mat.row_data_.size();
  mat.info.num_nonzero_ = mat.row_data_.size();
  *out = new std::shared_ptr<DMatrix>(DMatrix::Create(std::move(source)));
  API_END();
}

void prefixsum_inplace(size_t *x, size_t N) {
void PrefixSum(size_t *x, size_t N) {
  size_t *suma;
#pragma omp parallel
  {
@ -425,12 +425,10 @@ void prefixsum_inplace(size_t *x, size_t N) {
|
||||
delete[] suma;
|
||||
}
|
||||
|
||||
|
||||
XGB_DLL int XGDMatrixCreateFromMat_omp(const bst_float* data,
|
||||
XGB_DLL int XGDMatrixCreateFromMat_omp(const bst_float* data, // NOLINT
|
||||
xgboost::bst_ulong nrow,
|
||||
xgboost::bst_ulong ncol,
|
||||
bst_float missing,
|
||||
DMatrixHandle* out,
|
||||
bst_float missing, DMatrixHandle* out,
|
||||
int nthread) {
|
||||
// avoid openmp unless enough data to be worth it to avoid overhead costs
|
||||
if (nrow*ncol <= 10000*50) {
|
||||
@ -446,8 +444,8 @@ XGB_DLL int XGDMatrixCreateFromMat_omp(const bst_float* data,
|
||||
std::unique_ptr<data::SimpleCSRSource> source(new data::SimpleCSRSource());
|
||||
data::SimpleCSRSource& mat = *source;
|
||||
mat.row_ptr_.resize(1+nrow);
|
||||
mat.info.num_row = nrow;
|
||||
mat.info.num_col = ncol;
|
||||
mat.info.num_row_ = nrow;
|
||||
mat.info.num_col_ = ncol;
|
||||
|
||||
// Check for errors in missing elements
|
||||
// Count elements per row (to avoid otherwise need to copy)
|
||||
@ -480,7 +478,7 @@ XGB_DLL int XGDMatrixCreateFromMat_omp(const bst_float* data,
|
||||
}
|
||||
|
||||
// do cumulative sum (to avoid otherwise need to copy)
|
||||
prefixsum_inplace(&mat.row_ptr_[0], mat.row_ptr_.size());
|
||||
PrefixSum(&mat.row_ptr_[0], mat.row_ptr_.size());
|
||||
mat.row_data_.resize(mat.row_data_.size() + mat.row_ptr_.back());
|
||||
|
||||
// Fill data matrix (now that know size, no need for slow push_back())
|
||||
@ -500,7 +498,7 @@ XGB_DLL int XGDMatrixCreateFromMat_omp(const bst_float* data,
|
||||
}
|
||||
}
|
||||
|
||||
mat.info.num_nonzero = mat.row_data_.size();
|
||||
mat.info.num_nonzero_ = mat.row_data_.size();
|
||||
*out = new std::shared_ptr<DMatrix>(DMatrix::Create(std::move(source)));
|
||||
API_END();
|
||||
}
|
||||
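PrefixSum (renamed from prefixsum_inplace above) converts the per-row element counts accumulated in row_ptr_ into CSR offsets before the fill pass. A serial sketch of the intended result, without the OpenMP partitioning the real function uses:

// In-place inclusive prefix sum: {0, 2, 0, 3} becomes {0, 2, 2, 5}.
inline void PrefixSumSerial(size_t *x, size_t n) {
  for (size_t i = 1; i < n; ++i) {
    x[i] += x[i - 1];
  }
}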
@ -516,12 +514,12 @@ XGB_DLL int XGDMatrixSliceDMatrix(DMatrixHandle handle,
src.CopyFrom(static_cast<std::shared_ptr<DMatrix>*>(handle)->get());
data::SimpleCSRSource& ret = *source;

CHECK_EQ(src.info.group_ptr.size(), 0U)
CHECK_EQ(src.info.group_ptr_.size(), 0U)
<< "slice does not support group structure";

ret.Clear();
ret.info.num_row = len;
ret.info.num_col = src.info.num_col;
ret.info.num_row_ = len;
ret.info.num_col_ = src.info.num_col_;

dmlc::DataIter<RowBatch>* iter = &src;
iter->BeforeFirst();
@ -532,23 +530,22 @@ XGB_DLL int XGDMatrixSliceDMatrix(DMatrixHandle handle,
const int ridx = idxset[i];
RowBatch::Inst inst = batch[ridx];
CHECK_LT(static_cast<xgboost::bst_ulong>(ridx), batch.size);
ret.row_data_.resize(ret.row_data_.size() + inst.length);
std::memcpy(dmlc::BeginPtr(ret.row_data_) + ret.row_ptr_.back(), inst.data,
sizeof(RowBatch::Entry) * inst.length);
ret.row_data_.insert(ret.row_data_.end(), inst.data,
inst.data + inst.length);
ret.row_ptr_.push_back(ret.row_ptr_.back() + inst.length);
ret.info.num_nonzero += inst.length;
ret.info.num_nonzero_ += inst.length;

if (src.info.labels.size() != 0) {
ret.info.labels.push_back(src.info.labels[ridx]);
if (src.info.labels_.size() != 0) {
ret.info.labels_.push_back(src.info.labels_[ridx]);
}
if (src.info.weights.size() != 0) {
ret.info.weights.push_back(src.info.weights[ridx]);
if (src.info.weights_.size() != 0) {
ret.info.weights_.push_back(src.info.weights_[ridx]);
}
if (src.info.base_margin.size() != 0) {
ret.info.base_margin.push_back(src.info.base_margin[ridx]);
if (src.info.base_margin_.size() != 0) {
ret.info.base_margin_.push_back(src.info.base_margin_[ridx]);
}
if (src.info.root_index.size() != 0) {
ret.info.root_index.push_back(src.info.root_index[ridx]);
if (src.info.root_index_.size() != 0) {
ret.info.root_index_.push_back(src.info.root_index_[ridx]);
}
}
*out = new std::shared_ptr<DMatrix>(DMatrix::Create(std::move(source)));
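The slice loop above rebuilds a CSR matrix row by row. As a worked example with illustrative values: slicing rows {2, 0} of a source whose row_ptr is {0, 2, 5, 7} copies 2 + 2 = 4 entries and yields ret.row_ptr_ = {0, 2, 4}, with labels_, weights_, base_margin_ and root_index_ sliced to match whenever the source provides them.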
@ -575,7 +572,7 @@ XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle,
xgboost::bst_ulong len) {
API_BEGIN();
static_cast<std::shared_ptr<DMatrix>*>(handle)
->get()->info().SetInfo(field, info, kFloat32, len);
->get()->Info().SetInfo(field, info, kFloat32, len);
API_END();
}

@ -585,7 +582,7 @@ XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle,
xgboost::bst_ulong len) {
API_BEGIN();
static_cast<std::shared_ptr<DMatrix>*>(handle)
->get()->info().SetInfo(field, info, kUInt32, len);
->get()->Info().SetInfo(field, info, kUInt32, len);
API_END();
}

@ -593,12 +590,12 @@ XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle,
const unsigned* group,
xgboost::bst_ulong len) {
API_BEGIN();
std::shared_ptr<DMatrix> *pmat = static_cast<std::shared_ptr<DMatrix>*>(handle);
MetaInfo& info = pmat->get()->info();
info.group_ptr.resize(len + 1);
info.group_ptr[0] = 0;
auto *pmat = static_cast<std::shared_ptr<DMatrix>*>(handle);
MetaInfo& info = pmat->get()->Info();
info.group_ptr_.resize(len + 1);
info.group_ptr_[0] = 0;
for (uint64_t i = 0; i < len; ++i) {
info.group_ptr[i + 1] = info.group_ptr[i] + group[i];
info.group_ptr_[i + 1] = info.group_ptr_[i] + group[i];
}
API_END();
}
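XGDMatrixSetGroup turns per-group sizes into a cumulative offset array: for group sizes {3, 2, 4} the loop above produces group_ptr_ = {0, 3, 5, 9}, so query group i occupies rows [group_ptr_[i], group_ptr_[i + 1]).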
@ -608,18 +605,18 @@ XGB_DLL int XGDMatrixGetFloatInfo(const DMatrixHandle handle,
xgboost::bst_ulong* out_len,
const bst_float** out_dptr) {
API_BEGIN();
const MetaInfo& info = static_cast<std::shared_ptr<DMatrix>*>(handle)->get()->info();
const MetaInfo& info = static_cast<std::shared_ptr<DMatrix>*>(handle)->get()->Info();
const std::vector<bst_float>* vec = nullptr;
if (!std::strcmp(field, "label")) {
vec = &info.labels;
vec = &info.labels_;
} else if (!std::strcmp(field, "weight")) {
vec = &info.weights;
vec = &info.weights_;
} else if (!std::strcmp(field, "base_margin")) {
vec = &info.base_margin;
vec = &info.base_margin_;
} else {
LOG(FATAL) << "Unknown float field name " << field;
}
*out_len = static_cast<xgboost::bst_ulong>(vec->size());
*out_len = static_cast<xgboost::bst_ulong>(vec->size()); // NOLINT
*out_dptr = dmlc::BeginPtr(*vec);
API_END();
}
@ -629,15 +626,15 @@ XGB_DLL int XGDMatrixGetUIntInfo(const DMatrixHandle handle,
xgboost::bst_ulong *out_len,
const unsigned **out_dptr) {
API_BEGIN();
const MetaInfo& info = static_cast<std::shared_ptr<DMatrix>*>(handle)->get()->info();
const MetaInfo& info = static_cast<std::shared_ptr<DMatrix>*>(handle)->get()->Info();
const std::vector<unsigned>* vec = nullptr;
if (!std::strcmp(field, "root_index")) {
vec = &info.root_index;
vec = &info.root_index_;
*out_len = static_cast<xgboost::bst_ulong>(vec->size());
*out_dptr = dmlc::BeginPtr(*vec);
} else {
LOG(FATAL) << "Unknown uint field name " << field;
}
*out_len = static_cast<xgboost::bst_ulong>(vec->size());
*out_dptr = dmlc::BeginPtr(*vec);
API_END();
}

@ -645,7 +642,7 @@ XGB_DLL int XGDMatrixNumRow(const DMatrixHandle handle,
xgboost::bst_ulong *out) {
API_BEGIN();
*out = static_cast<xgboost::bst_ulong>(
static_cast<std::shared_ptr<DMatrix>*>(handle)->get()->info().num_row);
static_cast<std::shared_ptr<DMatrix>*>(handle)->get()->Info().num_row_);
API_END();
}

@ -653,7 +650,7 @@ XGB_DLL int XGDMatrixNumCol(const DMatrixHandle handle,
xgboost::bst_ulong *out) {
API_BEGIN();
*out = static_cast<size_t>(
static_cast<std::shared_ptr<DMatrix>*>(handle)->get()->info().num_col);
static_cast<std::shared_ptr<DMatrix>*>(handle)->get()->Info().num_col_);
API_END();
}

@ -688,8 +685,8 @@ XGB_DLL int XGBoosterUpdateOneIter(BoosterHandle handle,
int iter,
DMatrixHandle dtrain) {
API_BEGIN();
Booster* bst = static_cast<Booster*>(handle);
std::shared_ptr<DMatrix> *dtr =
auto* bst = static_cast<Booster*>(handle);
auto *dtr =
static_cast<std::shared_ptr<DMatrix>*>(dtrain);

bst->LazyInit();
@ -702,15 +699,15 @@ XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle,
bst_float *grad,
bst_float *hess,
xgboost::bst_ulong len) {
HostDeviceVector<bst_gpair>& tmp_gpair = XGBAPIThreadLocalStore::Get()->tmp_gpair;
HostDeviceVector<GradientPair>& tmp_gpair = XGBAPIThreadLocalStore::Get()->tmp_gpair;
API_BEGIN();
Booster* bst = static_cast<Booster*>(handle);
std::shared_ptr<DMatrix>* dtr =
auto* bst = static_cast<Booster*>(handle);
auto* dtr =
static_cast<std::shared_ptr<DMatrix>*>(dtrain);
tmp_gpair.resize(len);
std::vector<bst_gpair>& tmp_gpair_h = tmp_gpair.data_h();
tmp_gpair.Resize(len);
std::vector<GradientPair>& tmp_gpair_h = tmp_gpair.HostVector();
for (xgboost::bst_ulong i = 0; i < len; ++i) {
tmp_gpair_h[i] = bst_gpair(grad[i], hess[i]);
tmp_gpair_h[i] = GradientPair(grad[i], hess[i]);
}

bst->LazyInit();
@ -726,13 +723,13 @@ XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle,
const char** out_str) {
std::string& eval_str = XGBAPIThreadLocalStore::Get()->ret_str;
API_BEGIN();
Booster* bst = static_cast<Booster*>(handle);
auto* bst = static_cast<Booster*>(handle);
std::vector<DMatrix*> data_sets;
std::vector<std::string> data_names;

for (xgboost::bst_ulong i = 0; i < len; ++i) {
data_sets.push_back(static_cast<std::shared_ptr<DMatrix>*>(dmats[i])->get());
data_names.push_back(std::string(evnames[i]));
data_names.emplace_back(evnames[i]);
}

bst->LazyInit();
@ -750,7 +747,7 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
HostDeviceVector<bst_float>& preds =
XGBAPIThreadLocalStore::Get()->ret_vec_float;
API_BEGIN();
Booster *bst = static_cast<Booster*>(handle);
auto *bst = static_cast<Booster*>(handle);
bst->LazyInit();
bst->learner()->Predict(
static_cast<std::shared_ptr<DMatrix>*>(dmat)->get(),
@ -760,8 +757,8 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
(option_mask & 4) != 0,
(option_mask & 8) != 0,
(option_mask & 16) != 0);
*out_result = dmlc::BeginPtr(preds.data_h());
*len = static_cast<xgboost::bst_ulong>(preds.size());
*out_result = dmlc::BeginPtr(preds.HostVector());
*len = static_cast<xgboost::bst_ulong>(preds.Size());
API_END();
}

@ -775,7 +772,7 @@ XGB_DLL int XGBoosterLoadModel(BoosterHandle handle, const char* fname) {
XGB_DLL int XGBoosterSaveModel(BoosterHandle handle, const char* fname) {
API_BEGIN();
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname, "w"));
Booster *bst = static_cast<Booster*>(handle);
auto *bst = static_cast<Booster*>(handle);
bst->LazyInit();
bst->learner()->Save(fo.get());
API_END();
@ -798,7 +795,7 @@ XGB_DLL int XGBoosterGetModelRaw(BoosterHandle handle,

API_BEGIN();
common::MemoryBufferStream fo(&raw_str);
Booster *bst = static_cast<Booster*>(handle);
auto *bst = static_cast<Booster*>(handle);
bst->LazyInit();
bst->learner()->Save(&fo);
*out_dptr = dmlc::BeginPtr(raw_str);
@ -815,7 +812,7 @@ inline void XGBoostDumpModelImpl(
const char*** out_models) {
std::vector<std::string>& str_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_str;
std::vector<const char*>& charp_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_charp;
Booster *bst = static_cast<Booster*>(handle);
auto *bst = static_cast<Booster*>(handle);
bst->LazyInit();
str_vecs = bst->learner()->DumpModel(fmap, with_stats != 0, format);
charp_vecs.resize(str_vecs.size());
@ -881,7 +878,7 @@ XGB_DLL int XGBoosterGetAttr(BoosterHandle handle,
const char* key,
const char** out,
int* success) {
Booster* bst = static_cast<Booster*>(handle);
auto* bst = static_cast<Booster*>(handle);
std::string& ret_str = XGBAPIThreadLocalStore::Get()->ret_str;
API_BEGIN();
if (bst->learner()->GetAttr(key, &ret_str)) {
@ -897,7 +894,7 @@ XGB_DLL int XGBoosterGetAttr(BoosterHandle handle,
XGB_DLL int XGBoosterSetAttr(BoosterHandle handle,
const char* key,
const char* value) {
Booster* bst = static_cast<Booster*>(handle);
auto* bst = static_cast<Booster*>(handle);
API_BEGIN();
if (value == nullptr) {
bst->learner()->DelAttr(key);
@ -912,7 +909,7 @@ XGB_DLL int XGBoosterGetAttrNames(BoosterHandle handle,
const char*** out) {
std::vector<std::string>& str_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_str;
std::vector<const char*>& charp_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_charp;
Booster *bst = static_cast<Booster*>(handle);
auto *bst = static_cast<Booster*>(handle);
API_BEGIN();
str_vecs = bst->learner()->GetAttrNames();
charp_vecs.resize(str_vecs.size());
@ -927,7 +924,7 @@ XGB_DLL int XGBoosterGetAttrNames(BoosterHandle handle,
XGB_DLL int XGBoosterLoadRabitCheckpoint(BoosterHandle handle,
int* version) {
API_BEGIN();
Booster* bst = static_cast<Booster*>(handle);
auto* bst = static_cast<Booster*>(handle);
*version = rabit::LoadCheckPoint(bst->learner());
if (*version != 0) {
bst->initialized_ = true;
@ -937,7 +934,7 @@ XGB_DLL int XGBoosterLoadRabitCheckpoint(BoosterHandle handle,

XGB_DLL int XGBoosterSaveRabitCheckpoint(BoosterHandle handle) {
API_BEGIN();
Booster* bst = static_cast<Booster*>(handle);
auto* bst = static_cast<Booster*>(handle);
if (bst->learner()->AllowLazyCheckPoint()) {
rabit::LazyCheckPoint(bst->learner());
} else {

@ -10,7 +10,7 @@ struct XGBAPIErrorEntry {
std::string last_error;
};

typedef dmlc::ThreadLocalStore<XGBAPIErrorEntry> XGBAPIErrorStore;
using XGBAPIErrorStore = dmlc::ThreadLocalStore<XGBAPIErrorEntry>;

const char *XGBGetLastError() {
return XGBAPIErrorStore::Get()->last_error.c_str();

@ -134,7 +134,7 @@ struct CLIParam : public dmlc::Parameter<CLIParam> {
char evname[256];
CHECK_EQ(sscanf(kv.first.c_str(), "eval[%[^]]", evname), 1)
<< "must specify evaluation name for display";
eval_data_names.push_back(std::string(evname));
eval_data_names.emplace_back(evname);
eval_data_paths.push_back(kv.second);
}
}
@ -177,7 +177,7 @@ void CLITrain(const CLIParam& param) {
std::vector<std::string> eval_data_names = param.eval_data_names;
if (param.eval_train) {
eval_datasets.push_back(dtrain.get());
eval_data_names.push_back(std::string("train"));
eval_data_names.emplace_back("train");
}
// initialize the learner.
std::unique_ptr<Learner> learner(Learner::Create(cache_mats));
@ -332,7 +332,7 @@ void CLIPredict(const CLIParam& param) {
std::unique_ptr<dmlc::Stream> fo(
dmlc::Stream::Create(param.name_pred.c_str(), "w"));
dmlc::ostream os(fo.get());
for (bst_float p : preds.data_h()) {
for (bst_float p : preds.HostVector()) {
os << p << '\n';
}
// force flush before fo destruct.
@ -347,17 +347,17 @@ int CLIRunTask(int argc, char *argv[]) {
rabit::Init(argc, argv);

std::vector<std::pair<std::string, std::string> > cfg;
cfg.push_back(std::make_pair("seed", "0"));
cfg.emplace_back("seed", "0");

common::ConfigIterator itr(argv[1]);
while (itr.Next()) {
cfg.push_back(std::make_pair(std::string(itr.name()), std::string(itr.val())));
cfg.emplace_back(std::string(itr.Name()), std::string(itr.Val()));
}

for (int i = 2; i < argc; ++i) {
char name[256], val[256];
if (sscanf(argv[i], "%[^=]=%s", name, val) == 2) {
cfg.push_back(std::make_pair(std::string(name), std::string(val)));
cfg.emplace_back(std::string(name), std::string(val));
}
}
CLIParam param;

@ -68,10 +68,10 @@ inline Float8 round(const Float8& x) {

// Overload std::max/min
namespace std {
inline avx::Float8 max(const avx::Float8& a, const avx::Float8& b) {
inline avx::Float8 max(const avx::Float8& a, const avx::Float8& b) { // NOLINT
return avx::Float8(_mm256_max_ps(a.x, b.x));
}
inline avx::Float8 min(const avx::Float8& a, const avx::Float8& b) {
inline avx::Float8 min(const avx::Float8& a, const avx::Float8& b) { // NOLINT
return avx::Float8(_mm256_min_ps(a.x, b.x));
}
} // namespace std
@ -172,7 +172,7 @@ inline Float8 Sigmoid(Float8 x) {
}

// Store 8 gradient pairs given vectors containing gradient and Hessian
inline void StoreGpair(xgboost::bst_gpair* dst, const Float8& grad,
inline void StoreGpair(xgboost::GradientPair* dst, const Float8& grad,
const Float8& hess) {
float* ptr = reinterpret_cast<float*>(dst);
__m256 gpair_low = _mm256_unpacklo_ps(grad.x, hess.x);
@ -190,11 +190,11 @@ namespace avx {
* \brief Fallback implementation not using AVX.
*/

struct Float8 {
struct Float8 { // NOLINT
float x[8];
explicit Float8(const float& val) {
for (int i = 0; i < 8; i++) {
x[i] = val;
for (float & i : x) {
i = val;
}
}
explicit Float8(const float* vec) {
@ -202,7 +202,7 @@ struct Float8 {
x[i] = vec[i];
}
}
Float8() {}
Float8() = default;
Float8& operator+=(const Float8& rhs) {
for (int i = 0; i < 8; i++) {
x[i] += rhs.x[i];
@ -228,7 +228,7 @@ struct Float8 {
return *this;
}
void Print() {
float* f = reinterpret_cast<float*>(&x);
auto* f = reinterpret_cast<float*>(&x);
printf("%f %f %f %f %f %f %f %f\n", f[0], f[1], f[2], f[3], f[4], f[5],
f[6], f[7]);
}
@ -252,10 +252,10 @@ inline Float8 operator/(Float8 lhs, const Float8& rhs) {
}

// Store 8 gradient pairs given vectors containing gradient and Hessian
inline void StoreGpair(xgboost::bst_gpair* dst, const Float8& grad,
inline void StoreGpair(xgboost::GradientPair* dst, const Float8& grad,
const Float8& hess) {
for (int i = 0; i < 8; i++) {
dst[i] = xgboost::bst_gpair(grad.x[i], hess.x[i]);
dst[i] = xgboost::GradientPair(grad.x[i], hess.x[i]);
}
}

@ -269,14 +269,14 @@ inline Float8 Sigmoid(Float8 x) {
} // namespace avx

namespace std {
inline avx::Float8 max(const avx::Float8& a, const avx::Float8& b) {
inline avx::Float8 max(const avx::Float8& a, const avx::Float8& b) { // NOLINT
avx::Float8 max;
for (int i = 0; i < 8; i++) {
max.x[i] = std::max(a.x[i], b.x[i]);
}
return max;
}
inline avx::Float8 min(const avx::Float8& a, const avx::Float8& b) {
inline avx::Float8 min(const avx::Float8& a, const avx::Float8& b) { // NOLINT
avx::Float8 min;
for (int i = 0; i < 8; i++) {
min.x[i] = std::min(a.x[i], b.x[i]);

@ -42,7 +42,7 @@ struct BitMap {
inline void InitFromBool(const std::vector<int>& vec) {
this->Resize(vec.size());
// parallel over the full cases
bst_omp_uint nsize = static_cast<bst_omp_uint>(vec.size() / 32);
auto nsize = static_cast<bst_omp_uint>(vec.size() / 32);
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nsize; ++i) {
uint32_t res = 0;

@ -9,20 +9,26 @@
#define XGBOOST_COMMON_COLUMN_MATRIX_H_

#define XGBOOST_TYPE_SWITCH(dtype, OP) \
switch (dtype) { \
case xgboost::common::uint32 : { \
typedef uint32_t DType; \
OP; break; \
\
switch(dtype) { \
case xgboost::common::uint32: { \
using DType = uint32_t; \
OP; \
break; \
} \
case xgboost::common::uint16 : { \
typedef uint16_t DType; \
OP; break; \
case xgboost::common::uint16: { \
using DType = uint16_t; \
OP; \
break; \
} \
case xgboost::common::uint8 : { \
typedef uint8_t DType; \
OP; break; \
default: LOG(FATAL) << "don't recognize type flag" << dtype; \
case xgboost::common::uint8: { \
using DType = uint8_t; \
OP; \
break; \
default: \
LOG(FATAL) << "don't recognize type flag" << dtype; \
} \
\
}

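A hypothetical use of the reformatted XGBOOST_TYPE_SWITCH macro, dispatching on a column's runtime bin width to a statically typed body (col_dtype, raw_index, row_id and total are illustrative names, not from this patch):

// DType is bound by the macro to uint8_t, uint16_t or uint32_t.
XGBOOST_TYPE_SWITCH(col_dtype, {
  const DType* bins = reinterpret_cast<const DType*>(raw_index);
  total += bins[row_id];
});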
#include <type_traits>
@ -31,11 +37,12 @@ switch (dtype) { \
#include "hist_util.h"
#include "../tree/fast_hist_param.h"

using xgboost::tree::FastHistParam;

namespace xgboost {
namespace common {

using tree::FastHistParam;

/*! \brief indicator of data type used for storing bin id's in a column. */
enum DataType {
uint8 = 1,
@ -78,7 +85,7 @@ class ColumnMatrix {
slot of internal buffer. */
packing_factor_ = sizeof(uint32_t) / static_cast<size_t>(this->dtype);

const bst_uint nfeature = static_cast<bst_uint>(gmat.cut->row_ptr.size() - 1);
const auto nfeature = static_cast<bst_uint>(gmat.cut->row_ptr.size() - 1);
const size_t nrow = gmat.row_ptr.size() - 1;

// identify type of each column

@ -14,7 +14,7 @@ struct RandomThreadLocalEntry {
GlobalRandomEngine engine;
};

typedef dmlc::ThreadLocalStore<RandomThreadLocalEntry> RandomThreadLocalStore;
using RandomThreadLocalStore = dmlc::ThreadLocalStore<RandomThreadLocalEntry>;

GlobalRandomEngine& GlobalRandom() {
return RandomThreadLocalStore::Get()->engine;

@ -11,20 +11,20 @@
namespace xgboost {
namespace common {

typedef unsigned char compressed_byte_t;
using CompressedByteT = unsigned char;

namespace detail {
inline void SetBit(compressed_byte_t *byte, int bit_idx) {
inline void SetBit(CompressedByteT *byte, int bit_idx) {
*byte |= 1 << bit_idx;
}
template <typename T>
inline T CheckBit(const T &byte, int bit_idx) {
return byte & (1 << bit_idx);
}
inline void ClearBit(compressed_byte_t *byte, int bit_idx) {
inline void ClearBit(CompressedByteT *byte, int bit_idx) {
*byte &= ~(1 << bit_idx);
}
static const int padding = 4; // Assign padding so we can read slightly off
static const int kPadding = 4; // Assign padding so we can read slightly off
// the beginning of the array

// The number of bits required to represent a given unsigned range
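The comment above introduces detail::SymbolBits, whose body is not shown in this hunk. A sketch of the usual computation, assuming num_symbols distinct values need ceil(log2(num_symbols)) bits:

#include <cmath>
#include <cstddef>

// e.g. 256 symbols -> 8 bits, 257 symbols -> 9 bits.
inline size_t SymbolBitsSketch(size_t num_symbols) {
  return static_cast<size_t>(
      std::ceil(std::log2(static_cast<double>(num_symbols))));
}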
@ -76,16 +76,16 @@ class CompressedBufferWriter {
size_t compressed_size = static_cast<size_t>(std::ceil(
static_cast<double>(detail::SymbolBits(num_symbols) * num_elements) /
bits_per_byte));
return compressed_size + detail::padding;
return compressed_size + detail::kPadding;
}

template <typename T>
void WriteSymbol(compressed_byte_t *buffer, T symbol, size_t offset) {
void WriteSymbol(CompressedByteT *buffer, T symbol, size_t offset) {
const int bits_per_byte = 8;

for (size_t i = 0; i < symbol_bits_; i++) {
size_t byte_idx = ((offset + 1) * symbol_bits_ - (i + 1)) / bits_per_byte;
byte_idx += detail::padding;
byte_idx += detail::kPadding;
size_t bit_idx =
((bits_per_byte + i) - ((offset + 1) * symbol_bits_)) % bits_per_byte;

@ -96,20 +96,20 @@ class CompressedBufferWriter {
}
}
}
template <typename iter_t>
void Write(compressed_byte_t *buffer, iter_t input_begin, iter_t input_end) {
template <typename IterT>
void Write(CompressedByteT *buffer, IterT input_begin, IterT input_end) {
uint64_t tmp = 0;
size_t stored_bits = 0;
const size_t max_stored_bits = 64 - symbol_bits_;
size_t buffer_position = detail::padding;
size_t buffer_position = detail::kPadding;
const size_t num_symbols = input_end - input_begin;
for (size_t i = 0; i < num_symbols; i++) {
typename std::iterator_traits<iter_t>::value_type symbol = input_begin[i];
typename std::iterator_traits<IterT>::value_type symbol = input_begin[i];
if (stored_bits > max_stored_bits) {
// Eject only full bytes
size_t tmp_bytes = stored_bits / 8;
for (size_t j = 0; j < tmp_bytes; j++) {
buffer[buffer_position] = static_cast<compressed_byte_t>(
buffer[buffer_position] = static_cast<CompressedByteT>(
tmp >> (stored_bits - (j + 1) * 8));
buffer_position++;
}
@ -129,10 +129,10 @@ class CompressedBufferWriter {
int shift_bits = static_cast<int>(stored_bits) - (j + 1) * 8;
if (shift_bits >= 0) {
buffer[buffer_position] =
static_cast<compressed_byte_t>(tmp >> shift_bits);
static_cast<CompressedByteT>(tmp >> shift_bits);
} else {
buffer[buffer_position] =
static_cast<compressed_byte_t>(tmp << std::abs(shift_bits));
static_cast<CompressedByteT>(tmp << std::abs(shift_bits));
}
buffer_position++;
}
@ -153,23 +153,21 @@ template <typename T>

class CompressedIterator {
public:
typedef CompressedIterator<T> self_type; ///< My own type
typedef ptrdiff_t
difference_type; ///< Type to express the result of subtracting
/// one iterator from another
typedef T value_type; ///< The type of the element the iterator can point to
typedef value_type *pointer; ///< The type of a pointer to an element the
/// iterator can point to
typedef value_type reference; ///< The type of a reference to an element the
/// iterator can point to
// Type definitions for thrust
typedef CompressedIterator<T> self_type; // NOLINT
typedef ptrdiff_t difference_type; // NOLINT
typedef T value_type; // NOLINT
typedef value_type *pointer; // NOLINT
typedef value_type reference; // NOLINT

private:
compressed_byte_t *buffer_;
CompressedByteT *buffer_;
size_t symbol_bits_;
size_t offset_;

public:
CompressedIterator() : buffer_(nullptr), symbol_bits_(0), offset_(0) {}
CompressedIterator(compressed_byte_t *buffer, int num_symbols)
CompressedIterator(CompressedByteT *buffer, int num_symbols)
: buffer_(buffer), offset_(0) {
symbol_bits_ = detail::SymbolBits(num_symbols);
}
@ -178,7 +176,7 @@ class CompressedIterator {
const int bits_per_byte = 8;
size_t start_bit_idx = ((offset_ + 1) * symbol_bits_ - 1);
size_t start_byte_idx = start_bit_idx / bits_per_byte;
start_byte_idx += detail::padding;
start_byte_idx += detail::kPadding;

// Read 5 bytes - the maximum we will need
uint64_t tmp = static_cast<uint64_t>(buffer_[start_byte_idx - 4]) << 32 |

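A hedged round-trip sketch for the renamed writer/iterator pair. The CompressedBufferWriter constructor and a static CalculateBufferSize helper are assumed here; only Write and the CompressedIterator constructor are visible in the hunks above:

std::vector<int> input = {0, 3, 1, 2};   // values in [0, 4)
const int num_symbols = 4;               // so SymbolBits(4) == 2 bits each
CompressedBufferWriter cbw(num_symbols);                      // assumed ctor
std::vector<CompressedByteT> buffer(
    CompressedBufferWriter::CalculateBufferSize(input.size(), num_symbols));
cbw.Write(buffer.data(), input.begin(), input.end());
CompressedIterator<int> it(buffer.data(), num_symbols);
// Expect it[2] == 1 after decompression.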
@ -24,33 +24,33 @@ class ConfigReaderBase {
* \brief get current name, called after Next returns true
* \return current parameter name
*/
inline const char *name(void) const {
return s_name.c_str();
inline const char *Name() const {
return s_name_.c_str();
}
/*!
* \brief get current value, called after Next returns true
* \return current parameter value
*/
inline const char *val(void) const {
return s_val.c_str();
inline const char *Val() const {
return s_val_.c_str();
}
/*!
* \brief move iterator to next position
* \return true if there is value in next position
*/
inline bool Next(void) {
inline bool Next() {
while (!this->IsEnd()) {
GetNextToken(&s_name);
if (s_name == "=") return false;
if (GetNextToken(&s_buf) || s_buf != "=") return false;
if (GetNextToken(&s_val) || s_val == "=") return false;
GetNextToken(&s_name_);
if (s_name_ == "=") return false;
if (GetNextToken(&s_buf_) || s_buf_ != "=") return false;
if (GetNextToken(&s_val_) || s_val_ == "=") return false;
return true;
}
return false;
}
// called before usage
inline void Init(void) {
ch_buf = this->GetChar();
inline void Init() {
ch_buf_ = this->GetChar();
}

protected:
@ -58,38 +58,38 @@ class ConfigReaderBase {
* \brief to be implemented by subclass,
* get next token, return EOF if end of file
*/
virtual char GetChar(void) = 0;
virtual char GetChar() = 0;
/*! \brief to be implemented by child, check if end of stream */
virtual bool IsEnd(void) = 0;
virtual bool IsEnd() = 0;

private:
char ch_buf;
std::string s_name, s_val, s_buf;
char ch_buf_;
std::string s_name_, s_val_, s_buf_;

inline void SkipLine(void) {
inline void SkipLine() {
do {
ch_buf = this->GetChar();
} while (ch_buf != EOF && ch_buf != '\n' && ch_buf != '\r');
ch_buf_ = this->GetChar();
} while (ch_buf_ != EOF && ch_buf_ != '\n' && ch_buf_ != '\r');
}

inline void ParseStr(std::string *tok) {
while ((ch_buf = this->GetChar()) != EOF) {
switch (ch_buf) {
while ((ch_buf_ = this->GetChar()) != EOF) {
switch (ch_buf_) {
case '\\': *tok += this->GetChar(); break;
case '\"': return;
case '\r':
case '\n': LOG(FATAL)<< "ConfigReader: unterminated string";
default: *tok += ch_buf;
default: *tok += ch_buf_;
}
}
LOG(FATAL) << "ConfigReader: unterminated string";
}
inline void ParseStrML(std::string *tok) {
while ((ch_buf = this->GetChar()) != EOF) {
switch (ch_buf) {
while ((ch_buf_ = this->GetChar()) != EOF) {
switch (ch_buf_) {
case '\\': *tok += this->GetChar(); break;
case '\'': return;
default: *tok += ch_buf;
default: *tok += ch_buf_;
}
}
LOG(FATAL) << "unterminated string";
@ -98,24 +98,24 @@ class ConfigReaderBase {
inline bool GetNextToken(std::string *tok) {
tok->clear();
bool new_line = false;
while (ch_buf != EOF) {
switch (ch_buf) {
while (ch_buf_ != EOF) {
switch (ch_buf_) {
case '#' : SkipLine(); new_line = true; break;
case '\"':
if (tok->length() == 0) {
ParseStr(tok); ch_buf = this->GetChar(); return new_line;
ParseStr(tok); ch_buf_ = this->GetChar(); return new_line;
} else {
LOG(FATAL) << "ConfigReader: token followed directly by string";
}
case '\'':
if (tok->length() == 0) {
ParseStrML(tok); ch_buf = this->GetChar(); return new_line;
ParseStrML(tok); ch_buf_ = this->GetChar(); return new_line;
} else {
LOG(FATAL) << "ConfigReader: token followed directly by string";
}
case '=':
if (tok->length() == 0) {
ch_buf = this->GetChar();
ch_buf_ = this->GetChar();
*tok = '=';
}
return new_line;
@ -124,12 +124,12 @@ class ConfigReaderBase {
if (tok->length() == 0) new_line = true;
case '\t':
case ' ' :
ch_buf = this->GetChar();
ch_buf_ = this->GetChar();
if (tok->length() != 0) return new_line;
break;
default:
*tok += ch_buf;
ch_buf = this->GetChar();
*tok += ch_buf_;
ch_buf_ = this->GetChar();
break;
}
}
@ -149,19 +149,19 @@ class ConfigStreamReader: public ConfigReaderBase {
* \brief constructor
* \param fin istream input stream
*/
explicit ConfigStreamReader(std::istream &fin) : fin(fin) {}
explicit ConfigStreamReader(std::istream &fin) : fin_(fin) {}

protected:
virtual char GetChar(void) {
return fin.get();
char GetChar() override {
return fin_.get();
}
/*! \brief to be implemented by child, check if end of stream */
virtual bool IsEnd(void) {
return fin.eof();
bool IsEnd() override {
return fin_.eof();
}

private:
std::istream &fin;
std::istream &fin_;
};

/*!
@ -173,20 +173,20 @@ class ConfigIterator: public ConfigStreamReader {
* \brief constructor
* \param fname name of configure file
*/
explicit ConfigIterator(const char *fname) : ConfigStreamReader(fi) {
fi.open(fname);
if (fi.fail()) {
explicit ConfigIterator(const char *fname) : ConfigStreamReader(fi_) {
fi_.open(fname);
if (fi_.fail()) {
LOG(FATAL) << "cannot open file " << fname;
}
ConfigReaderBase::Init();
}
/*! \brief destructor */
~ConfigIterator(void) {
fi.close();
~ConfigIterator() {
fi_.close();
}

private:
std::ifstream fi;
std::ifstream fi_;
};
} // namespace common
} // namespace xgboost

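The renamed tokenizer above consumes name = value pairs, '#' comments, and quoted strings. A sketch of how CLIRunTask-style code drives it (file name and contents are illustrative):

// mushroom.conf:
//   booster = gbtree          # a comment
//   eval[test] = "agaricus.txt.test"
common::ConfigIterator itr("mushroom.conf");
while (itr.Next()) {
  cfg.emplace_back(std::string(itr.Name()), std::string(itr.Val()));
}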
@ -25,16 +25,16 @@

namespace dh {

#define HOST_DEV_INLINE __host__ __device__ __forceinline__
#define HOST_DEV_INLINE XGBOOST_DEVICE __forceinline__
#define DEV_INLINE __device__ __forceinline__

/*
* Error handling functions
*/

#define safe_cuda(ans) throw_on_cuda_error((ans), __FILE__, __LINE__)
#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)

inline cudaError_t throw_on_cuda_error(cudaError_t code, const char *file,
inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file,
int line) {
if (code != cudaSuccess) {
std::stringstream ss;
@ -48,9 +48,9 @@ inline cudaError_t throw_on_cuda_error(cudaError_t code, const char *file,
}

#ifdef XGBOOST_USE_NCCL
#define safe_nccl(ans) throw_on_nccl_error((ans), __FILE__, __LINE__)
#define safe_nccl(ans) ThrowOnNcclError((ans), __FILE__, __LINE__)

inline ncclResult_t throw_on_nccl_error(ncclResult_t code, const char *file,
inline ncclResult_t ThrowOnNcclError(ncclResult_t code, const char *file,
int line) {
if (code != ncclSuccess) {
std::stringstream ss;
@ -64,16 +64,16 @@ inline ncclResult_t throw_on_nccl_error(ncclResult_t code, const char *file,
#endif

template <typename T>
T *raw(thrust::device_vector<T> &v) { // NOLINT
T *Raw(thrust::device_vector<T> &v) { // NOLINT
return raw_pointer_cast(v.data());
}

template <typename T>
const T *raw(const thrust::device_vector<T> &v) { // NOLINT
const T *Raw(const thrust::device_vector<T> &v) { // NOLINT
return raw_pointer_cast(v.data());
}

inline int n_visible_devices() {
inline int NVisibleDevices() {
int n_visgpus = 0;

dh::safe_cuda(cudaGetDeviceCount(&n_visgpus));
@ -81,40 +81,40 @@ inline int n_visible_devices() {
return n_visgpus;
}

inline int n_devices_all(int n_gpus) {
int n_devices_visible = dh::n_visible_devices();
inline int NDevicesAll(int n_gpus) {
int n_devices_visible = dh::NVisibleDevices();
int n_devices = n_gpus < 0 ? n_devices_visible : n_gpus;
return (n_devices);
}
inline int n_devices(int n_gpus, int num_rows) {
int n_devices = dh::n_devices_all(n_gpus);
inline int NDevices(int n_gpus, int num_rows) {
int n_devices = dh::NDevicesAll(n_gpus);
// fix-up device number to be limited by number of rows
n_devices = n_devices > num_rows ? num_rows : n_devices;
return (n_devices);
}

// if n_devices=-1, then use all visible devices
inline void synchronize_n_devices(int n_devices, std::vector<int> dList) {
inline void SynchronizeNDevices(int n_devices, std::vector<int> dList) {
for (int d_idx = 0; d_idx < n_devices; d_idx++) {
int device_idx = dList[d_idx];
safe_cuda(cudaSetDevice(device_idx));
safe_cuda(cudaDeviceSynchronize());
}
}
inline void synchronize_all() {
for (int device_idx = 0; device_idx < n_visible_devices(); device_idx++) {
inline void SynchronizeAll() {
for (int device_idx = 0; device_idx < NVisibleDevices(); device_idx++) {
safe_cuda(cudaSetDevice(device_idx));
safe_cuda(cudaDeviceSynchronize());
}
}

inline std::string device_name(int device_idx) {
inline std::string DeviceName(int device_idx) {
cudaDeviceProp prop;
dh::safe_cuda(cudaGetDeviceProperties(&prop, device_idx));
return std::string(prop.name);
}

inline size_t available_memory(int device_idx) {
inline size_t AvailableMemory(int device_idx) {
size_t device_free = 0;
size_t device_total = 0;
safe_cuda(cudaSetDevice(device_idx));
@ -130,20 +130,20 @@ inline size_t available_memory(int device_idx) {
* \param device_idx Zero-based index of the device.
*/

inline size_t max_shared_memory(int device_idx) {
inline size_t MaxSharedMemory(int device_idx) {
cudaDeviceProp prop;
dh::safe_cuda(cudaGetDeviceProperties(&prop, device_idx));
return prop.sharedMemPerBlock;
}

// ensure gpu_id is correct, so not dependent upon user knowing details
inline int get_device_idx(int gpu_id) {
inline int GetDeviceIdx(int gpu_id) {
// protect against overrun for gpu_id
return (std::abs(gpu_id) + 0) % dh::n_visible_devices();
return (std::abs(gpu_id) + 0) % dh::NVisibleDevices();
}

inline void check_compute_capability() {
int n_devices = n_visible_devices();
inline void CheckComputeCapability() {
int n_devices = NVisibleDevices();
for (int d_idx = 0; d_idx < n_devices; ++d_idx) {
cudaDeviceProp prop;
safe_cuda(cudaGetDeviceProperties(&prop, d_idx));
@ -159,72 +159,72 @@ inline void check_compute_capability() {
* Range iterator
*/

class range {
class Range {
public:
class iterator {
friend class range;
class Iterator {
friend class Range;

public:
__host__ __device__ int64_t operator*() const { return i_; }
__host__ __device__ const iterator &operator++() {
XGBOOST_DEVICE int64_t operator*() const { return i_; }
XGBOOST_DEVICE const Iterator &operator++() {
i_ += step_;
return *this;
}
__host__ __device__ iterator operator++(int) {
iterator copy(*this);
XGBOOST_DEVICE Iterator operator++(int) {
Iterator copy(*this);
i_ += step_;
return copy;
}

__host__ __device__ bool operator==(const iterator &other) const {
XGBOOST_DEVICE bool operator==(const Iterator &other) const {
return i_ >= other.i_;
}
__host__ __device__ bool operator!=(const iterator &other) const {
XGBOOST_DEVICE bool operator!=(const Iterator &other) const {
return i_ < other.i_;
}

__host__ __device__ void step(int s) { step_ = s; }
XGBOOST_DEVICE void Step(int s) { step_ = s; }

protected:
__host__ __device__ explicit iterator(int64_t start) : i_(start) {}
XGBOOST_DEVICE explicit Iterator(int64_t start) : i_(start) {}

public:
uint64_t i_;
int step_ = 1;
};

__host__ __device__ iterator begin() const { return begin_; }
__host__ __device__ iterator end() const { return end_; }
__host__ __device__ range(int64_t begin, int64_t end)
XGBOOST_DEVICE Iterator begin() const { return begin_; } // NOLINT
XGBOOST_DEVICE Iterator end() const { return end_; } // NOLINT
XGBOOST_DEVICE Range(int64_t begin, int64_t end)
: begin_(begin), end_(end) {}
__host__ __device__ void step(int s) { begin_.step(s); }
XGBOOST_DEVICE void Step(int s) { begin_.Step(s); }

private:
iterator begin_;
iterator end_;
Iterator begin_;
Iterator end_;
};

template <typename T>
__device__ range grid_stride_range(T begin, T end) {
__device__ Range GridStrideRange(T begin, T end) {
begin += blockDim.x * blockIdx.x + threadIdx.x;
range r(begin, end);
r.step(gridDim.x * blockDim.x);
Range r(begin, end);
r.Step(gridDim.x * blockDim.x);
return r;
}

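GridStrideRange hands each CUDA thread a strided subsequence of [begin, end), so a kernel of any launch geometry covers the whole range. A minimal kernel sketch using the renamed helper (ScaleKernel is illustrative):

__global__ void ScaleKernel(float *data, size_t n, float alpha) {
  // Each thread starts at its global index and advances by the grid size.
  for (auto i : dh::GridStrideRange(static_cast<size_t>(0), n)) {
    data[i] *= alpha;
  }
}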
template <typename T>
__device__ range block_stride_range(T begin, T end) {
__device__ Range BlockStrideRange(T begin, T end) {
begin += threadIdx.x;
range r(begin, end);
r.step(blockDim.x);
Range r(begin, end);
r.Step(blockDim.x);
return r;
}

// Threadblock iterates over range, filling with value. Requires all threads in
// block to be active.
template <typename IterT, typename ValueT>
__device__ void block_fill(IterT begin, size_t n, ValueT value) {
for (auto i : block_stride_range(static_cast<size_t>(0), n)) {
__device__ void BlockFill(IterT begin, size_t n, ValueT value) {
for (auto i : BlockStrideRange(static_cast<size_t>(0), n)) {
begin[i] = value;
}
}
@ -234,34 +234,34 @@ __device__ void block_fill(IterT begin, size_t n, ValueT value) {
*/

template <typename T1, typename T2>
T1 div_round_up(const T1 a, const T2 b) {
T1 DivRoundUp(const T1 a, const T2 b) {
return static_cast<T1>(ceil(static_cast<double>(a) / b));
}

template <typename L>
__global__ void launch_n_kernel(size_t begin, size_t end, L lambda) {
for (auto i : grid_stride_range(begin, end)) {
__global__ void LaunchNKernel(size_t begin, size_t end, L lambda) {
for (auto i : GridStrideRange(begin, end)) {
lambda(i);
}
}
template <typename L>
__global__ void launch_n_kernel(int device_idx, size_t begin, size_t end,
__global__ void LaunchNKernel(int device_idx, size_t begin, size_t end,
L lambda) {
for (auto i : grid_stride_range(begin, end)) {
for (auto i : GridStrideRange(begin, end)) {
lambda(i, device_idx);
}
}

template <int ITEMS_PER_THREAD = 8, int BLOCK_THREADS = 256, typename L>
inline void launch_n(int device_idx, size_t n, L lambda) {
inline void LaunchN(int device_idx, size_t n, L lambda) {
if (n == 0) {
return;
}

safe_cuda(cudaSetDevice(device_idx));
const int GRID_SIZE =
static_cast<int>(div_round_up(n, ITEMS_PER_THREAD * BLOCK_THREADS));
launch_n_kernel<<<GRID_SIZE, BLOCK_THREADS>>>(static_cast<size_t>(0), n,
static_cast<int>(DivRoundUp(n, ITEMS_PER_THREAD * BLOCK_THREADS));
LaunchNKernel<<<GRID_SIZE, BLOCK_THREADS>>>(static_cast<size_t>(0), n,
lambda);
}

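LaunchN wraps the sizing and launch of LaunchNKernel so callers only pass a device ordinal, an element count, and a device lambda, exactly as DVec::Fill does further down. A usage sketch (device lambdas require nvcc's --expt-extended-lambda; d_data is assumed cudaMalloc'd elsewhere):

// Zero-initialise n floats on device 0.
dh::LaunchN(0, n, [=] __device__(size_t idx) { d_data[idx] = 0.0f; });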
@ -269,91 +269,91 @@ inline void launch_n(int device_idx, size_t n, L lambda) {
* Memory
*/

enum memory_type { DEVICE, DEVICE_MANAGED };
enum MemoryType { kDevice, kDeviceManaged };

template <memory_type MemoryT>
class bulk_allocator;
template <MemoryType MemoryT>
class BulkAllocator;
template <typename T>
class dvec2;
class DVec2;

template <typename T>
class dvec {
friend class dvec2<T>;
class DVec {
friend class DVec2<T>;

private:
T *_ptr;
size_t _size;
int _device_idx;
T *ptr_;
size_t size_;
int device_idx_;

public:
void external_allocate(int device_idx, void *ptr, size_t size) {
if (!empty()) {
throw std::runtime_error("Tried to allocate dvec but already allocated");
void ExternalAllocate(int device_idx, void *ptr, size_t size) {
if (!Empty()) {
throw std::runtime_error("Tried to allocate DVec but already allocated");
}
_ptr = static_cast<T *>(ptr);
_size = size;
_device_idx = device_idx;
safe_cuda(cudaSetDevice(_device_idx));
ptr_ = static_cast<T *>(ptr);
size_ = size;
device_idx_ = device_idx;
safe_cuda(cudaSetDevice(device_idx_));
}

dvec() : _ptr(NULL), _size(0), _device_idx(-1) {}
size_t size() const { return _size; }
int device_idx() const { return _device_idx; }
bool empty() const { return _ptr == NULL || _size == 0; }
DVec() : ptr_(NULL), size_(0), device_idx_(-1) {}
size_t Size() const { return size_; }
int DeviceIdx() const { return device_idx_; }
bool Empty() const { return ptr_ == NULL || size_ == 0; }

T *data() { return _ptr; }
T *Data() { return ptr_; }

const T *data() const { return _ptr; }
const T *Data() const { return ptr_; }

std::vector<T> as_vector() const {
std::vector<T> h_vector(size());
safe_cuda(cudaSetDevice(_device_idx));
safe_cuda(cudaMemcpy(h_vector.data(), _ptr, size() * sizeof(T),
std::vector<T> AsVector() const {
std::vector<T> h_vector(Size());
safe_cuda(cudaSetDevice(device_idx_));
safe_cuda(cudaMemcpy(h_vector.data(), ptr_, Size() * sizeof(T),
cudaMemcpyDeviceToHost));
return h_vector;
}

void fill(T value) {
auto d_ptr = _ptr;
launch_n(_device_idx, size(),
void Fill(T value) {
auto d_ptr = ptr_;
LaunchN(device_idx_, Size(),
[=] __device__(size_t idx) { d_ptr[idx] = value; });
}

void print() {
auto h_vector = this->as_vector();
void Print() {
auto h_vector = this->AsVector();
for (auto e : h_vector) {
std::cout << e << " ";
}
std::cout << "\n";
}

thrust::device_ptr<T> tbegin() { return thrust::device_pointer_cast(_ptr); }
thrust::device_ptr<T> tbegin() { return thrust::device_pointer_cast(ptr_); }

thrust::device_ptr<T> tend() {
return thrust::device_pointer_cast(_ptr + size());
return thrust::device_pointer_cast(ptr_ + Size());
}

template <typename T2>
dvec &operator=(const std::vector<T2> &other) {
DVec &operator=(const std::vector<T2> &other) {
this->copy(other.begin(), other.end());
return *this;
}

dvec &operator=(dvec<T> &other) {
if (other.size() != size()) {
DVec &operator=(DVec<T> &other) {
if (other.Size() != Size()) {
throw std::runtime_error(
"Cannot copy assign dvec to dvec, sizes are different");
"Cannot copy assign DVec to DVec, sizes are different");
}
safe_cuda(cudaSetDevice(this->device_idx()));
if (other.device_idx() == this->device_idx()) {
dh::safe_cuda(cudaMemcpy(this->data(), other.data(),
other.size() * sizeof(T),
safe_cuda(cudaSetDevice(this->DeviceIdx()));
if (other.DeviceIdx() == this->DeviceIdx()) {
dh::safe_cuda(cudaMemcpy(this->Data(), other.Data(),
other.Size() * sizeof(T),
cudaMemcpyDeviceToDevice));
} else {
std::cout << "deviceother: " << other.device_idx()
<< " devicethis: " << this->device_idx() << std::endl;
std::cout << "size deviceother: " << other.size()
<< " devicethis: " << this->device_idx() << std::endl;
std::cout << "deviceother: " << other.DeviceIdx()
<< " devicethis: " << this->DeviceIdx() << std::endl;
std::cout << "size deviceother: " << other.Size()
<< " devicethis: " << this->DeviceIdx() << std::endl;
throw std::runtime_error("Cannot copy to/from different devices");
}

@ -362,177 +362,178 @@ class dvec {

template <typename IterT>
void copy(IterT begin, IterT end) {
safe_cuda(cudaSetDevice(this->device_idx()));
if (end - begin != size()) {
safe_cuda(cudaSetDevice(this->DeviceIdx()));
if (end - begin != Size()) {
throw std::runtime_error(
"Cannot copy assign vector to dvec, sizes are different");
"Cannot copy assign vector to DVec, sizes are different");
}
thrust::copy(begin, end, this->tbegin());
}

void copy(thrust::device_ptr<T> begin, thrust::device_ptr<T> end) {
safe_cuda(cudaSetDevice(this->device_idx()));
if (end - begin != size()) {
safe_cuda(cudaSetDevice(this->DeviceIdx()));
if (end - begin != Size()) {
throw std::runtime_error(
"Cannot copy assign vector to dvec, sizes are different");
"Cannot copy assign vector to DVec, sizes are different");
}
safe_cuda(cudaMemcpy(this->data(), begin.get(),
size() * sizeof(T), cudaMemcpyDefault));
safe_cuda(cudaMemcpy(this->Data(), begin.get(),
Size() * sizeof(T), cudaMemcpyDefault));
}
};

/**
* @class dvec2 device_helpers.cuh
* @brief wrapper for storing 2 dvec's which are needed for cub::DoubleBuffer
* @class DVec2 device_helpers.cuh
* @brief wrapper for storing 2 DVec's which are needed for cub::DoubleBuffer
*/
template <typename T>
class dvec2 {
class DVec2 {
private:
dvec<T> _d1, _d2;
cub::DoubleBuffer<T> _buff;
int _device_idx;
DVec<T> d1_, d2_;
cub::DoubleBuffer<T> buff_;
int device_idx_;

public:
void external_allocate(int device_idx, void *ptr1, void *ptr2, size_t size) {
if (!empty()) {
throw std::runtime_error("Tried to allocate dvec2 but already allocated");
void ExternalAllocate(int device_idx, void *ptr1, void *ptr2, size_t size) {
if (!Empty()) {
throw std::runtime_error("Tried to allocate DVec2 but already allocated");
}
_device_idx = device_idx;
_d1.external_allocate(_device_idx, ptr1, size);
_d2.external_allocate(_device_idx, ptr2, size);
_buff.d_buffers[0] = static_cast<T *>(ptr1);
_buff.d_buffers[1] = static_cast<T *>(ptr2);
_buff.selector = 0;
device_idx_ = device_idx;
d1_.ExternalAllocate(device_idx_, ptr1, size);
d2_.ExternalAllocate(device_idx_, ptr2, size);
buff_.d_buffers[0] = static_cast<T *>(ptr1);
buff_.d_buffers[1] = static_cast<T *>(ptr2);
buff_.selector = 0;
}
dvec2() : _d1(), _d2(), _buff(), _device_idx(-1) {}
DVec2() : d1_(), d2_(), buff_(), device_idx_(-1) {}

size_t size() const { return _d1.size(); }
int device_idx() const { return _device_idx; }
bool empty() const { return _d1.empty() || _d2.empty(); }
size_t Size() const { return d1_.Size(); }
int DeviceIdx() const { return device_idx_; }
bool Empty() const { return d1_.Empty() || d2_.Empty(); }

cub::DoubleBuffer<T> &buff() { return _buff; }
cub::DoubleBuffer<T> &buff() { return buff_; }

dvec<T> &d1() { return _d1; }
dvec<T> &d2() { return _d2; }
DVec<T> &D1() { return d1_; }

T *current() { return _buff.Current(); }
DVec<T> &D2() { return d2_; }

dvec<T> &current_dvec() { return _buff.selector == 0 ? d1() : d2(); }
T *Current() { return buff_.Current(); }

T *other() { return _buff.Alternate(); }
DVec<T> &CurrentDVec() { return buff_.selector == 0 ? D1() : D2(); }

T *other() { return buff_.Alternate(); }
};

template <memory_type MemoryT>
class bulk_allocator {
std::vector<char *> d_ptr;
std::vector<size_t> _size;
std::vector<int> _device_idx;
template <MemoryType MemoryT>
class BulkAllocator {
std::vector<char *> d_ptr_;
std::vector<size_t> size_;
std::vector<int> device_idx_;

const int align = 256;
static const int kAlign = 256;

size_t align_round_up(size_t n) const {
n = (n + align - 1) / align;
return n * align;
size_t AlignRoundUp(size_t n) const {
n = (n + kAlign - 1) / kAlign;
return n * kAlign;
}

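AlignRoundUp rounds each sub-allocation up to a kAlign-byte boundary so every DVec carved from the single pooled cudaMalloc stays 256-byte aligned. Worked example: n = 1000 gives (1000 + 255) / 256 = 4 blocks, i.e. 1024 bytes.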
template <typename T>
size_t get_size_bytes(dvec<T> *first_vec, size_t first_size) {
return align_round_up(first_size * sizeof(T));
size_t GetSizeBytes(DVec<T> *first_vec, size_t first_size) {
return AlignRoundUp(first_size * sizeof(T));
}

template <typename T, typename... Args>
size_t get_size_bytes(dvec<T> *first_vec, size_t first_size, Args... args) {
return get_size_bytes<T>(first_vec, first_size) + get_size_bytes(args...);
size_t GetSizeBytes(DVec<T> *first_vec, size_t first_size, Args... args) {
return GetSizeBytes<T>(first_vec, first_size) + GetSizeBytes(args...);
}

template <typename T>
void allocate_dvec(int device_idx, char *ptr, dvec<T> *first_vec,
void AllocateDVec(int device_idx, char *ptr, DVec<T> *first_vec,
size_t first_size) {
first_vec->external_allocate(device_idx, static_cast<void *>(ptr),
first_vec->ExternalAllocate(device_idx, static_cast<void *>(ptr),
first_size);
}

template <typename T, typename... Args>
void allocate_dvec(int device_idx, char *ptr, dvec<T> *first_vec,
void AllocateDVec(int device_idx, char *ptr, DVec<T> *first_vec,
size_t first_size, Args... args) {
allocate_dvec<T>(device_idx, ptr, first_vec, first_size);
ptr += align_round_up(first_size * sizeof(T));
allocate_dvec(device_idx, ptr, args...);
AllocateDVec<T>(device_idx, ptr, first_vec, first_size);
ptr += AlignRoundUp(first_size * sizeof(T));
AllocateDVec(device_idx, ptr, args...);
}

char *allocate_device(int device_idx, size_t bytes, memory_type t) {
char *AllocateDevice(int device_idx, size_t bytes, MemoryType t) {
char *ptr;
safe_cuda(cudaSetDevice(device_idx));
safe_cuda(cudaMalloc(&ptr, bytes));
return ptr;
}
template <typename T>
size_t get_size_bytes(dvec2<T> *first_vec, size_t first_size) {
return 2 * align_round_up(first_size * sizeof(T));
size_t GetSizeBytes(DVec2<T> *first_vec, size_t first_size) {
return 2 * AlignRoundUp(first_size * sizeof(T));
}

template <typename T, typename... Args>
size_t get_size_bytes(dvec2<T> *first_vec, size_t first_size, Args... args) {
return get_size_bytes<T>(first_vec, first_size) + get_size_bytes(args...);
size_t GetSizeBytes(DVec2<T> *first_vec, size_t first_size, Args... args) {
return GetSizeBytes<T>(first_vec, first_size) + GetSizeBytes(args...);
}

template <typename T>
void allocate_dvec(int device_idx, char *ptr, dvec2<T> *first_vec,
void AllocateDVec(int device_idx, char *ptr, DVec2<T> *first_vec,
size_t first_size) {
first_vec->external_allocate(
first_vec->ExternalAllocate(
device_idx, static_cast<void *>(ptr),
static_cast<void *>(ptr + align_round_up(first_size * sizeof(T))),
static_cast<void *>(ptr + AlignRoundUp(first_size * sizeof(T))),
first_size);
}

template <typename T, typename... Args>
void allocate_dvec(int device_idx, char *ptr, dvec2<T> *first_vec,
void AllocateDVec(int device_idx, char *ptr, DVec2<T> *first_vec,
size_t first_size, Args... args) {
allocate_dvec<T>(device_idx, ptr, first_vec, first_size);
ptr += (align_round_up(first_size * sizeof(T)) * 2);
allocate_dvec(device_idx, ptr, args...);
AllocateDVec<T>(device_idx, ptr, first_vec, first_size);
ptr += (AlignRoundUp(first_size * sizeof(T)) * 2);
AllocateDVec(device_idx, ptr, args...);
}

public:
bulk_allocator() {}
BulkAllocator() = default;
// prevent accidental copying, moving or assignment of this object
bulk_allocator(const bulk_allocator<MemoryT>&) = delete;
bulk_allocator(bulk_allocator<MemoryT>&&) = delete;
void operator=(const bulk_allocator<MemoryT>&) = delete;
void operator=(bulk_allocator<MemoryT>&&) = delete;
BulkAllocator(const BulkAllocator<MemoryT>&) = delete;
BulkAllocator(BulkAllocator<MemoryT>&&) = delete;
void operator=(const BulkAllocator<MemoryT>&) = delete;
void operator=(BulkAllocator<MemoryT>&&) = delete;

~bulk_allocator() {
for (size_t i = 0; i < d_ptr.size(); i++) {
if (!(d_ptr[i] == nullptr)) {
safe_cuda(cudaSetDevice(_device_idx[i]));
safe_cuda(cudaFree(d_ptr[i]));
d_ptr[i] = nullptr;
~BulkAllocator() {
for (size_t i = 0; i < d_ptr_.size(); i++) {
if (!(d_ptr_[i] == nullptr)) {
safe_cuda(cudaSetDevice(device_idx_[i]));
safe_cuda(cudaFree(d_ptr_[i]));
d_ptr_[i] = nullptr;
}
}
}

// returns sum of bytes for all allocations
size_t size() {
return std::accumulate(_size.begin(), _size.end(), static_cast<size_t>(0));
size_t Size() {
return std::accumulate(size_.begin(), size_.end(), static_cast<size_t>(0));
}

template <typename... Args>
void allocate(int device_idx, bool silent, Args... args) {
size_t size = get_size_bytes(args...);
void Allocate(int device_idx, bool silent, Args... args) {
size_t size = GetSizeBytes(args...);
|
||||
|
||||
char *ptr = allocate_device(device_idx, size, MemoryT);
|
||||
char *ptr = AllocateDevice(device_idx, size, MemoryT);
|
||||
|
||||
allocate_dvec(device_idx, ptr, args...);
|
||||
AllocateDVec(device_idx, ptr, args...);
|
||||
|
||||
d_ptr.push_back(ptr);
|
||||
_size.push_back(size);
|
||||
_device_idx.push_back(device_idx);
|
||||
d_ptr_.push_back(ptr);
|
||||
size_.push_back(size);
|
||||
device_idx_.push_back(device_idx);
|
||||
|
||||
if (!silent) {
|
||||
const int mb_size = 1048576;
|
||||
LOG(CONSOLE) << "Allocated " << size / mb_size << "MB on [" << device_idx
|
||||
<< "] " << device_name(device_idx) << ", "
|
||||
<< available_memory(device_idx) / mb_size << "MB remaining.";
|
||||
<< "] " << DeviceName(device_idx) << ", "
|
||||
<< AvailableMemory(device_idx) / mb_size << "MB remaining.";
|
||||
}
|
||||
}
|
||||
};
|
||||
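[Annotation] The allocator above packs several device vectors into a single cudaMalloc by rounding each request up to a 256-byte boundary and folding the sizes together through the variadic GetSizeBytes overloads. A minimal host-side sketch of the same round-up-and-sum arithmetic (names here are illustrative, not the xgboost API):

#include <cstddef>
#include <cstdio>

constexpr std::size_t kAlign = 256;

// Smallest multiple of kAlign that is >= n, as in AlignRoundUp above.
std::size_t AlignRoundUp(std::size_t n) {
  return ((n + kAlign - 1) / kAlign) * kAlign;
}

int main() {
  // Requests of 1000 and 4000 bytes become 1024 + 4096 = 5120 bytes,
  // so both sub-allocations start on a 256-byte boundary.
  std::printf("%zu\n", AlignRoundUp(1000) + AlignRoundUp(4000));
  return 0;
}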
@ -543,7 +544,7 @@ struct CubMemory {
size_t temp_storage_bytes;

// Thrust
typedef char value_type;
using ValueT = char;

CubMemory() : d_temp_storage(nullptr), temp_storage_bytes(0) {}

@ -568,17 +569,18 @@ struct CubMemory {
}
}
// Thrust
char *allocate(std::ptrdiff_t num_bytes) {
char *allocate(std::ptrdiff_t num_bytes) { // NOLINT
LazyAllocate(num_bytes);
return reinterpret_cast<char *>(d_temp_storage);
}

// Thrust
void deallocate(char *ptr, size_t n) {
void deallocate(char *ptr, size_t n) { // NOLINT

// Do nothing
}

bool IsAllocated() { return d_temp_storage != NULL; }
bool IsAllocated() { return d_temp_storage != nullptr; }
};
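[Annotation] The allocate/deallocate pair on CubMemory matches the interface Thrust expects from a custom temporary allocator (hence the // Thrust markers), so Thrust algorithms can borrow the cached scratch buffer instead of hitting cudaMalloc on every call. A hedged fragment, assuming Thrust's thrust::cuda::par(alloc) hook and a device array d_keys of length n (neither name is from the diff):

dh::CubMemory scratch;
thrust::sort(thrust::cuda::par(scratch),
             thrust::device_pointer_cast(d_keys),
             thrust::device_pointer_cast(d_keys) + n);
// Thrust calls scratch.allocate()/deallocate(); the buffer stays cached
// inside CubMemory, so a later algorithm call reuses it.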
/*
@ -586,7 +588,7 @@ struct CubMemory {
*/

template <typename T>
void print(const dvec<T> &v, size_t max_items = 10) {
void Print(const DVec<T> &v, size_t max_items = 10) {
std::vector<T> h = v.as_vector();
for (size_t i = 0; i < std::min(max_items, h.size()); i++) {
std::cout << " " << h[i];
@ -609,14 +611,14 @@ void print(const dvec<T> &v, size_t max_items = 10) {
// Load balancing search

template <typename coordinate_t, typename segments_t, typename offset_t>
void FindMergePartitions(int device_idx, coordinate_t *d_tile_coordinates,
size_t num_tiles, int tile_size, segments_t segments,
offset_t num_rows, offset_t num_elements) {
dh::launch_n(device_idx, num_tiles + 1, [=] __device__(int idx) {
offset_t diagonal = idx * tile_size;
coordinate_t tile_coordinate;
cub::CountingInputIterator<offset_t> nonzero_indices(0);
template <typename CoordinateT, typename SegmentT, typename OffsetT>
void FindMergePartitions(int device_idx, CoordinateT *d_tile_coordinates,
size_t num_tiles, int tile_size, SegmentT segments,
OffsetT num_rows, OffsetT num_elements) {
dh::LaunchN(device_idx, num_tiles + 1, [=] __device__(int idx) {
OffsetT diagonal = idx * tile_size;
CoordinateT tile_coordinate;
cub::CountingInputIterator<OffsetT> nonzero_indices(0);

// Search the merge path
// Cast to signed integer as this function can have negatives
@ -630,27 +632,27 @@ void FindMergePartitions(int device_idx, coordinate_t *d_tile_coordinates,
}

template <int TILE_SIZE, int ITEMS_PER_THREAD, int BLOCK_THREADS,
typename offset_t, typename coordinate_t, typename func_t,
typename segments_iter>
__global__ void LbsKernel(coordinate_t *d_coordinates,
segments_iter segment_end_offsets, func_t f,
offset_t num_segments) {
typename OffsetT, typename CoordinateT, typename FunctionT,
typename SegmentIterT>
__global__ void LbsKernel(CoordinateT *d_coordinates,
SegmentIterT segment_end_offsets, FunctionT f,
OffsetT num_segments) {
int tile = blockIdx.x;
coordinate_t tile_start_coord = d_coordinates[tile];
coordinate_t tile_end_coord = d_coordinates[tile + 1];
CoordinateT tile_start_coord = d_coordinates[tile];
CoordinateT tile_end_coord = d_coordinates[tile + 1];
int64_t tile_num_rows = tile_end_coord.x - tile_start_coord.x;
int64_t tile_num_elements = tile_end_coord.y - tile_start_coord.y;

cub::CountingInputIterator<offset_t> tile_element_indices(tile_start_coord.y);
coordinate_t thread_start_coord;
cub::CountingInputIterator<OffsetT> tile_element_indices(tile_start_coord.y);
CoordinateT thread_start_coord;

typedef typename std::iterator_traits<segments_iter>::value_type segment_t;
typedef typename std::iterator_traits<SegmentIterT>::value_type SegmentT;
__shared__ struct {
segment_t tile_segment_end_offsets[TILE_SIZE + 1];
segment_t output_segment[TILE_SIZE];
SegmentT tile_segment_end_offsets[TILE_SIZE + 1];
SegmentT output_segment[TILE_SIZE];
} temp_storage;

for (auto item : dh::block_stride_range(int(0), int(tile_num_rows + 1))) {
for (auto item : dh::BlockStrideRange(int(0), int(tile_num_rows + 1))) {
temp_storage.tile_segment_end_offsets[item] =
segment_end_offsets[min(static_cast<size_t>(tile_start_coord.x + item),
static_cast<size_t>(num_segments - 1))];
@ -665,7 +667,7 @@ __global__ void LbsKernel(coordinate_t *d_coordinates,
tile_element_indices, // List B
tile_num_rows, tile_num_elements, thread_start_coord);

coordinate_t thread_current_coord = thread_start_coord;
CoordinateT thread_current_coord = thread_start_coord;
#pragma unroll
for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) {
if (tile_element_indices[thread_current_coord.y] <
@ -679,50 +681,50 @@ __global__ void LbsKernel(coordinate_t *d_coordinates,
}
__syncthreads();

for (auto item : dh::block_stride_range(int(0), int(tile_num_elements))) {
for (auto item : dh::BlockStrideRange(int(0), int(tile_num_elements))) {
f(tile_start_coord.y + item, temp_storage.output_segment[item]);
}
}

template <typename func_t, typename segments_iter, typename offset_t>
template <typename FunctionT, typename SegmentIterT, typename OffsetT>
void SparseTransformLbs(int device_idx, dh::CubMemory *temp_memory,
offset_t count, segments_iter segments,
offset_t num_segments, func_t f) {
typedef typename cub::CubVector<offset_t, 2>::Type coordinate_t;
OffsetT count, SegmentIterT segments,
OffsetT num_segments, FunctionT f) {
typedef typename cub::CubVector<OffsetT, 2>::Type CoordinateT;
dh::safe_cuda(cudaSetDevice(device_idx));
const int BLOCK_THREADS = 256;
const int ITEMS_PER_THREAD = 1;
const int TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD;
auto num_tiles = dh::div_round_up(count + num_segments, BLOCK_THREADS);
auto num_tiles = dh::DivRoundUp(count + num_segments, BLOCK_THREADS);
CHECK(num_tiles < std::numeric_limits<unsigned int>::max());

temp_memory->LazyAllocate(sizeof(coordinate_t) * (num_tiles + 1));
coordinate_t *tmp_tile_coordinates =
reinterpret_cast<coordinate_t *>(temp_memory->d_temp_storage);
temp_memory->LazyAllocate(sizeof(CoordinateT) * (num_tiles + 1));
CoordinateT *tmp_tile_coordinates =
reinterpret_cast<CoordinateT *>(temp_memory->d_temp_storage);

FindMergePartitions(device_idx, tmp_tile_coordinates, num_tiles,
BLOCK_THREADS, segments, num_segments, count);

LbsKernel<TILE_SIZE, ITEMS_PER_THREAD, BLOCK_THREADS, offset_t>
LbsKernel<TILE_SIZE, ITEMS_PER_THREAD, BLOCK_THREADS, OffsetT>
<<<uint32_t(num_tiles), BLOCK_THREADS>>>(tmp_tile_coordinates,
segments + 1, f, num_segments);
}

template <typename func_t, typename offset_t>
void DenseTransformLbs(int device_idx, offset_t count, offset_t num_segments,
func_t f) {
template <typename FunctionT, typename OffsetT>
void DenseTransformLbs(int device_idx, OffsetT count, OffsetT num_segments,
FunctionT f) {
CHECK(count % num_segments == 0) << "Data is not dense.";

launch_n(device_idx, count, [=] __device__(offset_t idx) {
offset_t segment = idx / (count / num_segments);
LaunchN(device_idx, count, [=] __device__(OffsetT idx) {
OffsetT segment = idx / (count / num_segments);
f(idx, segment);
});
}
/**
* \fn template <typename func_t, typename segments_iter, typename offset_t>
* void TransformLbs(int device_idx, dh::CubMemory *temp_memory, offset_t count,
* segments_iter segments, offset_t num_segments, bool is_dense, func_t f)
* \fn template <typename FunctionT, typename SegmentIterT, typename OffsetT>
* void TransformLbs(int device_idx, dh::CubMemory *temp_memory, OffsetT count,
* SegmentIterT segments, OffsetT num_segments, bool is_dense, FunctionT f)
*
* \brief Load balancing search function. Reads a CSR type matrix description
* and allows a function to be executed on each element. Search 'modern GPU load
@ -731,9 +733,9 @@ void DenseTransformLbs(int device_idx, offset_t count, offset_t num_segments,
* \author Rory
* \date 7/9/2017
*
* \tparam func_t Type of the function t.
* \tparam segments_iter Type of the segments iterator.
* \tparam offset_t Type of the offset.
* \tparam FunctionT Type of the function t.
* \tparam SegmentIterT Type of the segments iterator.
* \tparam OffsetT Type of the offset.
* \param device_idx Zero-based index of the device.
* \param [in,out] temp_memory Temporary memory allocator.
* \param count Number of elements.
@ -743,10 +745,10 @@ void DenseTransformLbs(int device_idx, offset_t count, offset_t num_segments,
* \param f Lambda to be executed on matrix elements.
*/

template <typename func_t, typename segments_iter, typename offset_t>
void TransformLbs(int device_idx, dh::CubMemory *temp_memory, offset_t count,
segments_iter segments, offset_t num_segments, bool is_dense,
func_t f) {
template <typename FunctionT, typename SegmentIterT, typename OffsetT>
void TransformLbs(int device_idx, dh::CubMemory *temp_memory, OffsetT count,
SegmentIterT segments, OffsetT num_segments, bool is_dense,
FunctionT f) {
if (is_dense) {
DenseTransformLbs(device_idx, count, num_segments, f);
} else {
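[Annotation] Putting the pieces together: a caller hands TransformLbs the CSR offsets and a lambda taking (element index, segment index); the dense path skips the merge-path search entirely. A sketch under assumed inputs (d_row_ptr holding num_rows + 1 offsets, d_vals and d_scale resident on the device; these names are illustrative, not from the diff):

dh::CubMemory temp;
auto f = [=] __device__(int idx, int row) {
  d_vals[idx] *= d_scale[row];  // per-element work; the row index comes free
};
TransformLbs(device_idx, &temp, nnz, d_row_ptr, num_rows, /*is_dense=*/false, f);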
@ -765,18 +767,18 @@ void TransformLbs(int device_idx, dh::CubMemory *temp_memory, offset_t count,
* @param offsets the segments
*/
template <typename T1, typename T2>
void segmentedSort(dh::CubMemory *tmp_mem, dh::dvec2<T1> *keys,
dh::dvec2<T2> *vals, int nVals, int nSegs,
const dh::dvec<int> &offsets, int start = 0,
void SegmentedSort(dh::CubMemory *tmp_mem, dh::DVec2<T1> *keys,
dh::DVec2<T2> *vals, int nVals, int nSegs,
const dh::DVec<int> &offsets, int start = 0,
int end = sizeof(T1) * 8) {
size_t tmpSize;
dh::safe_cuda(cub::DeviceSegmentedRadixSort::SortPairs(
NULL, tmpSize, keys->buff(), vals->buff(), nVals, nSegs, offsets.data(),
offsets.data() + 1, start, end));
NULL, tmpSize, keys->buff(), vals->buff(), nVals, nSegs, offsets.Data(),
offsets.Data() + 1, start, end));
tmp_mem->LazyAllocate(tmpSize);
dh::safe_cuda(cub::DeviceSegmentedRadixSort::SortPairs(
tmp_mem->d_temp_storage, tmpSize, keys->buff(), vals->buff(), nVals,
nSegs, offsets.data(), offsets.data() + 1, start, end));
nSegs, offsets.Data(), offsets.Data() + 1, start, end));
}
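[Annotation] Note the double-buffer discipline here: cub::DeviceSegmentedRadixSort may leave the sorted output in either half of each DVec2, recorded by the buffer's selector. A short usage sketch against the signatures above (variable names assumed):

SegmentedSort(&tmp_mem, &keys, &vals, n_vals, n_segs, offsets);
T1 *sorted_keys = keys.Current();       // the half cub selected as output
auto &sorted_vec = keys.CurrentDVec();  // same data, selector-aware accessor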
/**
@ -787,14 +789,14 @@ void segmentedSort(dh::CubMemory *tmp_mem, dh::dvec2<T1> *keys,
* @param nVals number of elements in the input array
*/
template <typename T>
void sumReduction(dh::CubMemory &tmp_mem, dh::dvec<T> &in, dh::dvec<T> &out,
void SumReduction(dh::CubMemory &tmp_mem, dh::DVec<T> &in, dh::DVec<T> &out,
int nVals) {
size_t tmpSize;
dh::safe_cuda(
cub::DeviceReduce::Sum(NULL, tmpSize, in.data(), out.data(), nVals));
cub::DeviceReduce::Sum(NULL, tmpSize, in.Data(), out.Data(), nVals));
tmp_mem.LazyAllocate(tmpSize);
dh::safe_cuda(cub::DeviceReduce::Sum(tmp_mem.d_temp_storage, tmpSize,
in.data(), out.data(), nVals));
in.Data(), out.Data(), nVals));
}

/**
@ -805,7 +807,7 @@ void sumReduction(dh::CubMemory &tmp_mem, dh::dvec<T> &in, dh::dvec<T> &out,
* @param nVals number of elements in the input array
*/
template <typename T>
T sumReduction(dh::CubMemory &tmp_mem, T *in, int nVals) {
T SumReduction(dh::CubMemory &tmp_mem, T *in, int nVals) {
size_t tmpSize;
dh::safe_cuda(cub::DeviceReduce::Sum(nullptr, tmpSize, in, in, nVals));
// Allocate small extra memory for the return value
@ -827,8 +829,8 @@ T sumReduction(dh::CubMemory &tmp_mem, T *in, int nVals) {
* @param def default value to be filled
*/
template <typename T, int BlkDim = 256, int ItemsPerThread = 4>
void fillConst(int device_idx, T *out, int len, T def) {
dh::launch_n<ItemsPerThread, BlkDim>(device_idx, len,
void FillConst(int device_idx, T *out, int len, T def) {
dh::LaunchN<ItemsPerThread, BlkDim>(device_idx, len,
[=] __device__(int i) { out[i] = def; });
}

@ -842,9 +844,9 @@ void fillConst(int device_idx, T *out, int len, T def) {
* @param nVals length of the buffers
*/
template <typename T1, typename T2, int BlkDim = 256, int ItemsPerThread = 4>
void gather(int device_idx, T1 *out1, const T1 *in1, T2 *out2, const T2 *in2,
void Gather(int device_idx, T1 *out1, const T1 *in1, T2 *out2, const T2 *in2,
const int *instId, int nVals) {
dh::launch_n<ItemsPerThread, BlkDim>(device_idx, nVals,
dh::LaunchN<ItemsPerThread, BlkDim>(device_idx, nVals,
[=] __device__(int i) {
int iid = instId[i];
T1 v1 = in1[iid];
@ -862,8 +864,8 @@ void gather(int device_idx, T1 *out1, const T1 *in1, T2 *out2, const T2 *in2,
* @param nVals length of the buffers
*/
template <typename T, int BlkDim = 256, int ItemsPerThread = 4>
void gather(int device_idx, T *out, const T *in, const int *instId, int nVals) {
dh::launch_n<ItemsPerThread, BlkDim>(device_idx, nVals,
void Gather(int device_idx, T *out, const T *in, const int *instId, int nVals) {
dh::LaunchN<ItemsPerThread, BlkDim>(device_idx, nVals,
[=] __device__(int i) {
int iid = instId[i];
out[i] = in[iid];
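[Annotation] These helpers all lean on the standard two-call CUB idiom: the first call with a null temporary-storage pointer only reports the scratch size, and the second call does the work. The same pattern in isolation, against raw CUB (assumes d_in/d_out already resident on the device):

size_t tmp_bytes = 0;
cub::DeviceReduce::Sum(nullptr, tmp_bytes, d_in, d_out, n);  // size query only
void *d_tmp = nullptr;
cudaMalloc(&d_tmp, tmp_bytes);
cub::DeviceReduce::Sum(d_tmp, tmp_bytes, d_in, d_out, n);    // actual reduction
cudaFree(d_tmp);

LazyAllocate exists precisely to amortise this cudaMalloc/cudaFree pair across calls.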
@ -29,12 +29,12 @@ struct ParallelGroupBuilder {
// parallel group builder of data
ParallelGroupBuilder(std::vector<SizeType> *p_rptr,
std::vector<ValueType> *p_data)
: rptr(*p_rptr), data(*p_data), thread_rptr(tmp_thread_rptr) {
: rptr_(*p_rptr), data_(*p_data), thread_rptr_(tmp_thread_rptr_) {
}
ParallelGroupBuilder(std::vector<SizeType> *p_rptr,
std::vector<ValueType> *p_data,
std::vector< std::vector<SizeType> > *p_thread_rptr)
: rptr(*p_rptr), data(*p_data), thread_rptr(*p_thread_rptr) {
: rptr_(*p_rptr), data_(*p_data), thread_rptr_(*p_thread_rptr) {
}

public:
@ -45,10 +45,10 @@ struct ParallelGroupBuilder {
* \param nthread number of thread that will be used in construction
*/
inline void InitBudget(size_t nkeys, int nthread) {
thread_rptr.resize(nthread);
for (size_t i = 0; i < thread_rptr.size(); ++i) {
thread_rptr[i].resize(nkeys);
std::fill(thread_rptr[i].begin(), thread_rptr[i].end(), 0);
thread_rptr_.resize(nthread);
for (size_t i = 0; i < thread_rptr_.size(); ++i) {
thread_rptr_[i].resize(nkeys);
std::fill(thread_rptr_[i].begin(), thread_rptr_[i].end(), 0);
}
}
/*!
@ -58,34 +58,34 @@ struct ParallelGroupBuilder {
* \param nelem number of element budget add to this row
*/
inline void AddBudget(size_t key, int threadid, SizeType nelem = 1) {
std::vector<SizeType> &trptr = thread_rptr[threadid];
std::vector<SizeType> &trptr = thread_rptr_[threadid];
if (trptr.size() < key + 1) {
trptr.resize(key + 1, 0);
}
trptr[key] += nelem;
}
/*! \brief step 3: initialize the necessary storage */
inline void InitStorage(void) {
inline void InitStorage() {
// set rptr to correct size
for (size_t tid = 0; tid < thread_rptr.size(); ++tid) {
if (rptr.size() <= thread_rptr[tid].size()) {
rptr.resize(thread_rptr[tid].size() + 1);
for (size_t tid = 0; tid < thread_rptr_.size(); ++tid) {
if (rptr_.size() <= thread_rptr_[tid].size()) {
rptr_.resize(thread_rptr_[tid].size() + 1);
}
}
// initialize rptr to be beginning of each segment
size_t start = 0;
for (size_t i = 0; i + 1 < rptr.size(); ++i) {
for (size_t tid = 0; tid < thread_rptr.size(); ++tid) {
std::vector<SizeType> &trptr = thread_rptr[tid];
for (size_t i = 0; i + 1 < rptr_.size(); ++i) {
for (size_t tid = 0; tid < thread_rptr_.size(); ++tid) {
std::vector<SizeType> &trptr = thread_rptr_[tid];
if (i < trptr.size()) {
size_t ncnt = trptr[i];
trptr[i] = start;
start += ncnt;
}
}
rptr[i + 1] = start;
rptr_[i + 1] = start;
}
data.resize(start);
data_.resize(start);
}
/*!
* \brief step 4: add data to the allocated space,
@ -96,19 +96,19 @@ struct ParallelGroupBuilder {
* \param threadid the id of thread that calls this function
*/
inline void Push(size_t key, ValueType value, int threadid) {
SizeType &rp = thread_rptr[threadid][key];
data[rp++] = value;
SizeType &rp = thread_rptr_[threadid][key];
data_[rp++] = value;
}

private:
/*! \brief pointer to the beginning and end of each continuous key */
std::vector<SizeType> &rptr;
std::vector<SizeType> &rptr_;
/*! \brief index of nonzero entries in each row */
std::vector<ValueType> &data;
std::vector<ValueType> &data_;
/*! \brief thread local data structure */
std::vector<std::vector<SizeType> > &thread_rptr;
std::vector<std::vector<SizeType> > &thread_rptr_;
/*! \brief local temp thread ptr, use this if not specified by the constructor */
std::vector<std::vector<SizeType> > tmp_thread_rptr;
std::vector<std::vector<SizeType> > tmp_thread_rptr_;
};
} // namespace common
} // namespace xgboost
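[Annotation] The builder realises a two-pass parallel CSR construction: per-thread budget counts, an exclusive-scan-style InitStorage, then race-free Push into pre-reserved slots. A single-threaded walk through the four-step protocol (the second template argument is assumed here to be the SizeType used for rptr; hypothetical instantiation):

std::vector<std::size_t> rptr;
std::vector<int> data;
ParallelGroupBuilder<int, std::size_t> builder(&rptr, &data);
builder.InitBudget(/*nkeys=*/3, /*nthread=*/1);   // step 1: declare key space
builder.AddBudget(/*key=*/0, /*threadid=*/0, 2);  // step 2: count elements per key
builder.AddBudget(2, 0);
builder.InitStorage();                            // step 3: scan counts into offsets
builder.Push(0, 11, 0);                           // step 4: scatter the values
builder.Push(0, 12, 0);
builder.Push(2, 42, 0);
// rptr == {0, 2, 2, 3}, data == {11, 12, 42}

With OpenMP each thread passes its own threadid, so steps 2 and 4 need no atomics.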
@ -17,20 +17,20 @@ namespace xgboost {
namespace common {

void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
typedef common::WXQuantileSketch<bst_float, bst_float> WXQSketch;
const MetaInfo& info = p_fmat->info();
using WXQSketch = common::WXQuantileSketch<bst_float, bst_float>;
const MetaInfo& info = p_fmat->Info();

// safe factor for better accuracy
const int kFactor = 8;
constexpr int kFactor = 8;
std::vector<WXQSketch> sketchs;

const int nthread = omp_get_max_threads();

unsigned nstep = static_cast<unsigned>((info.num_col + nthread - 1) / nthread);
unsigned ncol = static_cast<unsigned>(info.num_col);
sketchs.resize(info.num_col);
auto nstep = static_cast<unsigned>((info.num_col_ + nthread - 1) / nthread);
auto ncol = static_cast<unsigned>(info.num_col_);
sketchs.resize(info.num_col_);
for (auto& s : sketchs) {
s.Init(info.num_row, 1.0 / (max_num_bins * kFactor));
s.Init(info.num_row_, 1.0 / (max_num_bins * kFactor));
}

dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator();
@ -40,7 +40,7 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
#pragma omp parallel num_threads(nthread)
{
CHECK_EQ(nthread, omp_get_num_threads());
unsigned tid = static_cast<unsigned>(omp_get_thread_num());
auto tid = static_cast<unsigned>(omp_get_thread_num());
unsigned begin = std::min(nstep * tid, ncol);
unsigned end = std::min(nstep * (tid + 1), ncol);
for (size_t i = 0; i < batch.size; ++i) { // NOLINT(*)
@ -68,7 +68,7 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_num_bins * kFactor);
sreducer.Allreduce(dmlc::BeginPtr(summary_array), nbytes, summary_array.size());

this->min_val.resize(info.num_col);
this->min_val.resize(info.num_col_);
row_ptr.push_back(0);
for (size_t fid = 0; fid < summary_array.size(); ++fid) {
WXQSketch::SummaryContainer a;
@ -105,7 +105,7 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
}

void GHistIndexMatrix::Init(DMatrix* p_fmat) {
CHECK(cut != nullptr);
CHECK(cut != nullptr); // NOLINT
dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator();

const int nthread = omp_get_max_threads();
@ -126,7 +126,7 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat) {
CHECK_GT(cut->cut.size(), 0U);
CHECK_EQ(cut->row_ptr.back(), cut->cut.size());

omp_ulong bsize = static_cast<omp_ulong>(batch.size);
auto bsize = static_cast<omp_ulong>(batch.size);
#pragma omp parallel for num_threads(nthread) schedule(static)
for (omp_ulong i = 0; i < bsize; ++i) { // NOLINT(*)
const int tid = omp_get_thread_num();
@ -217,7 +217,7 @@ FindGroups_(const std::vector<unsigned>& feature_list,
std::vector<std::vector<bool>> conflict_marks;
std::vector<size_t> group_nnz;
std::vector<size_t> group_conflict_cnt;
const size_t max_conflict_cnt
const auto max_conflict_cnt
= static_cast<size_t>(param.max_conflict_rate * nrow);

for (auto fid : feature_list) {
@ -336,14 +336,14 @@ FastFeatureGrouping(const GHistIndexMatrix& gmat,
void GHistIndexBlockMatrix::Init(const GHistIndexMatrix& gmat,
const ColumnMatrix& colmat,
const FastHistParam& param) {
cut = gmat.cut;
cut_ = gmat.cut;

const size_t nrow = gmat.row_ptr.size() - 1;
const uint32_t nbins = gmat.cut->row_ptr.back();

/* step 1: form feature groups */
auto groups = FastFeatureGrouping(gmat, colmat, param);
const uint32_t nblock = static_cast<uint32_t>(groups.size());
const auto nblock = static_cast<uint32_t>(groups.size());

/* step 2: build a new CSR matrix for each feature group */
std::vector<uint32_t> bin2block(nbins); // lookup table [bin id] => [block id]
@ -380,24 +380,24 @@ void GHistIndexBlockMatrix::Init(const GHistIndexMatrix& gmat,
index_blk_ptr.push_back(0);
row_ptr_blk_ptr.push_back(0);
for (uint32_t block_id = 0; block_id < nblock; ++block_id) {
index.insert(index.end(), index_temp[block_id].begin(), index_temp[block_id].end());
row_ptr.insert(row_ptr.end(), row_ptr_temp[block_id].begin(), row_ptr_temp[block_id].end());
index_blk_ptr.push_back(index.size());
row_ptr_blk_ptr.push_back(row_ptr.size());
index_.insert(index_.end(), index_temp[block_id].begin(), index_temp[block_id].end());
row_ptr_.insert(row_ptr_.end(), row_ptr_temp[block_id].begin(), row_ptr_temp[block_id].end());
index_blk_ptr.push_back(index_.size());
row_ptr_blk_ptr.push_back(row_ptr_.size());
}

// save shortcut for each block
for (uint32_t block_id = 0; block_id < nblock; ++block_id) {
Block blk;
blk.index_begin = &index[index_blk_ptr[block_id]];
blk.row_ptr_begin = &row_ptr[row_ptr_blk_ptr[block_id]];
blk.index_end = &index[index_blk_ptr[block_id + 1]];
blk.row_ptr_end = &row_ptr[row_ptr_blk_ptr[block_id + 1]];
blocks.push_back(blk);
blk.index_begin = &index_[index_blk_ptr[block_id]];
blk.row_ptr_begin = &row_ptr_[row_ptr_blk_ptr[block_id]];
blk.index_end = &index_[index_blk_ptr[block_id + 1]];
blk.row_ptr_end = &row_ptr_[row_ptr_blk_ptr[block_id + 1]];
blocks_.push_back(blk);
}
}
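[Annotation] A note on the "safe factor" above: the sketch is initialised with eps = 1 / (max_num_bins * kFactor), and a weighted quantile sketch guarantees rank error of roughly eps * num_row. Worked numbers under assumed inputs: with num_row = 1e6 and max_num_bins = 256, eps = 1/2048, so the rank error is about 488 rows against roughly 3906 rows per bin, i.e. the chosen cut points land well within one bin width of their exact positions.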
void GHistBuilder::BuildHist(const std::vector<bst_gpair>& gpair,
void GHistBuilder::BuildHist(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat,
const std::vector<bst_uint>& feat_set,
@ -405,30 +405,30 @@ void GHistBuilder::BuildHist(const std::vector<bst_gpair>& gpair,
data_.resize(nbins_ * nthread_, GHistEntry());
std::fill(data_.begin(), data_.end(), GHistEntry());

const int K = 8; // loop unrolling factor
const bst_omp_uint nthread = static_cast<bst_omp_uint>(this->nthread_);
constexpr int kUnroll = 8; // loop unrolling factor
const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
const size_t nrows = row_indices.end - row_indices.begin;
const size_t rest = nrows % K;
const size_t rest = nrows % kUnroll;

#pragma omp parallel for num_threads(nthread) schedule(guided)
for (bst_omp_uint i = 0; i < nrows - rest; i += K) {
for (bst_omp_uint i = 0; i < nrows - rest; i += kUnroll) {
const bst_omp_uint tid = omp_get_thread_num();
const size_t off = tid * nbins_;
size_t rid[K];
size_t ibegin[K];
size_t iend[K];
bst_gpair stat[K];
for (int k = 0; k < K; ++k) {
size_t rid[kUnroll];
size_t ibegin[kUnroll];
size_t iend[kUnroll];
GradientPair stat[kUnroll];
for (int k = 0; k < kUnroll; ++k) {
rid[k] = row_indices.begin[i + k];
}
for (int k = 0; k < K; ++k) {
for (int k = 0; k < kUnroll; ++k) {
ibegin[k] = gmat.row_ptr[rid[k]];
iend[k] = gmat.row_ptr[rid[k] + 1];
}
for (int k = 0; k < K; ++k) {
for (int k = 0; k < kUnroll; ++k) {
stat[k] = gpair[rid[k]];
}
for (int k = 0; k < K; ++k) {
for (int k = 0; k < kUnroll; ++k) {
for (size_t j = ibegin[k]; j < iend[k]; ++j) {
const uint32_t bin = gmat.index[j];
data_[off + bin].Add(stat[k]);
@ -439,7 +439,7 @@ void GHistBuilder::BuildHist(const std::vector<bst_gpair>& gpair,
const size_t rid = row_indices.begin[i];
const size_t ibegin = gmat.row_ptr[rid];
const size_t iend = gmat.row_ptr[rid + 1];
const bst_gpair stat = gpair[rid];
const GradientPair stat = gpair[rid];
for (size_t j = ibegin; j < iend; ++j) {
const uint32_t bin = gmat.index[j];
data_[bin].Add(stat);
@ -456,37 +456,40 @@ void GHistBuilder::BuildHist(const std::vector<bst_gpair>& gpair,
}
}

void GHistBuilder::BuildBlockHist(const std::vector<bst_gpair>& gpair,
void GHistBuilder::BuildBlockHist(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexBlockMatrix& gmatb,
const std::vector<bst_uint>& feat_set,
GHistRow hist) {
const int K = 8; // loop unrolling factor
const bst_omp_uint nthread = static_cast<bst_omp_uint>(this->nthread_);
constexpr int kUnroll = 8; // loop unrolling factor
const size_t nblock = gmatb.GetNumBlock();
const size_t nrows = row_indices.end - row_indices.begin;
const size_t rest = nrows % K;
const size_t rest = nrows % kUnroll;

#if defined(_OPENMP)
const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
#endif

#pragma omp parallel for num_threads(nthread) schedule(guided)
for (bst_omp_uint bid = 0; bid < nblock; ++bid) {
auto gmat = gmatb[bid];

for (size_t i = 0; i < nrows - rest; i += K) {
size_t rid[K];
size_t ibegin[K];
size_t iend[K];
bst_gpair stat[K];
for (int k = 0; k < K; ++k) {
for (size_t i = 0; i < nrows - rest; i += kUnroll) {
size_t rid[kUnroll];
size_t ibegin[kUnroll];
size_t iend[kUnroll];
GradientPair stat[kUnroll];
for (int k = 0; k < kUnroll; ++k) {
rid[k] = row_indices.begin[i + k];
}
for (int k = 0; k < K; ++k) {
for (int k = 0; k < kUnroll; ++k) {
ibegin[k] = gmat.row_ptr[rid[k]];
iend[k] = gmat.row_ptr[rid[k] + 1];
}
for (int k = 0; k < K; ++k) {
for (int k = 0; k < kUnroll; ++k) {
stat[k] = gpair[rid[k]];
}
for (int k = 0; k < K; ++k) {
for (int k = 0; k < kUnroll; ++k) {
for (size_t j = ibegin[k]; j < iend[k]; ++j) {
const uint32_t bin = gmat.index[j];
hist.begin[bin].Add(stat[k]);
@ -497,7 +500,7 @@ void GHistBuilder::BuildBlockHist(const std::vector<bst_gpair>& gpair,
const size_t rid = row_indices.begin[i];
const size_t ibegin = gmat.row_ptr[rid];
const size_t iend = gmat.row_ptr[rid + 1];
const bst_gpair stat = gpair[rid];
const GradientPair stat = gpair[rid];
for (size_t j = ibegin; j < iend; ++j) {
const uint32_t bin = gmat.index[j];
hist.begin[bin].Add(stat);
@ -507,21 +510,26 @@ void GHistBuilder::BuildBlockHist(const std::vector<bst_gpair>& gpair,
}

void GHistBuilder::SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow parent) {
const bst_omp_uint nthread = static_cast<bst_omp_uint>(this->nthread_);
const uint32_t nbins = static_cast<bst_omp_uint>(nbins_);
const int K = 8; // loop unrolling factor
const uint32_t rest = nbins % K;
constexpr int kUnroll = 8; // loop unrolling factor
const uint32_t rest = nbins % kUnroll;

#if defined(_OPENMP)
const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
#endif

#pragma omp parallel for num_threads(nthread) schedule(static)
for (bst_omp_uint bin_id = 0; bin_id < static_cast<bst_omp_uint>(nbins - rest); bin_id += K) {
GHistEntry pb[K];
GHistEntry sb[K];
for (int k = 0; k < K; ++k) {
for (bst_omp_uint bin_id = 0;
bin_id < static_cast<bst_omp_uint>(nbins - rest); bin_id += kUnroll) {
GHistEntry pb[kUnroll];
GHistEntry sb[kUnroll];
for (int k = 0; k < kUnroll; ++k) {
pb[k] = parent.begin[bin_id + k];
}
for (int k = 0; k < K; ++k) {
for (int k = 0; k < kUnroll; ++k) {
sb[k] = sibling.begin[bin_id + k];
}
for (int k = 0; k < K; ++k) {
for (int k = 0; k < kUnroll; ++k) {
self.begin[bin_id + k].SetSubtract(pb[k], sb[k]);
}
}
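[Annotation] SubtractionTrick exploits the invariant that every row of a parent node lands in exactly one child, so per bin parent = left + right, and the second child's histogram is a pure elementwise subtraction instead of another pass over the rows:

// per bin (same for sum_hess); no access to the training rows is needed
right.sum_grad = parent.sum_grad - left.sum_grad;
right.sum_hess = parent.sum_hess - left.sum_hess;

Building only the smaller child's histogram and deriving the other this way roughly halves the histogram construction cost.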
@ -13,26 +13,26 @@
#include "row_set.h"
#include "../tree/fast_hist_param.h"

using xgboost::tree::FastHistParam;

namespace xgboost {
namespace common {

using tree::FastHistParam;

/*! \brief sums of gradient statistics corresponding to a histogram bin */
struct GHistEntry {
/*! \brief sum of first-order gradient statistics */
double sum_grad;
double sum_grad{0};
/*! \brief sum of second-order gradient statistics */
double sum_hess;
double sum_hess{0};

GHistEntry() : sum_grad(0), sum_hess(0) {}
GHistEntry() = default;

inline void Clear() {
sum_grad = sum_hess = 0;
}

/*! \brief add a bst_gpair to the sum */
inline void Add(const bst_gpair& e) {
/*! \brief add a GradientPair to the sum */
inline void Add(const GradientPair& e) {
sum_grad += e.GetGrad();
sum_hess += e.GetHess();
}
@ -58,7 +58,7 @@ struct HistCutUnit {
/*! \brief number of cutting point, containing the maximum point */
uint32_t size;
// default constructor
HistCutUnit() {}
HistCutUnit() = default;
// constructor
HistCutUnit(const bst_float* cut, uint32_t size)
: cut(cut), size(size) {}
@ -74,8 +74,8 @@ struct HistCutMatrix {
std::vector<bst_float> cut;
/*! \brief Get histogram bound for fid */
inline HistCutUnit operator[](bst_uint fid) const {
return HistCutUnit(dmlc::BeginPtr(cut) + row_ptr[fid],
row_ptr[fid + 1] - row_ptr[fid]);
return {dmlc::BeginPtr(cut) + row_ptr[fid],
row_ptr[fid + 1] - row_ptr[fid]};
}
// create histogram cut matrix given statistics from data
// using approximate quantile sketch approach
@ -92,7 +92,7 @@ struct GHistIndexRow {
const uint32_t* index;
/*! \brief The size of the histogram */
size_t size;
GHistIndexRow() {}
GHistIndexRow() = default;
GHistIndexRow(const uint32_t* index, size_t size)
: index(index), size(size) {}
};
@ -115,7 +115,7 @@ struct GHistIndexMatrix {
void Init(DMatrix* p_fmat);
// get i-th row
inline GHistIndexRow operator[](size_t i) const {
return GHistIndexRow(&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]);
return {&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]};
}
inline void GetFeatureCounts(size_t* counts) const {
auto nfeature = cut->row_ptr.size() - 1;
@ -141,7 +141,7 @@ struct GHistIndexBlock {

// get i-th row
inline GHistIndexRow operator[](size_t i) const {
return GHistIndexRow(&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]);
return {&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]};
}
};

@ -154,24 +154,24 @@ class GHistIndexBlockMatrix {
const FastHistParam& param);

inline GHistIndexBlock operator[](size_t i) const {
return GHistIndexBlock(blocks[i].row_ptr_begin, blocks[i].index_begin);
return {blocks_[i].row_ptr_begin, blocks_[i].index_begin};
}

inline size_t GetNumBlock() const {
return blocks.size();
return blocks_.size();
}

private:
std::vector<size_t> row_ptr;
std::vector<uint32_t> index;
const HistCutMatrix* cut;
std::vector<size_t> row_ptr_;
std::vector<uint32_t> index_;
const HistCutMatrix* cut_;
struct Block {
const size_t* row_ptr_begin;
const size_t* row_ptr_end;
const uint32_t* index_begin;
const uint32_t* index_end;
};
std::vector<Block> blocks;
std::vector<Block> blocks_;
};

/*!
@ -186,7 +186,7 @@ struct GHistRow {
/*! \brief number of entries */
uint32_t size;

GHistRow() {}
GHistRow() = default;
GHistRow(GHistEntry* begin, uint32_t size)
: begin(begin), size(size) {}
};
@ -198,15 +198,15 @@ class HistCollection {
public:
// access histogram for i-th node
inline GHistRow operator[](bst_uint nid) const {
const uint32_t kMax = std::numeric_limits<uint32_t>::max();
constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
CHECK_NE(row_ptr_[nid], kMax);
return GHistRow(const_cast<GHistEntry*>(dmlc::BeginPtr(data_) + row_ptr_[nid]), nbins_);
return {const_cast<GHistEntry*>(dmlc::BeginPtr(data_) + row_ptr_[nid]), nbins_};
}

// have we computed a histogram for i-th node?
inline bool RowExists(bst_uint nid) const {
const uint32_t kMax = std::numeric_limits<uint32_t>::max();
return (nid < row_ptr_.size() && row_ptr_[nid] != kMax);
const uint32_t k_max = std::numeric_limits<uint32_t>::max();
return (nid < row_ptr_.size() && row_ptr_[nid] != k_max);
}

// initialize histogram collection
@ -218,7 +218,7 @@ class HistCollection {

// create an empty histogram for i-th node
inline void AddHistRow(bst_uint nid) {
const uint32_t kMax = std::numeric_limits<uint32_t>::max();
constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
if (nid >= row_ptr_.size()) {
row_ptr_.resize(nid + 1, kMax);
}
@ -250,13 +250,13 @@ class GHistBuilder {
}

// construct a histogram via histogram aggregation
void BuildHist(const std::vector<bst_gpair>& gpair,
void BuildHist(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat,
const std::vector<bst_uint>& feat_set,
GHistRow hist);
// same, with feature grouping
void BuildBlockHist(const std::vector<bst_gpair>& gpair,
void BuildBlockHist(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexBlockMatrix& gmatb,
const std::vector<bst_uint>& feat_set,
@ -6,6 +6,8 @@
// dummy implementation of HostDeviceVector in case CUDA is not used

#include <xgboost/base.h>

#include <utility>
#include "./host_device_vector.h"

namespace xgboost {
@ -13,8 +15,8 @@ namespace xgboost {
template <typename T>
struct HostDeviceVectorImpl {
explicit HostDeviceVectorImpl(size_t size, T v) : data_h_(size, v) {}
explicit HostDeviceVectorImpl(std::initializer_list<T> init) : data_h_(init) {}
explicit HostDeviceVectorImpl(const std::vector<T>& init) : data_h_(init) {}
HostDeviceVectorImpl(std::initializer_list<T> init) : data_h_(init) {}
explicit HostDeviceVectorImpl(std::vector<T> init) : data_h_(std::move(init)) {}
std::vector<T> data_h_;
};

@ -43,25 +45,25 @@ HostDeviceVector<T>::~HostDeviceVector() {
}

template <typename T>
size_t HostDeviceVector<T>::size() const { return impl_->data_h_.size(); }
size_t HostDeviceVector<T>::Size() const { return impl_->data_h_.size(); }

template <typename T>
int HostDeviceVector<T>::device() const { return -1; }
int HostDeviceVector<T>::DeviceIdx() const { return -1; }

template <typename T>
T* HostDeviceVector<T>::ptr_d(int device) { return nullptr; }
T* HostDeviceVector<T>::DevicePointer(int device) { return nullptr; }

template <typename T>
std::vector<T>& HostDeviceVector<T>::data_h() { return impl_->data_h_; }
std::vector<T>& HostDeviceVector<T>::HostVector() { return impl_->data_h_; }

template <typename T>
void HostDeviceVector<T>::resize(size_t new_size, T v, int new_device) {
void HostDeviceVector<T>::Resize(size_t new_size, T v, int new_device) {
impl_->data_h_.resize(new_size, v);
}

// explicit instantiations are required, as HostDeviceVector isn't header-only
template class HostDeviceVector<bst_float>;
template class HostDeviceVector<bst_gpair>;
template class HostDeviceVector<GradientPair>;

} // namespace xgboost
@ -35,27 +35,27 @@ struct HostDeviceVectorImpl {
void operator=(const HostDeviceVectorImpl<T>&) = delete;
void operator=(HostDeviceVectorImpl<T>&&) = delete;

size_t size() const { return on_d_ ? data_d_.size() : data_h_.size(); }
size_t Size() const { return on_d_ ? data_d_.size() : data_h_.size(); }

int device() const { return device_; }
int DeviceIdx() const { return device_; }

T* ptr_d(int device) {
lazy_sync_device(device);
T* DevicePointer(int device) {
LazySyncDevice(device);
return data_d_.data().get();
}
thrust::device_ptr<T> tbegin(int device) {
return thrust::device_ptr<T>(ptr_d(device));
thrust::device_ptr<T> tbegin(int device) { // NOLINT
return thrust::device_ptr<T>(DevicePointer(device));
}
thrust::device_ptr<T> tend(int device) {
thrust::device_ptr<T> tend(int device) { // NOLINT
auto begin = tbegin(device);
return begin + size();
return begin + Size();
}
std::vector<T>& data_h() {
lazy_sync_host();
std::vector<T>& HostVector() {
LazySyncHost();
return data_h_;
}
void resize(size_t new_size, T v, int new_device) {
if (new_size == this->size() && new_device == device_)
void Resize(size_t new_size, T v, int new_device) {
if (new_size == this->Size() && new_device == device_)
return;
if (new_device != -1)
device_ = new_device;
@ -70,26 +70,26 @@ struct HostDeviceVectorImpl {
}
}

void lazy_sync_host() {
void LazySyncHost() {
if (!on_d_)
return;
if (data_h_.size() != this->size())
data_h_.resize(this->size());
if (data_h_.size() != this->Size())
data_h_.resize(this->Size());
dh::safe_cuda(cudaSetDevice(device_));
thrust::copy(data_d_.begin(), data_d_.end(), data_h_.begin());
on_d_ = false;
}

void lazy_sync_device(int device) {
void LazySyncDevice(int device) {
if (on_d_)
return;
if (device != device_) {
CHECK_EQ(device_, -1);
device_ = device;
}
if (data_d_.size() != this->size()) {
if (data_d_.size() != this->Size()) {
dh::safe_cuda(cudaSetDevice(device_));
data_d_.resize(this->size());
data_d_.resize(this->Size());
}
dh::safe_cuda(cudaSetDevice(device_));
thrust::copy(data_h_.begin(), data_h_.end(), data_d_.begin());
@ -128,34 +128,34 @@ HostDeviceVector<T>::~HostDeviceVector() {
}

template <typename T>
size_t HostDeviceVector<T>::size() const { return impl_->size(); }
size_t HostDeviceVector<T>::Size() const { return impl_->Size(); }

template <typename T>
int HostDeviceVector<T>::device() const { return impl_->device(); }
int HostDeviceVector<T>::DeviceIdx() const { return impl_->DeviceIdx(); }

template <typename T>
T* HostDeviceVector<T>::ptr_d(int device) { return impl_->ptr_d(device); }
T* HostDeviceVector<T>::DevicePointer(int device) { return impl_->DevicePointer(device); }

template <typename T>
thrust::device_ptr<T> HostDeviceVector<T>::tbegin(int device) {
thrust::device_ptr<T> HostDeviceVector<T>::tbegin(int device) { // NOLINT
return impl_->tbegin(device);
}

template <typename T>
thrust::device_ptr<T> HostDeviceVector<T>::tend(int device) {
thrust::device_ptr<T> HostDeviceVector<T>::tend(int device) { // NOLINT
return impl_->tend(device);
}

template <typename T>
std::vector<T>& HostDeviceVector<T>::data_h() { return impl_->data_h(); }
std::vector<T>& HostDeviceVector<T>::HostVector() { return impl_->HostVector(); }

template <typename T>
void HostDeviceVector<T>::resize(size_t new_size, T v, int new_device) {
impl_->resize(new_size, v, new_device);
void HostDeviceVector<T>::Resize(size_t new_size, T v, int new_device) {
impl_->Resize(new_size, v, new_device);
}

// explicit instantiations are required, as HostDeviceVector isn't header-only
template class HostDeviceVector<bst_float>;
template class HostDeviceVector<bst_gpair>;
template class HostDeviceVector<GradientPair>;

} // namespace xgboost
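[Annotation] The lazy-sync pair gives HostDeviceVector its contract: the data lives wherever it was touched last, and each accessor performs at most one copy. A usage sketch (assumes a CUDA build, device 0, and a (size, value) constructor mirroring HostDeviceVectorImpl's):

HostDeviceVector<bst_float> vec(100, 0.0f);
vec.HostVector()[0] = 1.0f;           // host write, nothing copied yet
bst_float *d = vec.DevicePointer(0);  // first device access: host -> device copy
// ... launch kernels reading/writing d ...
auto &h = vec.HostVector();           // device -> host copy back, on demand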
@ -70,10 +70,10 @@ class HostDeviceVector {
HostDeviceVector(HostDeviceVector<T>&&) = delete;
void operator=(const HostDeviceVector<T>&) = delete;
void operator=(HostDeviceVector<T>&&) = delete;
size_t size() const;
int device() const;
T* ptr_d(int device);
T* ptr_h() { return data_h().data(); }
size_t Size() const;
int DeviceIdx() const;
T* DevicePointer(int device);
T* HostPointer() { return HostVector().data(); }

// only define functions returning device_ptr
// if HostDeviceVector.h is included from a .cu file
@ -82,10 +82,10 @@ class HostDeviceVector {
thrust::device_ptr<T> tend(int device);
#endif

std::vector<T>& data_h();
std::vector<T>& HostVector();

// passing in new_device == -1 keeps the device as is
void resize(size_t new_size, T v = T(), int new_device = -1);
void Resize(size_t new_size, T v = T(), int new_device = -1);

private:
HostDeviceVectorImpl<T>* impl_;
@ -15,8 +15,8 @@

namespace xgboost {
namespace common {
typedef rabit::utils::MemoryFixSizeBuffer MemoryFixSizeBuffer;
typedef rabit::utils::MemoryBufferStream MemoryBufferStream;
using MemoryFixSizeBuffer = rabit::utils::MemoryFixSizeBuffer;
using MemoryBufferStream = rabit::utils::MemoryBufferStream;

/*!
* \brief Input stream that support additional PeekRead
@ -39,12 +39,12 @@ inline void Softmax(std::vector<float>* p_rec) {
wmax = std::max(rec[i], wmax);
}
double wsum = 0.0f;
for (size_t i = 0; i < rec.size(); ++i) {
rec[i] = std::exp(rec[i] - wmax);
wsum += rec[i];
for (float & elem : rec) {
elem = std::exp(elem - wmax);
wsum += elem;
}
for (size_t i = 0; i < rec.size(); ++i) {
rec[i] /= static_cast<float>(wsum);
for (float & elem : rec) {
elem /= static_cast<float>(wsum);
}
}
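[Annotation] Rewriting the loops as range-for does not change the numerics: this is the standard max-subtraction softmax, exact in real arithmetic (softmax(x) = softmax(x - max x)) but safe against float overflow. Worked check for rec = {1000.0f, 1001.0f}: a naive exp(1000) overflows to inf and yields NaN after division, while the guarded form computes exp(-1)/(exp(-1) + exp(0)) ~= 0.269 and exp(0)/(exp(-1) + exp(0)) ~= 0.731:

std::vector<float> rec = {1000.0f, 1001.0f};
Softmax(&rec);  // the function above; rec ~= {0.269f, 0.731f}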
@ -35,7 +35,7 @@ struct WQSummary {
|
||||
/*! \brief the value of data */
|
||||
DType value;
|
||||
// constructor
|
||||
Entry() {}
|
||||
Entry() = default;
|
||||
// constructor
|
||||
Entry(RType rmin, RType rmax, RType wmin, DType value)
|
||||
: rmin(rmin), rmax(rmax), wmin(wmin), value(value) {}
|
||||
@ -48,11 +48,11 @@ struct WQSummary {
|
||||
CHECK(rmax- rmin - wmin > -eps) << "relation constraint: min/max";
|
||||
}
|
||||
/*! \return rmin estimation for v strictly bigger than value */
|
||||
inline RType rmin_next() const {
|
||||
inline RType RMinNext() const {
|
||||
return rmin + wmin;
|
||||
}
|
||||
/*! \return rmax estimation for v strictly smaller than value */
|
||||
inline RType rmax_prev() const {
|
||||
inline RType RMaxPrev() const {
|
||||
return rmax - wmin;
|
||||
}
|
||||
};
|
||||
@ -65,7 +65,7 @@ struct WQSummary {
|
||||
// weight of instance
|
||||
RType weight;
|
||||
// default constructor
|
||||
QEntry() {}
|
||||
QEntry() = default;
|
||||
// constructor
|
||||
QEntry(DType value, RType weight)
|
||||
: value(value), weight(weight) {}
|
||||
@ -116,7 +116,7 @@ struct WQSummary {
|
||||
inline RType MaxError() const {
|
||||
RType res = data[0].rmax - data[0].rmin - data[0].wmin;
|
||||
for (size_t i = 1; i < size; ++i) {
|
||||
res = std::max(data[i].rmax_prev() - data[i - 1].rmin_next(), res);
|
||||
res = std::max(data[i].RMaxPrev() - data[i - 1].RMinNext(), res);
|
||||
res = std::max(data[i].rmax - data[i].rmin - data[i].wmin, res);
|
||||
}
|
||||
return res;
|
||||
@ -140,8 +140,8 @@ struct WQSummary {
|
||||
if (istart == 0) {
|
||||
return Entry(0.0f, 0.0f, 0.0f, qvalue);
|
||||
} else {
|
||||
return Entry(data[istart - 1].rmin_next(),
|
||||
data[istart].rmax_prev(),
|
||||
return Entry(data[istart - 1].RMinNext(),
|
||||
data[istart].RMaxPrev(),
|
||||
0.0f, qvalue);
|
||||
}
|
||||
}
|
||||
@ -197,7 +197,7 @@ struct WQSummary {
|
||||
while (i < src.size - 1
|
||||
&& dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i;
|
||||
CHECK(i != src.size - 1);
|
||||
if (dx2 < src.data[i].rmin_next() + src.data[i + 1].rmax_prev()) {
|
||||
if (dx2 < src.data[i].RMinNext() + src.data[i + 1].RMaxPrev()) {
|
||||
if (i != lastidx) {
|
||||
data[size++] = src.data[i]; lastidx = i;
|
||||
}
|
||||
@ -236,20 +236,20 @@ struct WQSummary {
|
||||
*dst = Entry(a->rmin + b->rmin,
|
||||
a->rmax + b->rmax,
|
||||
a->wmin + b->wmin, a->value);
|
||||
aprev_rmin = a->rmin_next();
|
||||
bprev_rmin = b->rmin_next();
|
||||
aprev_rmin = a->RMinNext();
|
||||
bprev_rmin = b->RMinNext();
|
||||
++dst; ++a; ++b;
|
||||
} else if (a->value < b->value) {
|
||||
*dst = Entry(a->rmin + bprev_rmin,
|
||||
a->rmax + b->rmax_prev(),
|
||||
a->rmax + b->RMaxPrev(),
|
||||
a->wmin, a->value);
|
||||
aprev_rmin = a->rmin_next();
|
||||
aprev_rmin = a->RMinNext();
|
||||
++dst; ++a;
|
||||
} else {
|
||||
*dst = Entry(b->rmin + aprev_rmin,
|
||||
b->rmax + a->rmax_prev(),
|
||||
b->rmax + a->RMaxPrev(),
|
||||
b->wmin, b->value);
|
||||
bprev_rmin = b->rmin_next();
|
||||
bprev_rmin = b->RMinNext();
|
||||
++dst; ++b;
|
||||
}
|
||||
}
|
||||
@ -307,7 +307,7 @@ struct WQSummary {
|
||||
data[i].rmax = prev_rmax;
|
||||
*err_maxgap = std::max(*err_maxgap, prev_rmax - data[i].rmax);
|
||||
}
|
||||
RType rmin_next = data[i].rmin_next();
|
||||
RType rmin_next = data[i].RMinNext();
|
||||
if (data[i].rmax < rmin_next) {
|
||||
data[i].rmax = rmin_next;
|
||||
*err_wgap = std::max(*err_wgap, data[i].rmax - rmin_next);
|
||||
@ -334,13 +334,13 @@ struct WQSummary {
|
||||
template<typename DType, typename RType>
|
||||
struct WXQSummary : public WQSummary<DType, RType> {
|
||||
// redefine entry type
|
||||
typedef typename WQSummary<DType, RType>::Entry Entry;
|
||||
using Entry = typename WQSummary<DType, RType>::Entry;
|
||||
// constructor
|
||||
WXQSummary(Entry *data, size_t size)
|
||||
: WQSummary<DType, RType>(data, size) {}
|
||||
// check if the block is large chunk
|
||||
inline static bool CheckLarge(const Entry &e, RType chunk) {
|
||||
return e.rmin_next() > e.rmax_prev() + chunk;
|
||||
return e.RMinNext() > e.RMaxPrev() + chunk;
|
||||
}
|
||||
// set prune
|
||||
inline void SetPrune(const WQSummary<DType, RType> &src, size_t maxsize) {
@ -377,13 +377,13 @@ struct WXQSummary : public WQSummary<DType, RType> {
if (CheckLarge(src.data[i], chunk)) {
if (bid != i - 1) {
// accumulate the range of the rest points
mrange += src.data[i].rmax_prev() - src.data[bid].rmin_next();
mrange += src.data[i].RMaxPrev() - src.data[bid].RMinNext();
}
bid = i; ++nbig;
}
}
if (bid != src.size - 2) {
mrange += src.data[src.size-1].rmax_prev() - src.data[bid].rmin_next();
mrange += src.data[src.size-1].RMaxPrev() - src.data[bid].RMinNext();
}
}
// assert: there cannot be more than n big data points
@ -405,14 +405,14 @@ struct WXQSummary : public WQSummary<DType, RType> {
if (end == src.size - 1 || CheckLarge(src.data[end], chunk)) {
if (bid != end - 1) {
size_t i = bid;
RType maxdx2 = src.data[end].rmax_prev() * 2;
RType maxdx2 = src.data[end].RMaxPrev() * 2;
for (; k < n; ++k) {
RType dx2 = 2 * ((k * mrange) / n + begin);
if (dx2 >= maxdx2) break;
while (i < end &&
dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i;
if (i == end) break;
if (dx2 < src.data[i].rmin_next() + src.data[i + 1].rmax_prev()) {
if (dx2 < src.data[i].RMinNext() + src.data[i + 1].RMaxPrev()) {
if (i != lastidx) {
this->data[this->size++] = src.data[i]; lastidx = i;
}
@ -429,7 +429,7 @@ struct WXQSummary : public WQSummary<DType, RType> {
}
bid = end;
// shift base by the gap
begin += src.data[bid].rmin_next() - src.data[bid].rmax_prev();
begin += src.data[bid].RMinNext() - src.data[bid].RMaxPrev();
}
}
}
@ -448,7 +448,7 @@ struct GKSummary {
/*! \brief the value of data */
DType value;
// constructor
Entry() {}
Entry() = default;
// constructor
Entry(RType rmin, RType rmax, DType value)
: rmin(rmin), rmax(rmax), value(value) {}
@ -591,17 +591,17 @@ template<typename DType, typename RType, class TSummary>
class QuantileSketchTemplate {
public:
/*! \brief type of summary type */
typedef TSummary Summary;
using Summary = TSummary;
/*! \brief the entry type */
typedef typename Summary::Entry Entry;
using Entry = typename Summary::Entry;
/*! \brief same as summary, but use STL to backup the space */
struct SummaryContainer : public Summary {
std::vector<Entry> space;
SummaryContainer(const SummaryContainer &src) : Summary(NULL, src.size) {
SummaryContainer(const SummaryContainer &src) : Summary(nullptr, src.size) {
this->space = src.space;
this->data = dmlc::BeginPtr(this->space);
}
SummaryContainer() : Summary(NULL, 0) {
SummaryContainer() : Summary(nullptr, 0) {
}
/*! \brief reserve space for summary */
inline void Reserve(size_t size) {
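
Replacing NULL with nullptr here is clang-tidy's modernize-use-nullptr check. A small hedged sketch of the overload pitfall that motivates it; f is a made-up function, not from this codebase:

    #include <iostream>

    void f(int) { std::cout << "f(int)\n"; }
    void f(int*) { std::cout << "f(int*)\n"; }

    int main() {
      // NULL is an integer constant, so f(NULL) would be ambiguous or pick
      // f(int) depending on how the platform defines it.
      f(nullptr);         // unambiguous: calls f(int*)
      int* p = nullptr;   // nullptr has its own type, std::nullptr_t
      return p == nullptr ? 0 : 1;
    }
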
@ -775,7 +775,7 @@ class QuantileSketchTemplate {
inline void InitLevel(size_t nlevel) {
if (level.size() >= nlevel) return;
data.resize(limit_size * nlevel);
level.resize(nlevel, Summary(NULL, 0));
level.resize(nlevel, Summary(nullptr, 0));
for (size_t l = 0; l < level.size(); ++l) {
level[l].data = dmlc::BeginPtr(data) + l * limit_size;
}

@ -15,7 +15,7 @@ namespace common {
/*!
* \brief Define mt19937 as default type Random Engine.
*/
typedef std::mt19937 RandomEngine;
using RandomEngine = std::mt19937;

#if XGBOOST_CUSTOMIZE_GLOBAL_PRNG
/*!
@ -56,7 +56,7 @@ typedef CustomGlobalRandomEngine GlobalRandomEngine;
/*!
* \brief global random engine
*/
typedef RandomEngine GlobalRandomEngine;
using GlobalRandomEngine = RandomEngine;
#endif

/*!

@ -21,18 +21,18 @@ class RowSetCollection {
* rows (instances) associated with a particular node in a decision
* tree. */
struct Elem {
const size_t* begin;
const size_t* end;
int node_id;
const size_t* begin{nullptr};
const size_t* end{nullptr};
int node_id{-1};
// id of node associated with this instance set; -1 means uninitialized
Elem(void)
: begin(nullptr), end(nullptr), node_id(-1) {}
Elem()
= default;
Elem(const size_t* begin,
const size_t* end,
int node_id)
: begin(begin), end(end), node_id(node_id) {}

inline size_t size() const {
inline size_t Size() const {
return end - begin;
}
};
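
The Elem rewrite above combines two checks: modernize-use-default-member-init moves the initial values onto the members, and modernize-use-equals-default replaces the hand-written constructor. A self-contained sketch of the same pattern; the field names mirror Elem, but the struct itself is illustrative:

    #include <cassert>
    #include <cstddef>

    struct Span {
      const size_t* begin{nullptr};  // default member initializers keep the
      const size_t* end{nullptr};    // "empty" state in one obvious place
      int node_id{-1};               // -1 means not yet assigned to a node

      Span() = default;              // picks up the initializers above
      Span(const size_t* begin, const size_t* end, int node_id)
          : begin(begin), end(end), node_id(node_id) {}

      size_t Size() const { return end - begin; }
    };

    int main() {
      Span empty;                    // no uninitialized garbage possible
      assert(empty.begin == nullptr && empty.node_id == -1);
      size_t rows[3] = {0, 1, 2};
      Span filled(rows, rows + 3, 7);
      assert(filled.Size() == 3);
      return 0;
    }
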
@ -42,11 +42,11 @@ class RowSetCollection {
std::vector<size_t> right;
};

inline std::vector<Elem>::const_iterator begin() const {
inline std::vector<Elem>::const_iterator begin() const { // NOLINT
return elem_of_each_node_.begin();
}

inline std::vector<Elem>::const_iterator end() const {
inline std::vector<Elem>::const_iterator end() const { // NOLINT
return elem_of_each_node_.end();
}
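
Google style wants CamelCase method names, but range-based for loops only find lowercase begin()/end(), so these two accessors keep their STL names and silence the naming check with NOLINT. A short sketch of why the exception is needed; RowSets is a stand-in class, not the real collection:

    #include <vector>

    class RowSets {
     public:
      // STL-style names are required for `for (x : rs)` to compile ...
      std::vector<int>::const_iterator begin() const { return v_.begin(); }  // NOLINT
      std::vector<int>::const_iterator end() const { return v_.end(); }      // NOLINT

     private:
      std::vector<int> v_{1, 2, 3};
    };

    int main() {
      RowSets rs;
      int sum = 0;
      for (int x : rs) sum += x;  // ... because the loop desugars to begin()/end()
      return sum == 6 ? 0 : 1;
    }
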

@ -88,7 +88,7 @@ class RowSetCollection {
unsigned left_node_id,
unsigned right_node_id) {
const Elem e = elem_of_each_node_[node_id];
const bst_omp_uint nthread = static_cast<bst_omp_uint>(row_split_tloc.size());
const auto nthread = static_cast<bst_omp_uint>(row_split_tloc.size());
CHECK(e.begin != nullptr);
size_t* all_begin = dmlc::BeginPtr(row_indices_);
size_t* begin = all_begin + (e.begin - all_begin);
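
The nthread change is modernize-use-auto: when the right-hand side is a cast, the target type is already spelled out once, so repeating it on the left is noise. A minimal sketch with a made-up variable:

    #include <cstdint>

    int main() {
      double work_items = 1024.0;
      // before: the type appears twice
      uint32_t n_old = static_cast<uint32_t>(work_items);
      // after: `auto` deduces exactly the cast's target type
      auto n_new = static_cast<uint32_t>(work_items);
      return n_old == n_new ? 0 : 1;
    }
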

@ -12,10 +12,10 @@
namespace xgboost {
namespace common {
struct Timer {
typedef std::chrono::high_resolution_clock ClockT;
typedef std::chrono::high_resolution_clock::time_point TimePointT;
typedef std::chrono::high_resolution_clock::duration DurationT;
typedef std::chrono::duration<double> SecondsT;
using ClockT = std::chrono::high_resolution_clock;
using TimePointT = std::chrono::high_resolution_clock::time_point;
using DurationT = std::chrono::high_resolution_clock::duration;
using SecondsT = std::chrono::duration<double>;

TimePointT start;
DurationT elapsed;
@ -70,7 +70,7 @@ struct Monitor {
if (debug_verbose) {
#ifdef __CUDACC__
#include "device_helpers.cuh"
dh::synchronize_n_devices(dList.size(), dList);
dh::SynchronizeNDevices(dList.size(), dList);
#endif
}
timer_map[name].Start();
@ -80,7 +80,7 @@ struct Monitor {
if (debug_verbose) {
#ifdef __CUDACC__
#include "device_helpers.cuh"
dh::synchronize_n_devices(dList.size(), dList);
dh::SynchronizeNDevices(dList.size(), dList);
#endif
}
timer_map[name].Stop();

114 src/data/data.cc
@ -24,51 +24,51 @@ DMLC_REGISTRY_ENABLE(::xgboost::data::SparsePageFormatReg);
namespace xgboost {
// implementation of inline functions
void MetaInfo::Clear() {
num_row = num_col = num_nonzero = 0;
labels.clear();
root_index.clear();
group_ptr.clear();
weights.clear();
base_margin.clear();
num_row_ = num_col_ = num_nonzero_ = 0;
labels_.clear();
root_index_.clear();
group_ptr_.clear();
weights_.clear();
base_margin_.clear();
}

void MetaInfo::SaveBinary(dmlc::Stream *fo) const {
int32_t version = kVersion;
fo->Write(&version, sizeof(version));
fo->Write(&num_row, sizeof(num_row));
fo->Write(&num_col, sizeof(num_col));
fo->Write(&num_nonzero, sizeof(num_nonzero));
fo->Write(labels);
fo->Write(group_ptr);
fo->Write(weights);
fo->Write(root_index);
fo->Write(base_margin);
fo->Write(&num_row_, sizeof(num_row_));
fo->Write(&num_col_, sizeof(num_col_));
fo->Write(&num_nonzero_, sizeof(num_nonzero_));
fo->Write(labels_);
fo->Write(group_ptr_);
fo->Write(weights_);
fo->Write(root_index_);
fo->Write(base_margin_);
}

void MetaInfo::LoadBinary(dmlc::Stream *fi) {
int version;
CHECK(fi->Read(&version, sizeof(version)) == sizeof(version)) << "MetaInfo: invalid version";
CHECK_EQ(version, kVersion) << "MetaInfo: invalid format";
CHECK(fi->Read(&num_row, sizeof(num_row)) == sizeof(num_row)) << "MetaInfo: invalid format";
CHECK(fi->Read(&num_col, sizeof(num_col)) == sizeof(num_col)) << "MetaInfo: invalid format";
CHECK(fi->Read(&num_nonzero, sizeof(num_nonzero)) == sizeof(num_nonzero))
CHECK(fi->Read(&num_row_, sizeof(num_row_)) == sizeof(num_row_)) << "MetaInfo: invalid format";
CHECK(fi->Read(&num_col_, sizeof(num_col_)) == sizeof(num_col_)) << "MetaInfo: invalid format";
CHECK(fi->Read(&num_nonzero_, sizeof(num_nonzero_)) == sizeof(num_nonzero_))
<< "MetaInfo: invalid format";
CHECK(fi->Read(&labels)) << "MetaInfo: invalid format";
CHECK(fi->Read(&group_ptr)) << "MetaInfo: invalid format";
CHECK(fi->Read(&weights)) << "MetaInfo: invalid format";
CHECK(fi->Read(&root_index)) << "MetaInfo: invalid format";
CHECK(fi->Read(&base_margin)) << "MetaInfo: invalid format";
CHECK(fi->Read(&labels_)) << "MetaInfo: invalid format";
CHECK(fi->Read(&group_ptr_)) << "MetaInfo: invalid format";
CHECK(fi->Read(&weights_)) << "MetaInfo: invalid format";
CHECK(fi->Read(&root_index_)) << "MetaInfo: invalid format";
CHECK(fi->Read(&base_margin_)) << "MetaInfo: invalid format";
}
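
The bulk of this file's diff is readability-identifier-naming applying the Google rule that private data members carry a trailing underscore (the PrivateMemberSuffix option in the .clang-tidy config). A hedged sketch of the convention; Meta below is illustrative, not the real MetaInfo:

    #include <cstdint>

    class Meta {
     public:
      uint64_t RowCount() const { return num_row_; }
      void SetRowCount(uint64_t n) { num_row_ = n; }

     private:
      // The suffix makes member access visible at every use site:
      // num_row_ cannot be confused with a local `num_row`.
      uint64_t num_row_{0};
    };

    int main() {
      Meta info;
      info.SetRowCount(42);
      return info.RowCount() == 42 ? 0 : 1;
    }
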

// try to load group information from file, if exists
inline bool MetaTryLoadGroup(const std::string& fname,
std::vector<unsigned>* group) {
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r", true));
if (fi.get() == nullptr) return false;
if (fi == nullptr) return false;
dmlc::istream is(fi.get());
group->clear();
group->push_back(0);
unsigned nline;
unsigned nline = 0;
while (is >> nline) {
group->push_back(group->back() + nline);
}
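
Dropping .get() in the null check works because std::unique_ptr compares directly against nullptr. A tiny sketch under that assumption:

    #include <memory>

    int main() {
      std::unique_ptr<int> fi;  // empty, like a failed dmlc::Stream::Create
      bool missing_old = (fi.get() == nullptr);  // before: via the raw pointer
      bool missing_new = (fi == nullptr);        // after: direct comparison
      return (missing_old && missing_new) ? 0 : 1;
    }
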
@ -79,7 +79,7 @@ inline bool MetaTryLoadGroup(const std::string& fname,
inline bool MetaTryLoadFloatInfo(const std::string& fname,
std::vector<bst_float>* data) {
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r", true));
if (fi.get() == nullptr) return false;
if (fi == nullptr) return false;
dmlc::istream is(fi.get());
data->clear();
bst_float value;
@ -93,16 +93,16 @@ inline bool MetaTryLoadFloatInfo(const std::string& fname,
#define DISPATCH_CONST_PTR(dtype, old_ptr, cast_ptr, proc) \
switch (dtype) { \
case kFloat32: { \
const float* cast_ptr = reinterpret_cast<const float*>(old_ptr); proc; break; \
auto cast_ptr = reinterpret_cast<const float*>(old_ptr); proc; break; \
} \
case kDouble: { \
const double* cast_ptr = reinterpret_cast<const double*>(old_ptr); proc; break; \
auto cast_ptr = reinterpret_cast<const double*>(old_ptr); proc; break; \
} \
case kUInt32: { \
const uint32_t* cast_ptr = reinterpret_cast<const uint32_t*>(old_ptr); proc; break; \
auto cast_ptr = reinterpret_cast<const uint32_t*>(old_ptr); proc; break; \
} \
case kUInt64: { \
const uint64_t* cast_ptr = reinterpret_cast<const uint64_t*>(old_ptr); proc; break; \
auto cast_ptr = reinterpret_cast<const uint64_t*>(old_ptr); proc; break; \
} \
default: LOG(FATAL) << "Unknown data type " << dtype; \
} \
@ -110,28 +110,28 @@ inline bool MetaTryLoadFloatInfo(const std::string& fname,

void MetaInfo::SetInfo(const char* key, const void* dptr, DataType dtype, size_t num) {
if (!std::strcmp(key, "root_index")) {
root_index.resize(num);
root_index_.resize(num);
DISPATCH_CONST_PTR(dtype, dptr, cast_dptr,
std::copy(cast_dptr, cast_dptr + num, root_index.begin()));
std::copy(cast_dptr, cast_dptr + num, root_index_.begin()));
} else if (!std::strcmp(key, "label")) {
labels.resize(num);
labels_.resize(num);
DISPATCH_CONST_PTR(dtype, dptr, cast_dptr,
std::copy(cast_dptr, cast_dptr + num, labels.begin()));
std::copy(cast_dptr, cast_dptr + num, labels_.begin()));
} else if (!std::strcmp(key, "weight")) {
weights.resize(num);
weights_.resize(num);
DISPATCH_CONST_PTR(dtype, dptr, cast_dptr,
std::copy(cast_dptr, cast_dptr + num, weights.begin()));
std::copy(cast_dptr, cast_dptr + num, weights_.begin()));
} else if (!std::strcmp(key, "base_margin")) {
base_margin.resize(num);
base_margin_.resize(num);
DISPATCH_CONST_PTR(dtype, dptr, cast_dptr,
std::copy(cast_dptr, cast_dptr + num, base_margin.begin()));
std::copy(cast_dptr, cast_dptr + num, base_margin_.begin()));
} else if (!std::strcmp(key, "group")) {
group_ptr.resize(num + 1);
group_ptr_.resize(num + 1);
DISPATCH_CONST_PTR(dtype, dptr, cast_dptr,
std::copy(cast_dptr, cast_dptr + num, group_ptr.begin() + 1));
group_ptr[0] = 0;
for (size_t i = 1; i < group_ptr.size(); ++i) {
group_ptr[i] = group_ptr[i - 1] + group_ptr[i];
std::copy(cast_dptr, cast_dptr + num, group_ptr_.begin() + 1));
group_ptr_[0] = 0;
for (size_t i = 1; i < group_ptr_.size(); ++i) {
group_ptr_[i] = group_ptr_[i - 1] + group_ptr_[i];
}
}
}
@ -163,7 +163,9 @@ DMatrix* DMatrix::Load(const std::string& uri,
<< "-" << rabit::GetWorldSize()
<< cache_shards[i].substr(pos, cache_shards[i].length());
}
if (i + 1 != cache_shards.size()) os << ':';
if (i + 1 != cache_shards.size()) {
os << ':';
}
}
cache_file = os.str();
}
@ -187,7 +189,7 @@ DMatrix* DMatrix::Load(const std::string& uri,
if (file_format == "auto" && npart == 1) {
int magic;
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r", true));
if (fi.get() != nullptr) {
if (fi != nullptr) {
common::PeekableInStream is(fi.get());
if (is.PeekRead(&magic, sizeof(magic)) == sizeof(magic) &&
magic == data::SimpleCSRSource::kMagic) {
@ -195,8 +197,8 @@ DMatrix* DMatrix::Load(const std::string& uri,
source->LoadBinary(&is);
DMatrix* dmat = DMatrix::Create(std::move(source), cache_file);
if (!silent) {
LOG(CONSOLE) << dmat->info().num_row << 'x' << dmat->info().num_col << " matrix with "
<< dmat->info().num_nonzero << " entries loaded from " << uri;
LOG(CONSOLE) << dmat->Info().num_row_ << 'x' << dmat->Info().num_col_ << " matrix with "
<< dmat->Info().num_nonzero_ << " entries loaded from " << uri;
}
return dmat;
}
@ -207,26 +209,26 @@ DMatrix* DMatrix::Load(const std::string& uri,
dmlc::Parser<uint32_t>::Create(fname.c_str(), partid, npart, file_format.c_str()));
DMatrix* dmat = DMatrix::Create(parser.get(), cache_file);
if (!silent) {
LOG(CONSOLE) << dmat->info().num_row << 'x' << dmat->info().num_col << " matrix with "
<< dmat->info().num_nonzero << " entries loaded from " << uri;
LOG(CONSOLE) << dmat->Info().num_row_ << 'x' << dmat->Info().num_col_ << " matrix with "
<< dmat->Info().num_nonzero_ << " entries loaded from " << uri;
}
/* sync up number of features after matrix loaded.
* partitioned data will fail the train/val validation check
* since partitioned data does not know the real number of features. */
rabit::Allreduce<rabit::op::Max>(&dmat->info().num_col, 1);
rabit::Allreduce<rabit::op::Max>(&dmat->Info().num_col_, 1);
// backward compatibility code.
if (!load_row_split) {
MetaInfo& info = dmat->info();
if (MetaTryLoadGroup(fname + ".group", &info.group_ptr) && !silent) {
LOG(CONSOLE) << info.group_ptr.size() - 1
MetaInfo& info = dmat->Info();
if (MetaTryLoadGroup(fname + ".group", &info.group_ptr_) && !silent) {
LOG(CONSOLE) << info.group_ptr_.size() - 1
<< " groups are loaded from " << fname << ".group";
}
if (MetaTryLoadFloatInfo(fname + ".base_margin", &info.base_margin) && !silent) {
LOG(CONSOLE) << info.base_margin.size()
if (MetaTryLoadFloatInfo(fname + ".base_margin", &info.base_margin_) && !silent) {
LOG(CONSOLE) << info.base_margin_.size()
<< " base_margin are loaded from " << fname << ".base_margin";
}
if (MetaTryLoadFloatInfo(fname + ".weight", &info.weights) && !silent) {
LOG(CONSOLE) << info.weights.size()
if (MetaTryLoadFloatInfo(fname + ".weight", &info.weights_) && !silent) {
LOG(CONSOLE) << info.weights_.size()
<< " weights are loaded from " << fname << ".weight";
}
}

@ -18,7 +18,7 @@ void SimpleCSRSource::Clear() {

void SimpleCSRSource::CopyFrom(DMatrix* src) {
this->Clear();
this->info = src->info();
this->info = src->Info();
dmlc::DataIter<RowBatch>* iter = src->RowIterator();
iter->BeforeFirst();
while (iter->Next()) {
@ -36,10 +36,10 @@ void SimpleCSRSource::CopyFrom(dmlc::Parser<uint32_t>* parser) {
while (parser->Next()) {
const dmlc::RowBlock<uint32_t>& batch = parser->Value();
if (batch.label != nullptr) {
info.labels.insert(info.labels.end(), batch.label, batch.label + batch.size);
info.labels_.insert(info.labels_.end(), batch.label, batch.label + batch.size);
}
if (batch.weight != nullptr) {
info.weights.insert(info.weights.end(), batch.weight, batch.weight + batch.size);
info.weights_.insert(info.weights_.end(), batch.weight, batch.weight + batch.size);
}
// Remove the assertion on batch.index, which can be null in the case that the data in this
// batch is entirely sparse. Although it's true that this indicates a likely issue with the
@ -48,13 +48,13 @@ void SimpleCSRSource::CopyFrom(dmlc::Parser<uint32_t>* parser) {
// CHECK(batch.index != nullptr);

// update information
this->info.num_row += batch.size;
this->info.num_row_ += batch.size;
// copy the data over
for (size_t i = batch.offset[0]; i < batch.offset[batch.size]; ++i) {
uint32_t index = batch.index[i];
bst_float fvalue = batch.value == nullptr ? 1.0f : batch.value[i];
row_data_.push_back(SparseBatch::Entry(index, fvalue));
this->info.num_col = std::max(this->info.num_col,
row_data_.emplace_back(index, fvalue);
this->info.num_col_ = std::max(this->info.num_col_,
static_cast<uint64_t>(index + 1));
}
size_t top = row_ptr_.size();
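
The row_data_ change is modernize-use-emplace: emplace_back forwards the constructor arguments and builds the element in place instead of copying a temporary. A standalone sketch with a simplified Entry:

    #include <cstdint>
    #include <vector>

    struct Entry {
      uint32_t index;
      float fvalue;
      Entry(uint32_t index, float fvalue) : index(index), fvalue(fvalue) {}
    };

    int main() {
      std::vector<Entry> data;
      data.push_back(Entry(3, 0.5f));  // before: temporary, then move/copy
      data.emplace_back(3, 0.5f);      // after: constructed inside the vector
      return data.size() == 2 ? 0 : 1;
    }
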
@ -62,7 +62,7 @@ void SimpleCSRSource::CopyFrom(dmlc::Parser<uint32_t>* parser) {
row_ptr_.push_back(row_ptr_[top - 1] + batch.offset[i + 1] - batch.offset[0]);
}
}
this->info.num_nonzero = static_cast<uint64_t>(row_data_.size());
this->info.num_nonzero_ = static_cast<uint64_t>(row_data_.size());
}

void SimpleCSRSource::LoadBinary(dmlc::Stream* fi) {

@ -35,9 +35,9 @@ class SimpleCSRSource : public DataSource {
std::vector<RowBatch::Entry> row_data_;
// functions
/*! \brief default constructor */
SimpleCSRSource() : row_ptr_(1, 0), at_first_(true) {}
SimpleCSRSource() : row_ptr_(1, 0) {}
/*! \brief destructor */
virtual ~SimpleCSRSource() {}
~SimpleCSRSource() override = default;
/*! \brief clear the data structure */
void Clear();
/*!
@ -72,7 +72,7 @@ class SimpleCSRSource : public DataSource {

private:
/*! \brief internal variable, used to support iterator interface */
bool at_first_;
bool at_first_{true};
/*! \brief */
RowBatch batch_;
};
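
`~SimpleCSRSource() override = default` replaces the empty virtual destructor body: override asks the compiler to verify the signature really overrides the base, and = default says the triviality is intentional. A minimal sketch of the pair; both types are stand-ins:

    struct Source {
      virtual ~Source() = default;   // virtual base destructor
      virtual bool Next() = 0;
    };

    struct CsrSource : Source {
      ~CsrSource() override = default;  // checked override, no empty body
      bool Next() override { return false; }
    };

    int main() {
      CsrSource s;
      return s.Next() ? 1 : 0;
    }
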

@ -20,7 +20,7 @@ bool SimpleDMatrix::ColBatchIter::Next() {
data_ptr_ += 1;
SparsePage* pcol = cpages_[data_ptr_ - 1].get();
batch_.size = col_index_.size();
col_data_.resize(col_index_.size(), SparseBatch::Inst(NULL, 0));
col_data_.resize(col_index_.size(), SparseBatch::Inst(nullptr, 0));
for (size_t i = 0; i < col_data_.size(); ++i) {
const bst_uint ridx = col_index_[i];
col_data_[i] = SparseBatch::Inst
@ -33,7 +33,7 @@ bool SimpleDMatrix::ColBatchIter::Next() {
}

dmlc::DataIter<ColBatch>* SimpleDMatrix::ColIterator() {
size_t ncol = this->info().num_col;
size_t ncol = this->Info().num_col_;
col_iter_.col_index_.resize(ncol);
for (size_t i = 0; i < ncol; ++i) {
col_iter_.col_index_[i] = static_cast<bst_uint>(i);
@ -43,10 +43,10 @@ dmlc::DataIter<ColBatch>* SimpleDMatrix::ColIterator() {
}

dmlc::DataIter<ColBatch>* SimpleDMatrix::ColIterator(const std::vector<bst_uint>&fset) {
size_t ncol = this->info().num_col;
size_t ncol = this->Info().num_col_;
col_iter_.col_index_.resize(0);
for (size_t i = 0; i < fset.size(); ++i) {
if (fset[i] < ncol) col_iter_.col_index_.push_back(fset[i]);
for (auto fidx : fset) {
if (fidx < ncol) col_iter_.col_index_.push_back(fidx);
}
col_iter_.BeforeFirst();
return &col_iter_;
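
modernize-loop-convert rewrites the index loop over fset into a range-based loop, removing the index bookkeeping entirely. A self-contained sketch of the before/after:

    #include <cstddef>
    #include <vector>

    int main() {
      std::vector<unsigned> fset = {0, 2, 5, 9};
      const size_t ncol = 6;
      std::vector<unsigned> col_index;

      // before: i exists only to read fset[i]
      for (size_t i = 0; i < fset.size(); ++i) {
        if (fset[i] < ncol) col_index.push_back(fset[i]);
      }
      col_index.clear();

      // after: the element itself is the loop variable
      for (auto fidx : fset) {
        if (fidx < ncol) col_index.push_back(fidx);
      }
      return col_index.size() == 3 ? 0 : 1;
    }
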
@ -56,9 +56,9 @@ void SimpleDMatrix::InitColAccess(const std::vector<bool> &enabled,
float pkeep,
size_t max_row_perbatch, bool sorted) {
if (this->HaveColAccess(sorted)) return;
col_iter_.sorted = sorted;
col_iter_.sorted_ = sorted;
col_iter_.cpages_.clear();
if (info().num_row < max_row_perbatch) {
if (Info().num_row_ < max_row_perbatch) {
std::unique_ptr<SparsePage> page(new SparsePage());
this->MakeOneBatch(enabled, pkeep, page.get(), sorted);
col_iter_.cpages_.push_back(std::move(page));
@ -66,10 +66,10 @@ void SimpleDMatrix::InitColAccess(const std::vector<bool> &enabled,
this->MakeManyBatch(enabled, pkeep, max_row_perbatch, sorted);
}
// setup col-size
col_size_.resize(info().num_col);
col_size_.resize(Info().num_col_);
std::fill(col_size_.begin(), col_size_.end(), 0);
for (size_t i = 0; i < col_iter_.cpages_.size(); ++i) {
SparsePage *pcol = col_iter_.cpages_[i].get();
for (auto & cpage : col_iter_.cpages_) {
SparsePage *pcol = cpage.get();
for (size_t j = 0; j < pcol->Size(); ++j) {
col_size_[j] += pcol->offset[j + 1] - pcol->offset[j];
}
@ -80,14 +80,14 @@ void SimpleDMatrix::InitColAccess(const std::vector<bool> &enabled,
void SimpleDMatrix::MakeOneBatch(const std::vector<bool>& enabled, float pkeep,
SparsePage* pcol, bool sorted) {
// clear rowset
buffered_rowset_.clear();
buffered_rowset_.Clear();
// bit map
const int nthread = omp_get_max_threads();
std::vector<bool> bmap;
pcol->Clear();
common::ParallelGroupBuilder<SparseBatch::Entry>
builder(&pcol->offset, &pcol->data);
builder.InitBudget(info().num_col, nthread);
builder.InitBudget(Info().num_col_, nthread);
// start working
dmlc::DataIter<RowBatch>* iter = this->RowIterator();
iter->BeforeFirst();
@ -99,9 +99,9 @@ void SimpleDMatrix::MakeOneBatch(const std::vector<bool>& enabled, float pkeep,

long batch_size = static_cast<long>(batch.size); // NOLINT(*)
for (long i = 0; i < batch_size; ++i) { // NOLINT(*)
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
auto ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (pkeep == 1.0f || coin_flip(rnd)) {
buffered_rowset_.push_back(ridx);
buffered_rowset_.PushBack(ridx);
} else {
bmap[i] = false;
}
@ -109,7 +109,7 @@ void SimpleDMatrix::MakeOneBatch(const std::vector<bool>& enabled, float pkeep,
#pragma omp parallel for schedule(static)
for (long i = 0; i < batch_size; ++i) { // NOLINT(*)
int tid = omp_get_thread_num();
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
auto ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (bmap[ridx]) {
RowBatch::Inst inst = batch[i];
for (bst_uint j = 0; j < inst.length; ++j) {
@ -128,13 +128,13 @@ void SimpleDMatrix::MakeOneBatch(const std::vector<bool>& enabled, float pkeep,
#pragma omp parallel for schedule(static)
for (long i = 0; i < static_cast<long>(batch.size); ++i) { // NOLINT(*)
int tid = omp_get_thread_num();
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
auto ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (bmap[ridx]) {
RowBatch::Inst inst = batch[i];
for (bst_uint j = 0; j < inst.length; ++j) {
if (enabled[inst[j].index]) {
builder.Push(inst[j].index,
SparseBatch::Entry((bst_uint)(batch.base_rowid+i),
SparseBatch::Entry(static_cast<bst_uint>(batch.base_rowid+i),
inst[j].fvalue), tid);
}
}
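
The (bst_uint) cast becomes static_cast<bst_uint> via google-readability-casting: a C-style cast silently allows any conversion, while static_cast restricts itself to well-defined ones and is easy to grep for. A small sketch:

    #include <cstddef>
    #include <cstdint>

    int main() {
      size_t base_rowid = 100;
      size_t i = 5;
      uint32_t a = (uint32_t)(base_rowid + i);         // before: C-style cast
      auto b = static_cast<uint32_t>(base_rowid + i);  // after: explicit intent
      return a == b ? 0 : 1;
    }
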
@ -142,11 +142,11 @@ void SimpleDMatrix::MakeOneBatch(const std::vector<bool>& enabled, float pkeep,
}
}

CHECK_EQ(pcol->Size(), info().num_col);
CHECK_EQ(pcol->Size(), Info().num_col_);

if (sorted) {
// sort columns
bst_omp_uint ncol = static_cast<bst_omp_uint>(pcol->Size());
auto ncol = static_cast<bst_omp_uint>(pcol->Size());
#pragma omp parallel for schedule(dynamic, 1) num_threads(nthread)
for (bst_omp_uint i = 0; i < ncol; ++i) {
if (pcol->offset[i] < pcol->offset[i + 1]) {
@ -164,7 +164,7 @@ void SimpleDMatrix::MakeManyBatch(const std::vector<bool>& enabled,
size_t btop = 0;
std::bernoulli_distribution coin_flip(pkeep);
auto& rnd = common::GlobalRandom();
buffered_rowset_.clear();
buffered_rowset_.Clear();
// internal temp cache
SparsePage tmp; tmp.Clear();
// start working
@ -174,16 +174,16 @@ void SimpleDMatrix::MakeManyBatch(const std::vector<bool>& enabled,
while (iter->Next()) {
const RowBatch &batch = iter->Value();
for (size_t i = 0; i < batch.size; ++i) {
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
auto ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (pkeep == 1.0f || coin_flip(rnd)) {
buffered_rowset_.push_back(ridx);
buffered_rowset_.PushBack(ridx);
tmp.Push(batch[i]);
}
if (tmp.Size() >= max_row_perbatch) {
std::unique_ptr<SparsePage> page(new SparsePage());
this->MakeColPage(tmp.GetRowBatch(0), btop, enabled, page.get(), sorted);
col_iter_.cpages_.push_back(std::move(page));
btop = buffered_rowset_.size();
btop = buffered_rowset_.Size();
tmp.Clear();
}
}
@ -205,7 +205,7 @@ void SimpleDMatrix::MakeColPage(const RowBatch& batch,
pcol->Clear();
common::ParallelGroupBuilder<SparseBatch::Entry>
builder(&pcol->offset, &pcol->data);
builder.InitBudget(info().num_col, nthread);
builder.InitBudget(Info().num_col_, nthread);
bst_omp_uint ndata = static_cast<bst_uint>(batch.size);
#pragma omp parallel for schedule(static) num_threads(nthread)
for (bst_omp_uint i = 0; i < ndata; ++i) {
@ -231,10 +231,10 @@ void SimpleDMatrix::MakeColPage(const RowBatch& batch,
tid);
}
}
CHECK_EQ(pcol->Size(), info().num_col);
CHECK_EQ(pcol->Size(), Info().num_col_);
// sort columns
if (sorted) {
bst_omp_uint ncol = static_cast<bst_omp_uint>(pcol->Size());
auto ncol = static_cast<bst_omp_uint>(pcol->Size());
#pragma omp parallel for schedule(dynamic, 1) num_threads(nthread)
for (bst_omp_uint i = 0; i < ncol; ++i) {
if (pcol->offset[i] < pcol->offset[i + 1]) {

@ -22,11 +22,11 @@ class SimpleDMatrix : public DMatrix {
explicit SimpleDMatrix(std::unique_ptr<DataSource>&& source)
: source_(std::move(source)) {}

MetaInfo& info() override {
MetaInfo& Info() override {
return source_->info;
}

const MetaInfo& info() const override {
const MetaInfo& Info() const override {
return source_->info;
}

@ -37,10 +37,10 @@ class SimpleDMatrix : public DMatrix {
}

bool HaveColAccess(bool sorted) const override {
return col_size_.size() != 0 && col_iter_.sorted == sorted;
return col_size_.size() != 0 && col_iter_.sorted_ == sorted;
}

const RowSet& buffered_rowset() const override {
const RowSet& BufferedRowset() const override {
return buffered_rowset_;
}

@ -49,8 +49,8 @@ class SimpleDMatrix : public DMatrix {
}

float GetColDensity(size_t cidx) const override {
size_t nmiss = buffered_rowset_.size() - col_size_[cidx];
return 1.0f - (static_cast<float>(nmiss)) / buffered_rowset_.size();
size_t nmiss = buffered_rowset_.Size() - col_size_[cidx];
return 1.0f - (static_cast<float>(nmiss)) / buffered_rowset_.Size();
}

dmlc::DataIter<ColBatch>* ColIterator() override;
@ -67,7 +67,7 @@ class SimpleDMatrix : public DMatrix {
// in-memory column batch iterator.
struct ColBatchIter: dmlc::DataIter<ColBatch> {
public:
ColBatchIter() : data_ptr_(0), sorted(false) {}
ColBatchIter() = default;
void BeforeFirst() override {
data_ptr_ = 0;
}
@ -86,11 +86,11 @@ class SimpleDMatrix : public DMatrix {
// column sparse pages
std::vector<std::unique_ptr<SparsePage> > cpages_;
// data pointer
size_t data_ptr_;
size_t data_ptr_{0};
// temporary space for batch
ColBatch batch_;
// Is column sorted?
bool sorted;
bool sorted_{false};
};

// source data pointer.

@ -51,11 +51,11 @@ class SparsePage {
return offset.size() - 1;
}
/*! \return estimation of memory cost of this page */
inline size_t MemCostBytes(void) const {
inline size_t MemCostBytes() const {
return offset.size() * sizeof(size_t) + data.size() * sizeof(SparseBatch::Entry);
}
/*! \brief clear the page */
inline void Clear(void) {
inline void Clear() {
min_index = 0;
offset.clear();
offset.push_back(0);
@ -92,7 +92,7 @@ class SparsePage {
for (size_t i = batch.offset[0]; i < batch.offset[batch.size]; ++i) {
uint32_t index = batch.index[i];
bst_float fvalue = batch.value == nullptr ? 1.0f : batch.value[i];
data.push_back(SparseBatch::Entry(index, fvalue));
data.emplace_back(index, fvalue);
}
CHECK_EQ(offset.back(), data.size());
}
@ -145,7 +145,7 @@ class SparsePage {
class SparsePage::Format {
public:
/*! \brief virtual destructor */
virtual ~Format() {}
virtual ~Format() = default;
/*!
* \brief Load all the segments into page, advance fi to end of the block.
* \param page The data to read page into.

@ -94,9 +94,9 @@ void SparsePageDMatrix::ColPageIter::Init(const std::vector<bst_uint>& index_set
}

dmlc::DataIter<ColBatch>* SparsePageDMatrix::ColIterator() {
CHECK(col_iter_.get() != nullptr);
CHECK(col_iter_ != nullptr);
std::vector<bst_uint> col_index;
size_t ncol = this->info().num_col;
size_t ncol = this->Info().num_col_;
for (size_t i = 0; i < ncol; ++i) {
col_index.push_back(static_cast<bst_uint>(i));
}
@ -106,12 +106,12 @@ dmlc::DataIter<ColBatch>* SparsePageDMatrix::ColIterator() {

dmlc::DataIter<ColBatch>* SparsePageDMatrix::
ColIterator(const std::vector<bst_uint>& fset) {
CHECK(col_iter_.get() != nullptr);
CHECK(col_iter_ != nullptr);
std::vector<bst_uint> col_index;
size_t ncol = this->info().num_col;
for (size_t i = 0; i < fset.size(); ++i) {
if (fset[i] < ncol) {
col_index.push_back(fset[i]);
size_t ncol = this->Info().num_col_;
for (auto fidx : fset) {
if (fidx < ncol) {
col_index.push_back(fidx);
}
}
col_iter_->Init(col_index, false);
@ -126,7 +126,7 @@ bool SparsePageDMatrix::TryInitColData(bool sorted) {
std::string col_meta_name = cache_shards[0] + ".col.meta";
std::unique_ptr<dmlc::Stream> fmeta(
dmlc::Stream::Create(col_meta_name.c_str(), "r", true));
if (fmeta.get() == nullptr) return false;
if (fmeta == nullptr) return false;
CHECK(fmeta->Read(&buffered_rowset_)) << "invalid col.meta file";
CHECK(fmeta->Read(&col_size_)) << "invalid col.meta file";
}
@ -136,7 +136,7 @@ bool SparsePageDMatrix::TryInitColData(bool sorted) {
std::string col_data_name = prefix + ".col.page";
std::unique_ptr<dmlc::SeekStream> fdata(
dmlc::SeekStream::CreateForRead(col_data_name.c_str(), true));
if (fdata.get() == nullptr) return false;
if (fdata == nullptr) return false;
files.push_back(std::move(fdata));
}
col_iter_.reset(new ColPageIter(std::move(files)));
@ -150,12 +150,12 @@ void SparsePageDMatrix::InitColAccess(const std::vector<bool>& enabled,
size_t max_row_perbatch, bool sorted) {
if (HaveColAccess(sorted)) return;
if (TryInitColData(sorted)) return;
const MetaInfo& info = this->info();
const MetaInfo& info = this->Info();
if (max_row_perbatch == std::numeric_limits<size_t>::max()) {
max_row_perbatch = kMaxRowPerBatch;
}
buffered_rowset_.clear();
col_size_.resize(info.num_col);
buffered_rowset_.Clear();
col_size_.resize(info.num_col_);
std::fill(col_size_.begin(), col_size_.end(), 0);
dmlc::DataIter<RowBatch>* iter = this->RowIterator();
std::bernoulli_distribution coin_flip(pkeep);
@ -173,7 +173,7 @@ void SparsePageDMatrix::InitColAccess(const std::vector<bool>& enabled,
const int nthread = std::max(omp_get_max_threads(), std::max(omp_get_num_procs() / 2 - 1, 1));
common::ParallelGroupBuilder<SparseBatch::Entry>
builder(&pcol->offset, &pcol->data);
builder.InitBudget(info.num_col, nthread);
builder.InitBudget(info.num_col_, nthread);
bst_omp_uint ndata = static_cast<bst_uint>(prow.Size());
#pragma omp parallel for schedule(static) num_threads(nthread)
for (bst_omp_uint i = 0; i < ndata; ++i) {
@ -196,10 +196,10 @@ void SparsePageDMatrix::InitColAccess(const std::vector<bool>& enabled,
tid);
}
}
CHECK_EQ(pcol->Size(), info.num_col);
CHECK_EQ(pcol->Size(), info.num_col_);
// sort columns
if (sorted) {
bst_omp_uint ncol = static_cast<bst_omp_uint>(pcol->Size());
auto ncol = static_cast<bst_omp_uint>(pcol->Size());
#pragma omp parallel for schedule(dynamic, 1) num_threads(nthread)
for (bst_omp_uint i = 0; i < ncol; ++i) {
if (pcol->offset[i] < pcol->offset[i + 1]) {
@ -213,16 +213,16 @@ void SparsePageDMatrix::InitColAccess(const std::vector<bool>& enabled,

auto make_next_col = [&] (SparsePage* dptr) {
tmp.Clear();
size_t btop = buffered_rowset_.size();
size_t btop = buffered_rowset_.Size();

while (true) {
if (batch_ptr != batch_top) {
const RowBatch& batch = iter->Value();
CHECK_EQ(batch_top, batch.size);
for (size_t i = batch_ptr; i < batch_top; ++i) {
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
auto ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (pkeep == 1.0f || coin_flip(rnd)) {
buffered_rowset_.push_back(ridx);
buffered_rowset_.PushBack(ridx);
tmp.Push(batch[i]);
}

@ -263,7 +263,7 @@ void SparsePageDMatrix::InitColAccess(const std::vector<bool>& enabled,
double tstart = dmlc::GetTime();
size_t bytes_write = 0;
// print every 4 sec.
const double kStep = 4.0;
constexpr double kStep = 4.0;
size_t tick_expected = kStep;

while (make_next_col(page.get())) {
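
`const double kStep` becomes `constexpr double kStep`: constexpr guarantees the constant is evaluated at compile time rather than merely being immutable. A two-line sketch:

    int main() {
      constexpr double kStep = 4.0;                // compile-time constant
      static_assert(kStep == 4.0, "usable in constant expressions");
      const double runtime_only = kStep;           // plain const: immutable, but
      return runtime_only == kStep ? 0 : 1;        // not required at compile time
    }
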

@ -10,6 +10,7 @@
#include <xgboost/base.h>
#include <xgboost/data.h>
#include <dmlc/threadediter.h>
#include <utility>
#include <vector>
#include <algorithm>
#include <string>
@ -22,15 +23,15 @@ namespace data {
class SparsePageDMatrix : public DMatrix {
public:
explicit SparsePageDMatrix(std::unique_ptr<DataSource>&& source,
const std::string& cache_info)
: source_(std::move(source)), cache_info_(cache_info) {
std::string cache_info)
: source_(std::move(source)), cache_info_(std::move(cache_info)) {
}

MetaInfo& info() override {
MetaInfo& Info() override {
return source_->info;
}

const MetaInfo& info() const override {
const MetaInfo& Info() const override {
return source_->info;
}
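
Changing the cache_info parameter from const std::string& to by-value plus std::move is the sink-argument idiom: rvalue callers pay a move, lvalue callers pay exactly one copy. A hedged sketch; Holder is illustrative, not the real class:

    #include <string>
    #include <utility>

    class Holder {
     public:
      // by-value + move handles lvalue and rvalue callers uniformly
      explicit Holder(std::string cache_info)
          : cache_info_(std::move(cache_info)) {}

      const std::string& Get() const { return cache_info_; }

     private:
      std::string cache_info_;
    };

    int main() {
      std::string name = "dtrain.cache";
      Holder a(name);             // lvalue: one copy, then a cheap move
      Holder b("dtest.cache");    // rvalue: moved straight through
      return (a.Get() == name && !b.Get().empty()) ? 0 : 1;
    }
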

@ -41,10 +42,10 @@ class SparsePageDMatrix : public DMatrix {
}

bool HaveColAccess(bool sorted) const override {
return col_iter_.get() != nullptr && col_iter_->sorted == sorted;
return col_iter_ != nullptr && col_iter_->sorted == sorted;
}

const RowSet& buffered_rowset() const override {
const RowSet& BufferedRowset() const override {
return buffered_rowset_;
}

@ -53,8 +54,8 @@ class SparsePageDMatrix : public DMatrix {
}

float GetColDensity(size_t cidx) const override {
size_t nmiss = buffered_rowset_.size() - col_size_[cidx];
return 1.0f - (static_cast<float>(nmiss)) / buffered_rowset_.size();
size_t nmiss = buffered_rowset_.Size() - col_size_[cidx];
return 1.0f - (static_cast<float>(nmiss)) / buffered_rowset_.Size();
}

bool SingleColBlock() const override {
@ -79,7 +80,7 @@ class SparsePageDMatrix : public DMatrix {
class ColPageIter : public dmlc::DataIter<ColBatch> {
public:
explicit ColPageIter(std::vector<std::unique_ptr<dmlc::SeekStream> >&& files);
virtual ~ColPageIter();
~ColPageIter() override;
void BeforeFirst() override;
const ColBatch &Value() const override {
return out_;

@ -34,8 +34,7 @@ class SparsePageRawFormat : public SparsePage::Format {
// setup the offset
page->offset.clear();
page->offset.push_back(0);
for (size_t i = 0; i < sorted_index_set.size(); ++i) {
bst_uint fid = sorted_index_set[i];
for (unsigned int fid : sorted_index_set) {
CHECK_LT(fid + 1, disk_offset_.size());
size_t size = disk_offset_[fid + 1] - disk_offset_[fid];
page->offset.push_back(page->offset.back() + size);

@ -89,12 +89,12 @@ bool SparsePageSource::CacheExist(const std::string& cache_info) {
{
std::string name_info = cache_shards[0];
std::unique_ptr<dmlc::Stream> finfo(dmlc::Stream::Create(name_info.c_str(), "r", true));
if (finfo.get() == nullptr) return false;
if (finfo == nullptr) return false;
}
for (const std::string& prefix : cache_shards) {
std::string name_row = prefix + ".row.page";
std::unique_ptr<dmlc::Stream> frow(dmlc::Stream::Create(name_row.c_str(), "r", true));
if (frow.get() == nullptr) return false;
if (frow == nullptr) return false;
}
return true;
}
@ -119,22 +119,22 @@ void SparsePageSource::Create(dmlc::Parser<uint32_t>* src,
size_t bytes_write = 0;
double tstart = dmlc::GetTime();
// print every 4 sec.
const double kStep = 4.0;
constexpr double kStep = 4.0;
size_t tick_expected = static_cast<double>(kStep);

while (src->Next()) {
const dmlc::RowBlock<uint32_t>& batch = src->Value();
if (batch.label != nullptr) {
info.labels.insert(info.labels.end(), batch.label, batch.label + batch.size);
info.labels_.insert(info.labels_.end(), batch.label, batch.label + batch.size);
}
if (batch.weight != nullptr) {
info.weights.insert(info.weights.end(), batch.weight, batch.weight + batch.size);
info.weights_.insert(info.weights_.end(), batch.weight, batch.weight + batch.size);
}
info.num_row += batch.size;
info.num_nonzero += batch.offset[batch.size] - batch.offset[0];
info.num_row_ += batch.size;
info.num_nonzero_ += batch.offset[batch.size] - batch.offset[0];
for (size_t i = batch.offset[0]; i < batch.offset[batch.size]; ++i) {
uint32_t index = batch.index[i];
info.num_col = std::max(info.num_col,
info.num_col_ = std::max(info.num_col_,
static_cast<uint64_t>(index + 1));
}
page->Push(batch);
@ -183,7 +183,7 @@ void SparsePageSource::Create(DMatrix* src,
std::shared_ptr<SparsePage> page;
writer.Alloc(&page); page->Clear();

MetaInfo info = src->info();
MetaInfo info = src->Info();
size_t bytes_write = 0;
double tstart = dmlc::GetTime();
dmlc::DataIter<RowBatch>* iter = src->RowIterator();

@ -33,7 +33,7 @@ class SparsePageSource : public DataSource {
*/
explicit SparsePageSource(const std::string& cache_prefix) noexcept(false);
/*! \brief destructor */
virtual ~SparsePageSource();
~SparsePageSource() override;
// implement Next
bool Next() override;
// implement BeforeFirst

@ -34,7 +34,7 @@ SparsePage::Writer::Writer(
fo->Write(format_shard);
std::shared_ptr<SparsePage> page;
while (wqueue->Pop(&page)) {
if (page.get() == nullptr) break;
if (page == nullptr) break;
fmt->Write(*page, fo.get());
qrecycle_.Push(std::move(page));
}
@ -61,7 +61,7 @@ void SparsePage::Writer::PushWrite(std::shared_ptr<SparsePage>&& page) {
}

void SparsePage::Writer::Alloc(std::shared_ptr<SparsePage>* out_page) {
CHECK(out_page->get() == nullptr);
CHECK(*out_page == nullptr);
if (num_free_buffer_ != 0) {
out_page->reset(new SparsePage());
--num_free_buffer_;

@ -52,9 +52,9 @@ class GBLinear : public GradientBooster {
explicit GBLinear(const std::vector<std::shared_ptr<DMatrix> > &cache,
bst_float base_margin)
: base_margin_(base_margin),
sum_instance_weight(0),
sum_weight_complete(false),
is_converged(false) {
sum_instance_weight_(0),
sum_weight_complete_(false),
is_converged_(false) {
// Add matrices to the prediction cache
for (auto &d : cache) {
PredictionCacheEntry e;
@ -63,46 +63,46 @@ class GBLinear : public GradientBooster {
}
}
void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) override {
if (model.weight.size() == 0) {
model.param.InitAllowUnknown(cfg);
if (model_.weight.size() == 0) {
model_.param.InitAllowUnknown(cfg);
}
param.InitAllowUnknown(cfg);
updater.reset(LinearUpdater::Create(param.updater));
updater->Init(cfg);
monitor.Init("GBLinear ", param.debug_verbose);
param_.InitAllowUnknown(cfg);
updater_.reset(LinearUpdater::Create(param_.updater));
updater_->Init(cfg);
monitor_.Init("GBLinear ", param_.debug_verbose);
}
void Load(dmlc::Stream* fi) override {
model.Load(fi);
model_.Load(fi);
}
void Save(dmlc::Stream* fo) const override {
model.Save(fo);
model_.Save(fo);
}

void DoBoost(DMatrix *p_fmat,
HostDeviceVector<bst_gpair> *in_gpair,
HostDeviceVector<GradientPair> *in_gpair,
ObjFunction* obj) override {
monitor.Start("DoBoost");
monitor_.Start("DoBoost");

if (!p_fmat->HaveColAccess(false)) {
std::vector<bool> enabled(p_fmat->info().num_col, true);
p_fmat->InitColAccess(enabled, 1.0f, param.max_row_perbatch, false);
std::vector<bool> enabled(p_fmat->Info().num_col_, true);
p_fmat->InitColAccess(enabled, 1.0f, param_.max_row_perbatch, false);
}

model.LazyInitModel();
model_.LazyInitModel();
this->LazySumWeights(p_fmat);

if (!this->CheckConvergence()) {
updater->Update(&in_gpair->data_h(), p_fmat, &model, sum_instance_weight);
updater_->Update(&in_gpair->HostVector(), p_fmat, &model_, sum_instance_weight_);
}
this->UpdatePredictionCache();

monitor.Stop("DoBoost");
monitor_.Stop("DoBoost");
}

void PredictBatch(DMatrix *p_fmat,
HostDeviceVector<bst_float> *out_preds,
unsigned ntree_limit) override {
monitor.Start("PredictBatch");
monitor_.Start("PredictBatch");
CHECK_EQ(ntree_limit, 0U)
<< "GBLinear::Predict ntrees is only valid for gbtree predictor";

@ -110,19 +110,19 @@ class GBLinear : public GradientBooster {
auto it = cache_.find(p_fmat);
if (it != cache_.end() && it->second.predictions.size() != 0) {
std::vector<bst_float> &y = it->second.predictions;
out_preds->resize(y.size());
std::copy(y.begin(), y.end(), out_preds->data_h().begin());
out_preds->Resize(y.size());
std::copy(y.begin(), y.end(), out_preds->HostVector().begin());
} else {
this->PredictBatchInternal(p_fmat, &out_preds->data_h());
this->PredictBatchInternal(p_fmat, &out_preds->HostVector());
}
monitor.Stop("PredictBatch");
monitor_.Stop("PredictBatch");
}
// add base margin
void PredictInstance(const SparseBatch::Inst &inst,
std::vector<bst_float> *out_preds,
unsigned ntree_limit,
unsigned root_index) override {
const int ngroup = model.param.num_output_group;
const int ngroup = model_.param.num_output_group;
for (int gid = 0; gid < ngroup; ++gid) {
this->Pred(inst, dmlc::BeginPtr(*out_preds), gid, base_margin_);
}
@ -138,15 +138,15 @@ class GBLinear : public GradientBooster {
std::vector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate, int condition = 0,
unsigned condition_feature = 0) override {
model.LazyInitModel();
model_.LazyInitModel();
CHECK_EQ(ntree_limit, 0U)
<< "GBLinear::PredictContribution: ntrees is only valid for gbtree predictor";
const std::vector<bst_float>& base_margin = p_fmat->info().base_margin;
const int ngroup = model.param.num_output_group;
const size_t ncolumns = model.param.num_feature + 1;
const std::vector<bst_float>& base_margin = p_fmat->Info().base_margin_;
const int ngroup = model_.param.num_output_group;
const size_t ncolumns = model_.param.num_feature + 1;
// allocate space for (#features + bias) times #groups times #rows
std::vector<bst_float>& contribs = *out_contribs;
contribs.resize(p_fmat->info().num_row * ncolumns * ngroup);
contribs.resize(p_fmat->Info().num_row_ * ncolumns * ngroup);
// make sure contributions is zeroed, we could be reusing a previously allocated one
std::fill(contribs.begin(), contribs.end(), 0);
// start collecting the contributions
@ -155,21 +155,21 @@ class GBLinear : public GradientBooster {
while (iter->Next()) {
const RowBatch& batch = iter->Value();
// parallel over local batch
const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
const auto nsize = static_cast<bst_omp_uint>(batch.size);
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nsize; ++i) {
const RowBatch::Inst &inst = batch[i];
size_t row_idx = static_cast<size_t>(batch.base_rowid + i);
auto row_idx = static_cast<size_t>(batch.base_rowid + i);
// loop over output groups
for (int gid = 0; gid < ngroup; ++gid) {
bst_float *p_contribs = &contribs[(row_idx * ngroup + gid) * ncolumns];
// calculate linear terms' contributions
for (bst_uint c = 0; c < inst.length; ++c) {
if (inst[c].index >= model.param.num_feature) continue;
p_contribs[inst[c].index] = inst[c].fvalue * model[inst[c].index][gid];
if (inst[c].index >= model_.param.num_feature) continue;
p_contribs[inst[c].index] = inst[c].fvalue * model_[inst[c].index][gid];
}
// add base margin to BIAS
p_contribs[ncolumns - 1] = model.bias()[gid] +
p_contribs[ncolumns - 1] = model_.bias()[gid] +
((base_margin.size() != 0) ? base_margin[row_idx * ngroup + gid] : base_margin_);
}
}
@ -182,34 +182,34 @@ class GBLinear : public GradientBooster {
std::vector<bst_float>& contribs = *out_contribs;

// linear models have no interaction effects
const size_t nelements = model.param.num_feature*model.param.num_feature;
contribs.resize(p_fmat->info().num_row * nelements * model.param.num_output_group);
const size_t nelements = model_.param.num_feature*model_.param.num_feature;
contribs.resize(p_fmat->Info().num_row_ * nelements * model_.param.num_output_group);
std::fill(contribs.begin(), contribs.end(), 0);
}

std::vector<std::string> DumpModel(const FeatureMap& fmap,
bool with_stats,
std::string format) const override {
return model.DumpModel(fmap, with_stats, format);
return model_.DumpModel(fmap, with_stats, format);
}

protected:
void PredictBatchInternal(DMatrix *p_fmat,
std::vector<bst_float> *out_preds) {
monitor.Start("PredictBatchInternal");
model.LazyInitModel();
monitor_.Start("PredictBatchInternal");
model_.LazyInitModel();
std::vector<bst_float> &preds = *out_preds;
const std::vector<bst_float>& base_margin = p_fmat->info().base_margin;
const std::vector<bst_float>& base_margin = p_fmat->Info().base_margin_;
// start collecting the prediction
dmlc::DataIter<RowBatch> *iter = p_fmat->RowIterator();
const int ngroup = model.param.num_output_group;
preds.resize(p_fmat->info().num_row * ngroup);
const int ngroup = model_.param.num_output_group;
preds.resize(p_fmat->Info().num_row_ * ngroup);
while (iter->Next()) {
const RowBatch &batch = iter->Value();
// output convention: nrow * k, where nrow is number of rows
// k is number of group
// parallel over local batch
const omp_ulong nsize = static_cast<omp_ulong>(batch.size);
const auto nsize = static_cast<omp_ulong>(batch.size);
#pragma omp parallel for schedule(static)
for (omp_ulong i = 0; i < nsize; ++i) {
const size_t ridx = batch.base_rowid + i;
@ -221,14 +221,14 @@ class GBLinear : public GradientBooster {
}
}
}
monitor.Stop("PredictBatchInternal");
monitor_.Stop("PredictBatchInternal");
}
void UpdatePredictionCache() {
// update cache entry
for (auto &kv : cache_) {
PredictionCacheEntry &e = kv.second;
if (e.predictions.size() == 0) {
size_t n = model.param.num_output_group * e.data->info().num_row;
size_t n = model_.param.num_output_group * e.data->Info().num_row_;
e.predictions.resize(n);
}
this->PredictBatchInternal(e.data.get(), &e.predictions);
@ -236,53 +236,53 @@ class GBLinear : public GradientBooster {
}

bool CheckConvergence() {
if (param.tolerance == 0.0f) return false;
if (is_converged) return true;
if (previous_model.weight.size() != model.weight.size()) {
previous_model = model;
if (param_.tolerance == 0.0f) return false;
if (is_converged_) return true;
if (previous_model_.weight.size() != model_.weight.size()) {
previous_model_ = model_;
return false;
}
float largest_dw = 0.0;
for (size_t i = 0; i < model.weight.size(); i++) {
for (size_t i = 0; i < model_.weight.size(); i++) {
largest_dw = std::max(
largest_dw, std::abs(model.weight[i] - previous_model.weight[i]));
largest_dw, std::abs(model_.weight[i] - previous_model_.weight[i]));
}
previous_model = model;
previous_model_ = model_;

is_converged = largest_dw <= param.tolerance;
return is_converged;
is_converged_ = largest_dw <= param_.tolerance;
return is_converged_;
}

void LazySumWeights(DMatrix *p_fmat) {
if (!sum_weight_complete) {
auto &info = p_fmat->info();
for (size_t i = 0; i < info.num_row; i++) {
sum_instance_weight += info.GetWeight(i);
if (!sum_weight_complete_) {
auto &info = p_fmat->Info();
for (size_t i = 0; i < info.num_row_; i++) {
sum_instance_weight_ += info.GetWeight(i);
}
sum_weight_complete = true;
sum_weight_complete_ = true;
}
}

inline void Pred(const RowBatch::Inst &inst, bst_float *preds, int gid,
bst_float base) {
bst_float psum = model.bias()[gid] + base;
bst_float psum = model_.bias()[gid] + base;
for (bst_uint i = 0; i < inst.length; ++i) {
if (inst[i].index >= model.param.num_feature) continue;
psum += inst[i].fvalue * model[inst[i].index][gid];
if (inst[i].index >= model_.param.num_feature) continue;
psum += inst[i].fvalue * model_[inst[i].index][gid];
}
preds[gid] = psum;
}
bst_float base_margin_;
|
||||
// model field
|
||||
GBLinearModel model;
|
||||
GBLinearModel previous_model;
|
||||
GBLinearTrainParam param;
|
||||
std::unique_ptr<LinearUpdater> updater;
|
||||
double sum_instance_weight;
|
||||
bool sum_weight_complete;
|
||||
common::Monitor monitor;
|
||||
bool is_converged;
|
||||
GBLinearModel model_;
|
||||
GBLinearModel previous_model_;
|
||||
GBLinearTrainParam param_;
|
||||
std::unique_ptr<LinearUpdater> updater_;
|
||||
double sum_instance_weight_;
|
||||
bool sum_weight_complete_;
|
||||
common::Monitor monitor_;
|
||||
bool is_converged_;
|
||||
|
||||
/**
|
||||
* \struct PredictionCacheEntry
|
||||
|
||||
@ -40,7 +40,7 @@ class GBLinearModel {
|
||||
// weight for each of feature, bias is the last one
|
||||
std::vector<bst_float> weight;
|
||||
// initialize the model parameter
|
||||
inline void LazyInitModel(void) {
|
||||
inline void LazyInitModel() {
|
||||
if (!weight.empty()) return;
|
||||
// bias is the last weight
|
||||
weight.resize((param.num_feature + 1) * param.num_output_group);
|
||||
|
||||
@ -143,32 +143,32 @@ class GBTree : public GradientBooster {
|
||||
}
|
||||
|
||||
void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) override {
|
||||
this->cfg = cfg;
|
||||
this->cfg_ = cfg;
|
||||
model_.Configure(cfg);
|
||||
// initialize the updaters only when needed.
|
||||
std::string updater_seq = tparam.updater_seq;
|
||||
tparam.InitAllowUnknown(cfg);
|
||||
if (updater_seq != tparam.updater_seq) updaters.clear();
|
||||
for (const auto& up : updaters) {
|
||||
std::string updater_seq = tparam_.updater_seq;
|
||||
tparam_.InitAllowUnknown(cfg);
|
||||
if (updater_seq != tparam_.updater_seq) updaters_.clear();
|
||||
for (const auto& up : updaters_) {
|
||||
up->Init(cfg);
|
||||
}
|
||||
// for the 'update' process_type, move trees into trees_to_update
|
||||
if (tparam.process_type == kUpdate) {
|
||||
if (tparam_.process_type == kUpdate) {
|
||||
model_.InitTreesToUpdate();
|
||||
}
|
||||
|
||||
// configure predictor
|
||||
predictor = std::unique_ptr<Predictor>(Predictor::Create(tparam.predictor));
|
||||
predictor->Init(cfg, cache_);
|
||||
monitor.Init("GBTree", tparam.debug_verbose);
|
||||
predictor_ = std::unique_ptr<Predictor>(Predictor::Create(tparam_.predictor));
|
||||
predictor_->Init(cfg, cache_);
|
||||
monitor_.Init("GBTree", tparam_.debug_verbose);
|
||||
}
|
||||
|
||||
void Load(dmlc::Stream* fi) override {
|
||||
model_.Load(fi);
|
||||
|
||||
this->cfg.clear();
|
||||
this->cfg.push_back(std::make_pair(std::string("num_feature"),
|
||||
common::ToString(model_.param.num_feature)));
|
||||
this->cfg_.clear();
|
||||
this->cfg_.emplace_back(std::string("num_feature"),
|
||||
common::ToString(model_.param.num_feature));
|
||||
}
|
||||
|
||||
void Save(dmlc::Stream* fo) const override {
@ -177,29 +177,29 @@ class GBTree : public GradientBooster {

bool AllowLazyCheckPoint() const override {
return model_.param.num_output_group == 1 ||
tparam.updater_seq.find("distcol") != std::string::npos;
tparam_.updater_seq.find("distcol") != std::string::npos;
}

void DoBoost(DMatrix* p_fmat,
HostDeviceVector<bst_gpair>* in_gpair,
HostDeviceVector<GradientPair>* in_gpair,
ObjFunction* obj) override {
std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees;
const int ngroup = model_.param.num_output_group;
monitor.Start("BoostNewTrees");
monitor_.Start("BoostNewTrees");
if (ngroup == 1) {
std::vector<std::unique_ptr<RegTree> > ret;
BoostNewTrees(in_gpair, p_fmat, 0, &ret);
new_trees.push_back(std::move(ret));
} else {
CHECK_EQ(in_gpair->size() % ngroup, 0U)
CHECK_EQ(in_gpair->Size() % ngroup, 0U)
<< "must have exactly ngroup*nrow gpairs";
// TODO(canonizer): perform this on GPU if HostDeviceVector has device set.
HostDeviceVector<bst_gpair> tmp(in_gpair->size() / ngroup,
bst_gpair(), in_gpair->device());
std::vector<bst_gpair>& gpair_h = in_gpair->data_h();
bst_omp_uint nsize = static_cast<bst_omp_uint>(tmp.size());
HostDeviceVector<GradientPair> tmp(in_gpair->Size() / ngroup,
GradientPair(), in_gpair->DeviceIdx());
std::vector<GradientPair>& gpair_h = in_gpair->HostVector();
auto nsize = static_cast<bst_omp_uint>(tmp.Size());
for (int gid = 0; gid < ngroup; ++gid) {
std::vector<bst_gpair>& tmp_h = tmp.data_h();
std::vector<GradientPair>& tmp_h = tmp.HostVector();
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nsize; ++i) {
tmp_h[i] = gpair_h[i * ngroup + gid];
@ -209,43 +209,43 @@ class GBTree : public GradientBooster {
new_trees.push_back(std::move(ret));
}
}
monitor.Stop("BoostNewTrees");
monitor.Start("CommitModel");
monitor_.Stop("BoostNewTrees");
monitor_.Start("CommitModel");
this->CommitModel(std::move(new_trees));
monitor.Stop("CommitModel");
monitor_.Stop("CommitModel");
}

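Several changes in DoBoost swap a spelled-out type for auto where the initializer already names it, which is what modernize-use-auto flags. A sketch of the cast case:

#include <cstdint>
#include <vector>

using bst_omp_uint = uint32_t;  // stand-in for the project typedef

int main() {
  std::vector<float> tmp(128);
  // Before: the target type appears on both sides of the '='.
  bst_omp_uint n_old = static_cast<bst_omp_uint>(tmp.size());
  // After: the static_cast already states the type, so auto drops the echo.
  auto n_new = static_cast<bst_omp_uint>(tmp.size());
  return n_old == n_new ? 0 : 1;
}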
void PredictBatch(DMatrix* p_fmat,
HostDeviceVector<bst_float>* out_preds,
unsigned ntree_limit) override {
predictor->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
predictor_->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
}

void PredictInstance(const SparseBatch::Inst& inst,
std::vector<bst_float>* out_preds,
unsigned ntree_limit,
unsigned root_index) override {
predictor->PredictInstance(inst, out_preds, model_,
predictor_->PredictInstance(inst, out_preds, model_,
ntree_limit, root_index);
}

void PredictLeaf(DMatrix* p_fmat,
std::vector<bst_float>* out_preds,
unsigned ntree_limit) override {
predictor->PredictLeaf(p_fmat, out_preds, model_, ntree_limit);
predictor_->PredictLeaf(p_fmat, out_preds, model_, ntree_limit);
}

void PredictContribution(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate, int condition,
unsigned condition_feature) override {
predictor->PredictContribution(p_fmat, out_contribs, model_, ntree_limit, approximate);
predictor_->PredictContribution(p_fmat, out_contribs, model_, ntree_limit, approximate);
}

void PredictInteractionContributions(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate) override {
predictor->PredictInteractionContributions(p_fmat, out_contribs, model_,
predictor_->PredictInteractionContributions(p_fmat, out_contribs, model_,
ntree_limit, approximate);
}

@ -258,18 +258,18 @@ class GBTree : public GradientBooster {
protected:
// initialize updaters before using them
inline void InitUpdater() {
if (updaters.size() != 0) return;
std::string tval = tparam.updater_seq;
if (updaters_.size() != 0) return;
std::string tval = tparam_.updater_seq;
std::vector<std::string> ups = common::Split(tval, ',');
for (const std::string& pstr : ups) {
std::unique_ptr<TreeUpdater> up(TreeUpdater::Create(pstr.c_str()));
up->Init(this->cfg);
updaters.push_back(std::move(up));
up->Init(this->cfg_);
updaters_.push_back(std::move(up));
}
}

// do group-specific boosting
inline void BoostNewTrees(HostDeviceVector<bst_gpair>* gpair,
inline void BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
DMatrix *p_fmat,
int bst_group,
std::vector<std::unique_ptr<RegTree> >* ret) {
@ -277,26 +277,27 @@ class GBTree : public GradientBooster {
std::vector<RegTree*> new_trees;
ret->clear();
// create the trees
for (int i = 0; i < tparam.num_parallel_tree; ++i) {
if (tparam.process_type == kDefault) {
for (int i = 0; i < tparam_.num_parallel_tree; ++i) {
if (tparam_.process_type == kDefault) {
// create new tree
std::unique_ptr<RegTree> ptr(new RegTree());
ptr->param.InitAllowUnknown(this->cfg);
ptr->param.InitAllowUnknown(this->cfg_);
ptr->InitModel();
new_trees.push_back(ptr.get());
ret->push_back(std::move(ptr));
} else if (tparam.process_type == kUpdate) {
} else if (tparam_.process_type == kUpdate) {
CHECK_LT(model_.trees.size(), model_.trees_to_update.size());
// move an existing tree from trees_to_update
auto t = std::move(model_.trees_to_update[model_.trees.size() +
bst_group * tparam.num_parallel_tree + i]);
bst_group * tparam_.num_parallel_tree + i]);
new_trees.push_back(t.get());
ret->push_back(std::move(t));
}
}
// update the trees
for (auto& up : updaters)
for (auto& up : updaters_) {
up->Update(gpair, p_fmat, new_trees);
}
}

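The updater loop above also gains braces around its single-statement body, matching the google-readability-braces-around-statements check enabled by the google-* group. A sketch:

#include <memory>
#include <vector>

struct TreeUpdaterSketch { void Update() {} };

void RunAll(std::vector<std::unique_ptr<TreeUpdaterSketch> >* updaters) {
  // Before:
  //   for (auto& up : *updaters)
  //     up->Update();
  // After: the body is always braced, even for a single statement.
  for (auto& up : *updaters) {
    up->Update();
  }
}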
// commit new trees all at once
@ -307,22 +308,22 @@ class GBTree : public GradientBooster {
num_new_trees += new_trees[gid].size();
model_.CommitModel(std::move(new_trees[gid]), gid);
}
predictor->UpdatePredictionCache(model_, &updaters, num_new_trees);
predictor_->UpdatePredictionCache(model_, &updaters_, num_new_trees);
}

// --- data structure ---
GBTreeModel model_;
// training parameter
GBTreeTrainParam tparam;
GBTreeTrainParam tparam_;
// ----training fields----
// configurations for tree
std::vector<std::pair<std::string, std::string> > cfg;
std::vector<std::pair<std::string, std::string> > cfg_;
// the updaters that can be applied to each of the trees
std::vector<std::unique_ptr<TreeUpdater>> updaters;
std::vector<std::unique_ptr<TreeUpdater>> updaters_;
// Cached matrices
std::vector<std::shared_ptr<DMatrix>> cache_;
std::unique_ptr<Predictor> predictor;
common::Monitor monitor;
std::unique_ptr<Predictor> predictor_;
common::Monitor monitor_;
};

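The member renames above (tparam becomes tparam_, updaters becomes updaters_, and so on) apply the Google convention of a trailing underscore on non-public data members, leaving locals and parameters lower_case. A sketch of the scheme with hypothetical names:

// Sketch of the naming convention enforced via readability-identifier-naming.
class BoosterSketch {
 public:
  int NumTrees() const { return num_trees_; }  // functions: CamelCase

 private:
  int num_trees_ = 0;        // private member: lower_case + trailing '_'
  bool show_stats_ = false;
};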
// dart
@ -333,22 +334,22 @@ class Dart : public GBTree {
void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) override {
GBTree::Configure(cfg);
if (model_.trees.size() == 0) {
dparam.InitAllowUnknown(cfg);
dparam_.InitAllowUnknown(cfg);
}
}

void Load(dmlc::Stream* fi) override {
GBTree::Load(fi);
weight_drop.resize(model_.param.num_trees);
weight_drop_.resize(model_.param.num_trees);
if (model_.param.num_trees != 0) {
fi->Read(&weight_drop);
fi->Read(&weight_drop_);
}
}

void Save(dmlc::Stream* fo) const override {
GBTree::Save(fo);
if (weight_drop.size() != 0) {
fo->Write(weight_drop);
if (weight_drop_.size() != 0) {
fo->Write(weight_drop_);
}
}

@ -357,7 +358,7 @@ class Dart : public GBTree {
HostDeviceVector<bst_float>* out_preds,
unsigned ntree_limit) override {
DropTrees(ntree_limit);
PredLoopInternal<Dart>(p_fmat, &out_preds->data_h(), 0, ntree_limit, true);
PredLoopInternal<Dart>(p_fmat, &out_preds->HostVector(), 0, ntree_limit, true);
}

void PredictInstance(const SparseBatch::Inst& inst,
@ -365,9 +366,9 @@ class Dart : public GBTree {
unsigned ntree_limit,
unsigned root_index) override {
DropTrees(1);
if (thread_temp.size() == 0) {
thread_temp.resize(1, RegTree::FVec());
thread_temp[0].Init(model_.param.num_feature);
if (thread_temp_.size() == 0) {
thread_temp_.resize(1, RegTree::FVec());
thread_temp_[0].Init(model_.param.num_feature);
}
out_preds->resize(model_.param.num_output_group);
ntree_limit *= model_.param.num_output_group;
@ -378,7 +379,7 @@ class Dart : public GBTree {
for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
(*out_preds)[gid]
= PredValue(inst, gid, root_index,
&thread_temp[0], 0, ntree_limit) + model_.base_margin;
&thread_temp_[0], 0, ntree_limit) + model_.base_margin;
}
}

@ -400,8 +401,8 @@ class Dart : public GBTree {
}

if (init_out_preds) {
size_t n = num_group * p_fmat->info().num_row;
const std::vector<bst_float>& base_margin = p_fmat->info().base_margin;
size_t n = num_group * p_fmat->Info().num_row_;
const std::vector<bst_float>& base_margin = p_fmat->Info().base_margin_;
out_preds->resize(n);
if (base_margin.size() != 0) {
CHECK_EQ(out_preds->size(), n);
@ -427,37 +428,37 @@ class Dart : public GBTree {
int num_group,
unsigned tree_begin,
unsigned tree_end) {
const MetaInfo& info = p_fmat->info();
const MetaInfo& info = p_fmat->Info();
const int nthread = omp_get_max_threads();
CHECK_EQ(num_group, model_.param.num_output_group);
InitThreadTemp(nthread);
std::vector<bst_float>& preds = *out_preds;
CHECK_EQ(model_.param.size_leaf_vector, 0)
<< "size_leaf_vector is enforced to 0 so far";
CHECK_EQ(preds.size(), p_fmat->info().num_row * num_group);
CHECK_EQ(preds.size(), p_fmat->Info().num_row_ * num_group);
// start collecting the prediction
dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator();
Derived* self = static_cast<Derived*>(this);
auto* self = static_cast<Derived*>(this);
iter->BeforeFirst();
while (iter->Next()) {
const RowBatch &batch = iter->Value();
// parallel over local batch
const int K = 8;
const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
const bst_omp_uint rest = nsize % K;
constexpr int kUnroll = 8;
const auto nsize = static_cast<bst_omp_uint>(batch.size);
const bst_omp_uint rest = nsize % kUnroll;
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nsize - rest; i += K) {
for (bst_omp_uint i = 0; i < nsize - rest; i += kUnroll) {
const int tid = omp_get_thread_num();
RegTree::FVec& feats = thread_temp[tid];
int64_t ridx[K];
RowBatch::Inst inst[K];
for (int k = 0; k < K; ++k) {
RegTree::FVec& feats = thread_temp_[tid];
int64_t ridx[kUnroll];
RowBatch::Inst inst[kUnroll];
for (int k = 0; k < kUnroll; ++k) {
ridx[k] = static_cast<int64_t>(batch.base_rowid + i + k);
}
for (int k = 0; k < K; ++k) {
for (int k = 0; k < kUnroll; ++k) {
inst[k] = batch[i + k];
}
for (int k = 0; k < K; ++k) {
for (int k = 0; k < kUnroll; ++k) {
for (int gid = 0; gid < num_group; ++gid) {
const size_t offset = ridx[k] * num_group + gid;
preds[offset] +=
@ -467,8 +468,8 @@ class Dart : public GBTree {
}
}
for (bst_omp_uint i = nsize - rest; i < nsize; ++i) {
RegTree::FVec& feats = thread_temp[0];
const int64_t ridx = static_cast<int64_t>(batch.base_rowid + i);
RegTree::FVec& feats = thread_temp_[0];
const auto ridx = static_cast<int64_t>(batch.base_rowid + i);
const RowBatch::Inst inst = batch[i];
for (int gid = 0; gid < num_group; ++gid) {
const size_t offset = ridx * num_group + gid;
@ -489,9 +490,9 @@ class Dart : public GBTree {
model_.CommitModel(std::move(new_trees[gid]), gid);
}
size_t num_drop = NormalizeTrees(num_new_trees);
if (dparam.silent != 1) {
if (dparam_.silent != 1) {
LOG(INFO) << "drop " << num_drop << " trees, "
<< "weight = " << weight_drop.back();
<< "weight = " << weight_drop_.back();
}
}

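The kUnroll rewrite above combines two of the enabled rules: a local constant becomes constexpr, and it takes a CamelCase name with a 'k' prefix. A standalone sketch of the same unrolling pattern:

#include <cstddef>

constexpr int kUnroll = 8;  // was: const int K = 8;

float SumUnrolled(const float* data, size_t n) {
  float sum = 0.f;
  size_t i = 0;
  // main loop walks kUnroll elements at a time, as in the prediction loop
  for (; i + kUnroll <= n; i += kUnroll) {
    for (int k = 0; k < kUnroll; ++k) sum += data[i + k];
  }
  for (; i < n; ++i) sum += data[i];  // leftover "rest" elements
  return sum;
}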
@ -506,10 +507,10 @@ class Dart : public GBTree {
p_feats->Fill(inst);
for (size_t i = tree_begin; i < tree_end; ++i) {
if (model_.tree_info[i] == bst_group) {
bool drop = (std::binary_search(idx_drop.begin(), idx_drop.end(), i));
bool drop = (std::binary_search(idx_drop_.begin(), idx_drop_.end(), i));
if (!drop) {
int tid = model_.trees[i]->GetLeafIndex(*p_feats, root_index);
psum += weight_drop[i] * (*model_.trees[i])[tid].leaf_value();
psum += weight_drop_[i] * (*model_.trees[i])[tid].LeafValue();
}
}
}
@ -519,45 +520,45 @@ class Dart : public GBTree {

// select which trees to drop
inline void DropTrees(unsigned ntree_limit_drop) {
idx_drop.clear();
idx_drop_.clear();
if (ntree_limit_drop > 0) return;

std::uniform_real_distribution<> runif(0.0, 1.0);
auto& rnd = common::GlobalRandom();
bool skip = false;
if (dparam.skip_drop > 0.0) skip = (runif(rnd) < dparam.skip_drop);
if (dparam_.skip_drop > 0.0) skip = (runif(rnd) < dparam_.skip_drop);
// sample some trees to drop
if (!skip) {
if (dparam.sample_type == 1) {
if (dparam_.sample_type == 1) {
bst_float sum_weight = 0.0;
for (size_t i = 0; i < weight_drop.size(); ++i) {
sum_weight += weight_drop[i];
for (auto elem : weight_drop_) {
sum_weight += elem;
}
for (size_t i = 0; i < weight_drop.size(); ++i) {
if (runif(rnd) < dparam.rate_drop * weight_drop.size() * weight_drop[i] / sum_weight) {
idx_drop.push_back(i);
for (size_t i = 0; i < weight_drop_.size(); ++i) {
if (runif(rnd) < dparam_.rate_drop * weight_drop_.size() * weight_drop_[i] / sum_weight) {
idx_drop_.push_back(i);
}
}
if (dparam.one_drop && idx_drop.empty() && !weight_drop.empty()) {
if (dparam_.one_drop && idx_drop_.empty() && !weight_drop_.empty()) {
// the expression below is an ugly but MSVC2013-friendly equivalent of
// size_t i = std::discrete_distribution<size_t>(weight_drop.begin(),
// weight_drop.end())(rnd);
size_t i = std::discrete_distribution<size_t>(
weight_drop.size(), 0., static_cast<double>(weight_drop.size()),
weight_drop_.size(), 0., static_cast<double>(weight_drop_.size()),
[this](double x) -> double {
return weight_drop[static_cast<size_t>(x)];
return weight_drop_[static_cast<size_t>(x)];
})(rnd);
idx_drop.push_back(i);
idx_drop_.push_back(i);
}
} else {
for (size_t i = 0; i < weight_drop.size(); ++i) {
if (runif(rnd) < dparam.rate_drop) {
idx_drop.push_back(i);
for (size_t i = 0; i < weight_drop_.size(); ++i) {
if (runif(rnd) < dparam_.rate_drop) {
idx_drop_.push_back(i);
}
}
if (dparam.one_drop && idx_drop.empty() && !weight_drop.empty()) {
size_t i = std::uniform_int_distribution<size_t>(0, weight_drop.size() - 1)(rnd);
idx_drop.push_back(i);
if (dparam_.one_drop && idx_drop_.empty() && !weight_drop_.empty()) {
size_t i = std::uniform_int_distribution<size_t>(0, weight_drop_.size() - 1)(rnd);
idx_drop_.push_back(i);
}
}
}
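The sum over weight_drop_ above is a modernize-loop-convert rewrite: an index used only to read elements becomes a range-for. A sketch:

#include <vector>

float SumWeights(const std::vector<float>& weight_drop) {
  float sum_weight = 0.f;
  // Before: for (size_t i = 0; i < weight_drop.size(); ++i)
  //           sum_weight += weight_drop[i];
  // After: iterate the elements directly; by value is fine for floats.
  for (auto elem : weight_drop) {
    sum_weight += elem;
  }
  return sum_weight;
}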
@ -565,58 +566,58 @@ class Dart : public GBTree {

// set normalization factors
inline size_t NormalizeTrees(size_t size_new_trees) {
float lr = 1.0 * dparam.learning_rate / size_new_trees;
size_t num_drop = idx_drop.size();
float lr = 1.0 * dparam_.learning_rate / size_new_trees;
size_t num_drop = idx_drop_.size();
if (num_drop == 0) {
for (size_t i = 0; i < size_new_trees; ++i) {
weight_drop.push_back(1.0);
weight_drop_.push_back(1.0);
}
} else {
if (dparam.normalize_type == 1) {
if (dparam_.normalize_type == 1) {
// normalize_type 1
float factor = 1.0 / (1.0 + lr);
for (size_t i = 0; i < idx_drop.size(); ++i) {
weight_drop[idx_drop[i]] *= factor;
for (auto i : idx_drop_) {
weight_drop_[i] *= factor;
}
for (size_t i = 0; i < size_new_trees; ++i) {
weight_drop.push_back(factor);
weight_drop_.push_back(factor);
}
} else {
// normalize_type 0
float factor = 1.0 * num_drop / (num_drop + lr);
for (size_t i = 0; i < idx_drop.size(); ++i) {
weight_drop[idx_drop[i]] *= factor;
for (auto i : idx_drop_) {
weight_drop_[i] *= factor;
}
for (size_t i = 0; i < size_new_trees; ++i) {
weight_drop.push_back(1.0 / (num_drop + lr));
weight_drop_.push_back(1.0 / (num_drop + lr));
}
}
}
// reset
idx_drop.clear();
idx_drop_.clear();
return num_drop;
}

// init thread buffers
inline void InitThreadTemp(int nthread) {
int prev_thread_temp_size = thread_temp.size();
int prev_thread_temp_size = thread_temp_.size();
if (prev_thread_temp_size < nthread) {
thread_temp.resize(nthread, RegTree::FVec());
thread_temp_.resize(nthread, RegTree::FVec());
for (int i = prev_thread_temp_size; i < nthread; ++i) {
thread_temp[i].Init(model_.param.num_feature);
thread_temp_[i].Init(model_.param.num_feature);
}
}
}

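Reading the arithmetic off NormalizeTrees above, with $\mathrm{lr} = \texttt{learning\_rate}/\texttt{size\_new\_trees}$ and $k$ dropped trees: for normalize_type 1 each dropped tree is rescaled as $w_i \leftarrow \frac{w_i}{1+\mathrm{lr}}$ and each new tree gets weight $\frac{1}{1+\mathrm{lr}}$; for normalize_type 0 dropped trees are rescaled as $w_i \leftarrow \frac{k}{k+\mathrm{lr}}\,w_i$ while new trees get $\frac{1}{k+\mathrm{lr}}$. With nothing dropped ($k = 0$), new trees simply receive weight 1.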
// --- data structure ---
// training parameter
DartTrainParam dparam;
DartTrainParam dparam_;
/*! \brief per-tree weights used by DART */
std::vector<bst_float> weight_drop;
std::vector<bst_float> weight_drop_;
// indexes of dropped trees
std::vector<size_t> idx_drop;
std::vector<size_t> idx_drop_;
// temporary per-thread storage
std::vector<RegTree::FVec> thread_temp;
std::vector<RegTree::FVec> thread_temp_;
};

// register the objective functions
@ -627,7 +628,7 @@ DMLC_REGISTER_PARAMETER(DartTrainParam);
XGBOOST_REGISTER_GBM(GBTree, "gbtree")
.describe("Tree booster, gradient boosted trees.")
.set_body([](const std::vector<std::shared_ptr<DMatrix> >& cached_mats, bst_float base_margin) {
GBTree* p = new GBTree(base_margin);
auto* p = new GBTree(base_margin);
p->InitCache(cached_mats);
return p;
});

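The registration lambda above switches to auto* for the new-expression, the other common modernize-use-auto case. A sketch with hypothetical names:

struct GBTreeSketch {
  explicit GBTreeSketch(float base_margin) : base_margin_(base_margin) {}
  float base_margin_;
};

GBTreeSketch* CreateSketch(float base_margin) {
  // Before: GBTreeSketch* p = new GBTreeSketch(base_margin);
  // After: the new-expression already spells the type once.
  auto* p = new GBTreeSketch(base_margin);
  return p;  // caller takes ownership, as in the factory body above
}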
@ -70,8 +70,8 @@ struct GBTreeModel {

void InitTreesToUpdate() {
if (trees_to_update.size() == 0u) {
for (size_t i = 0; i < trees.size(); ++i) {
trees_to_update.push_back(std::move(trees[i]));
for (auto & tree : trees) {
trees_to_update.push_back(std::move(tree));
}
trees.clear();
param.num_trees = 0;
@ -100,8 +100,8 @@ struct GBTreeModel {
void Save(dmlc::Stream* fo) const {
CHECK_EQ(param.num_trees, static_cast<int>(trees.size()));
fo->Write(&param, sizeof(param));
for (size_t i = 0; i < trees.size(); ++i) {
trees[i]->Save(fo);
for (const auto & tree : trees) {
tree->Save(fo);
}
if (tree_info.size() != 0) {
fo->Write(dmlc::BeginPtr(tree_info), sizeof(int) * tree_info.size());
@ -111,15 +111,15 @@ struct GBTreeModel {
std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,
std::string format) const {
std::vector<std::string> dump;
for (size_t i = 0; i < trees.size(); i++) {
dump.push_back(trees[i]->DumpModel(fmap, with_stats, format));
for (const auto & tree : trees) {
dump.push_back(tree->DumpModel(fmap, with_stats, format));
}
return dump;
}
void CommitModel(std::vector<std::unique_ptr<RegTree> >&& new_trees,
int bst_group) {
for (size_t i = 0; i < new_trees.size(); ++i) {
trees.push_back(std::move(new_trees[i]));
for (auto & new_tree : new_trees) {
trees.push_back(std::move(new_tree));
tree_info.push_back(bst_group);
}
param.num_trees += static_cast<int>(new_trees.size());

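The converted loop in InitTreesToUpdate is worth a second look: because each element is moved out, the range variable must be a mutable reference. A sketch of the same pattern (hypothetical names):

#include <memory>
#include <utility>
#include <vector>

struct RegTreeSketch {};

void MoveAllTrees(std::vector<std::unique_ptr<RegTreeSketch> >* trees,
                  std::vector<std::unique_ptr<RegTreeSketch> >* trees_to_update) {
  // 'auto &' (not 'const auto &') so std::move can steal each pointer;
  // the source vector holds nulls until the clear() that follows.
  for (auto& tree : *trees) {
    trees_to_update->push_back(std::move(tree));
  }
  trees->clear();
}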
166
src/learner.cc
@ -141,8 +141,8 @@ DMLC_REGISTER_PARAMETER(LearnerTrainParam);
*/
class LearnerImpl : public Learner {
public:
explicit LearnerImpl(const std::vector<std::shared_ptr<DMatrix> >& cache)
: cache_(cache) {
explicit LearnerImpl(std::vector<std::shared_ptr<DMatrix> > cache)
: cache_(std::move(cache)) {
// boosted tree
name_obj_ = "reg:linear";
name_gbm_ = "gbtree";
@ -155,25 +155,25 @@ class LearnerImpl : public Learner {
}

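The constructor change above is the pass-by-value-and-move idiom (modernize-pass-by-value): taking the cache vector by value lets callers with an rvalue avoid the copy entirely, while lvalue callers pay the same single copy as before. A sketch:

#include <memory>
#include <utility>
#include <vector>

struct DMatrixSketch {};

class LearnerSketch {
 public:
  // Before: explicit LearnerSketch(const std::vector<...>& cache) : cache_(cache) {}
  explicit LearnerSketch(std::vector<std::shared_ptr<DMatrixSketch> > cache)
      : cache_(std::move(cache)) {}

 private:
  std::vector<std::shared_ptr<DMatrixSketch> > cache_;
};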
void ConfigureUpdaters() {
if (tparam.tree_method == 0 || tparam.tree_method == 1 ||
tparam.tree_method == 2) {
if (tparam_.tree_method == 0 || tparam_.tree_method == 1 ||
tparam_.tree_method == 2) {
if (cfg_.count("updater") == 0) {
if (tparam.dsplit == 1) {
if (tparam_.dsplit == 1) {
cfg_["updater"] = "distcol";
} else if (tparam.dsplit == 2) {
} else if (tparam_.dsplit == 2) {
cfg_["updater"] = "grow_histmaker,prune";
}
if (tparam.prob_buffer_row != 1.0f) {
if (tparam_.prob_buffer_row != 1.0f) {
cfg_["updater"] = "grow_histmaker,refresh,prune";
}
}
} else if (tparam.tree_method == 3) {
} else if (tparam_.tree_method == 3) {
/* histogram-based algorithm */
LOG(CONSOLE) << "Tree method is selected to be \'hist\', which uses a "
"single updater "
<< "grow_fast_histmaker.";
cfg_["updater"] = "grow_fast_histmaker";
} else if (tparam.tree_method == 4) {
} else if (tparam_.tree_method == 4) {
this->AssertGPUSupport();
if (cfg_.count("updater") == 0) {
cfg_["updater"] = "grow_gpu,prune";
@ -181,7 +181,7 @@ class LearnerImpl : public Learner {
if (cfg_.count("predictor") == 0) {
cfg_["predictor"] = "gpu_predictor";
}
} else if (tparam.tree_method == 5) {
} else if (tparam_.tree_method == 5) {
this->AssertGPUSupport();
if (cfg_.count("updater") == 0) {
cfg_["updater"] = "grow_gpu_hist";
@ -195,8 +195,8 @@ class LearnerImpl : public Learner {
void Configure(
const std::vector<std::pair<std::string, std::string> >& args) override {
// add to configurations
tparam.InitAllowUnknown(args);
monitor.Init("Learner", tparam.debug_verbose);
tparam_.InitAllowUnknown(args);
monitor_.Init("Learner", tparam_.debug_verbose);
cfg_.clear();
for (const auto& kv : args) {
if (kv.first == "eval_metric") {
@ -206,20 +206,20 @@ class LearnerImpl : public Learner {
};
if (std::all_of(metrics_.begin(), metrics_.end(), dup_check)) {
metrics_.emplace_back(Metric::Create(kv.second));
mparam.contain_eval_metrics = 1;
mparam_.contain_eval_metrics = 1;
}
} else {
cfg_[kv.first] = kv.second;
}
}
if (tparam.nthread != 0) {
omp_set_num_threads(tparam.nthread);
if (tparam_.nthread != 0) {
omp_set_num_threads(tparam_.nthread);
}

// add additional parameters
// These are constraints that need to be satisfied.
if (tparam.dsplit == 0 && rabit::IsDistributed()) {
tparam.dsplit = 2;
if (tparam_.dsplit == 0 && rabit::IsDistributed()) {
tparam_.dsplit = 2;
}

if (cfg_.count("num_class") != 0) {
|
||||
@ -244,21 +244,21 @@ class LearnerImpl : public Learner {
|
||||
}
|
||||
|
||||
if (!this->ModelInitialized()) {
|
||||
mparam.InitAllowUnknown(args);
|
||||
mparam_.InitAllowUnknown(args);
|
||||
name_obj_ = cfg_["objective"];
|
||||
name_gbm_ = cfg_["booster"];
|
||||
// set seed only before the model is initialized
|
||||
common::GlobalRandom().seed(tparam.seed);
|
||||
common::GlobalRandom().seed(tparam_.seed);
|
||||
}
|
||||
|
||||
// set number of features correctly.
|
||||
cfg_["num_feature"] = common::ToString(mparam.num_feature);
|
||||
cfg_["num_class"] = common::ToString(mparam.num_class);
|
||||
cfg_["num_feature"] = common::ToString(mparam_.num_feature);
|
||||
cfg_["num_class"] = common::ToString(mparam_.num_class);
|
||||
|
||||
if (gbm_.get() != nullptr) {
|
||||
if (gbm_ != nullptr) {
|
||||
gbm_->Configure(cfg_.begin(), cfg_.end());
|
||||
}
|
||||
if (obj_.get() != nullptr) {
|
||||
if (obj_ != nullptr) {
|
||||
obj_->Configure(cfg_.begin(), cfg_.end());
|
||||
}
|
||||
}
|
||||
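The gbm_.get() != nullptr simplifications above rely on smart pointers comparing against nullptr directly. A sketch:

#include <memory>

struct GradientBoosterSketch { void Configure() {} };

void MaybeConfigure(const std::unique_ptr<GradientBoosterSketch>& gbm) {
  // Before: if (gbm.get() != nullptr) { ... }
  // After: unique_ptr compares against nullptr itself; .get() adds nothing.
  if (gbm != nullptr) {
    gbm->Configure();
  }
}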
@ -281,7 +281,7 @@ class LearnerImpl : public Learner {
// use the peekable reader.
fi = &fp;
// read parameter
CHECK_EQ(fi->Read(&mparam, sizeof(mparam)), sizeof(mparam))
CHECK_EQ(fi->Read(&mparam_, sizeof(mparam_)), sizeof(mparam_))
<< "BoostLearner: wrong model format";
{
// backward compatibility code for compatibility with the old model type
@ -303,9 +303,9 @@ class LearnerImpl : public Learner {
CHECK(fi->Read(&name_gbm_)) << "BoostLearner: wrong model format";
// duplicated code with LazyInitModel
obj_.reset(ObjFunction::Create(name_obj_));
gbm_.reset(GradientBooster::Create(name_gbm_, cache_, mparam.base_score));
gbm_.reset(GradientBooster::Create(name_gbm_, cache_, mparam_.base_score));
gbm_->Load(fi);
if (mparam.contain_extra_attrs != 0) {
if (mparam_.contain_extra_attrs != 0) {
std::vector<std::pair<std::string, std::string> > attr;
fi->Read(&attr);
attributes_ =
@ -316,35 +316,35 @@ class LearnerImpl : public Learner {
fi->Read(&max_delta_step);
cfg_["max_delta_step"] = max_delta_step;
}
if (mparam.contain_eval_metrics != 0) {
if (mparam_.contain_eval_metrics != 0) {
std::vector<std::string> metr;
fi->Read(&metr);
for (auto name : metr) {
metrics_.emplace_back(Metric::Create(name));
}
}
cfg_["num_class"] = common::ToString(mparam.num_class);
cfg_["num_feature"] = common::ToString(mparam.num_feature);
cfg_["num_class"] = common::ToString(mparam_.num_class);
cfg_["num_feature"] = common::ToString(mparam_.num_feature);
obj_->Configure(cfg_.begin(), cfg_.end());
}

// rabit save model to rabit checkpoint
void Save(dmlc::Stream* fo) const override {
fo->Write(&mparam, sizeof(LearnerModelParam));
fo->Write(&mparam_, sizeof(LearnerModelParam));
fo->Write(name_obj_);
fo->Write(name_gbm_);
gbm_->Save(fo);
if (mparam.contain_extra_attrs != 0) {
if (mparam_.contain_extra_attrs != 0) {
std::vector<std::pair<std::string, std::string> > attr(
attributes_.begin(), attributes_.end());
fo->Write(attr);
}
if (name_obj_ == "count:poisson") {
std::map<std::string, std::string>::const_iterator it =
auto it =
cfg_.find("max_delta_step");
if (it != cfg_.end()) fo->Write(it->second);
}
if (mparam.contain_eval_metrics != 0) {
if (mparam_.contain_eval_metrics != 0) {
std::vector<std::string> metr;
for (auto& ev : metrics_) {
metr.emplace_back(ev->Name());
@ -354,37 +354,37 @@ class LearnerImpl : public Learner {
}

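The iterator declaration in Save collapses to auto, the classic modernize-use-auto win for long template names. A sketch:

#include <map>
#include <string>

void WriteMaxDeltaStep(const std::map<std::string, std::string>& cfg) {
  // Before: std::map<std::string, std::string>::const_iterator it = cfg.find(...);
  auto it = cfg.find("max_delta_step");
  if (it != cfg.end()) {
    // write it->second, as in the Save body above
  }
}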
void UpdateOneIter(int iter, DMatrix* train) override {
monitor.Start("UpdateOneIter");
monitor_.Start("UpdateOneIter");
CHECK(ModelInitialized())
<< "Always call InitModel or LoadModel before update";
if (tparam.seed_per_iteration || rabit::IsDistributed()) {
common::GlobalRandom().seed(tparam.seed * kRandSeedMagic + iter);
if (tparam_.seed_per_iteration || rabit::IsDistributed()) {
common::GlobalRandom().seed(tparam_.seed * kRandSeedMagic + iter);
}
this->LazyInitDMatrix(train);
monitor.Start("PredictRaw");
monitor_.Start("PredictRaw");
this->PredictRaw(train, &preds_);
monitor.Stop("PredictRaw");
monitor.Start("GetGradient");
obj_->GetGradient(&preds_, train->info(), iter, &gpair_);
monitor.Stop("GetGradient");
monitor_.Stop("PredictRaw");
monitor_.Start("GetGradient");
obj_->GetGradient(&preds_, train->Info(), iter, &gpair_);
monitor_.Stop("GetGradient");
gbm_->DoBoost(train, &gpair_, obj_.get());
monitor.Stop("UpdateOneIter");
monitor_.Stop("UpdateOneIter");
}

void BoostOneIter(int iter, DMatrix* train,
HostDeviceVector<bst_gpair>* in_gpair) override {
monitor.Start("BoostOneIter");
if (tparam.seed_per_iteration || rabit::IsDistributed()) {
common::GlobalRandom().seed(tparam.seed * kRandSeedMagic + iter);
HostDeviceVector<GradientPair>* in_gpair) override {
monitor_.Start("BoostOneIter");
if (tparam_.seed_per_iteration || rabit::IsDistributed()) {
common::GlobalRandom().seed(tparam_.seed * kRandSeedMagic + iter);
}
this->LazyInitDMatrix(train);
gbm_->DoBoost(train, in_gpair);
monitor.Stop("BoostOneIter");
monitor_.Stop("BoostOneIter");
}

std::string EvalOneIter(int iter, const std::vector<DMatrix*>& data_sets,
const std::vector<std::string>& data_names) override {
monitor.Start("EvalOneIter");
monitor_.Start("EvalOneIter");
std::ostringstream os;
os << '[' << iter << ']' << std::setiosflags(std::ios::fixed);
if (metrics_.size() == 0) {
@ -395,17 +395,17 @@ class LearnerImpl : public Learner {
obj_->EvalTransform(&preds_);
for (auto& ev : metrics_) {
os << '\t' << data_names[i] << '-' << ev->Name() << ':'
<< ev->Eval(preds_.data_h(), data_sets[i]->info(), tparam.dsplit == 2);
<< ev->Eval(preds_.HostVector(), data_sets[i]->Info(), tparam_.dsplit == 2);
}
}

monitor.Stop("EvalOneIter");
monitor_.Stop("EvalOneIter");
return os.str();
}

void SetAttr(const std::string& key, const std::string& value) override {
attributes_[key] = value;
mparam.contain_extra_attrs = 1;
mparam_.contain_extra_attrs = 1;
}

bool GetAttr(const std::string& key, std::string* out) const override {
@ -438,7 +438,7 @@ class LearnerImpl : public Learner {
this->PredictRaw(data, &preds_);
obj_->EvalTransform(&preds_);
return std::make_pair(metric,
ev->Eval(preds_.data_h(), data->info(), tparam.dsplit == 2));
ev->Eval(preds_.HostVector(), data->Info(), tparam_.dsplit == 2));
}

void Predict(DMatrix* data, bool output_margin,
@ -446,12 +446,12 @@ class LearnerImpl : public Learner {
bool pred_leaf, bool pred_contribs, bool approx_contribs,
bool pred_interactions) const override {
if (pred_contribs) {
gbm_->PredictContribution(data, &out_preds->data_h(), ntree_limit, approx_contribs);
gbm_->PredictContribution(data, &out_preds->HostVector(), ntree_limit, approx_contribs);
} else if (pred_interactions) {
gbm_->PredictInteractionContributions(data, &out_preds->data_h(), ntree_limit,
gbm_->PredictInteractionContributions(data, &out_preds->HostVector(), ntree_limit,
approx_contribs);
} else if (pred_leaf) {
gbm_->PredictLeaf(data, &out_preds->data_h(), ntree_limit);
gbm_->PredictLeaf(data, &out_preds->HostVector(), ntree_limit);
} else {
this->PredictRaw(data, out_preds, ntree_limit);
if (!output_margin) {
@ -464,21 +464,21 @@ class LearnerImpl : public Learner {
// check if p_train is ready to be used by training.
// if not, initialize the column access.
inline void LazyInitDMatrix(DMatrix* p_train) {
if (tparam.tree_method == 3 || tparam.tree_method == 4 ||
tparam.tree_method == 5 || name_gbm_ == "gblinear") {
if (tparam_.tree_method == 3 || tparam_.tree_method == 4 ||
tparam_.tree_method == 5 || name_gbm_ == "gblinear") {
return;
}

monitor.Start("LazyInitDMatrix");
monitor_.Start("LazyInitDMatrix");
if (!p_train->HaveColAccess(true)) {
int ncol = static_cast<int>(p_train->info().num_col);
auto ncol = static_cast<int>(p_train->Info().num_col_);
std::vector<bool> enabled(ncol, true);
// set max row per batch to limited value
// in distributed mode, use safe choice otherwise
size_t max_row_perbatch = tparam.max_row_perbatch;
const size_t safe_max_row = static_cast<size_t>(32ul << 10ul);
size_t max_row_perbatch = tparam_.max_row_perbatch;
const auto safe_max_row = static_cast<size_t>(32ul << 10ul);

if (tparam.tree_method == 0 && p_train->info().num_row >= (4UL << 20UL)) {
if (tparam_.tree_method == 0 && p_train->Info().num_row_ >= (4UL << 20UL)) {
LOG(CONSOLE)
<< "Tree method is automatically selected to be \'approx\'"
<< " for faster speed."
@ -487,57 +487,57 @@ class LearnerImpl : public Learner {
max_row_perbatch = std::min(max_row_perbatch, safe_max_row);
}

if (tparam.tree_method == 1) {
if (tparam_.tree_method == 1) {
LOG(CONSOLE) << "Tree method is selected to be \'approx\'";
max_row_perbatch = std::min(max_row_perbatch, safe_max_row);
}

if (tparam.test_flag == "block" || tparam.dsplit == 2) {
if (tparam_.test_flag == "block" || tparam_.dsplit == 2) {
max_row_perbatch = std::min(max_row_perbatch, safe_max_row);
}
// initialize column access
p_train->InitColAccess(enabled, tparam.prob_buffer_row, max_row_perbatch, true);
p_train->InitColAccess(enabled, tparam_.prob_buffer_row, max_row_perbatch, true);
}

if (!p_train->SingleColBlock() && cfg_.count("updater") == 0) {
if (tparam.tree_method == 2) {
if (tparam_.tree_method == 2) {
LOG(CONSOLE) << "tree method is set to be 'exact',"
<< " but currently we are only able to proceed with "
"approximate algorithm";
}
cfg_["updater"] = "grow_histmaker,prune";
if (gbm_.get() != nullptr) {
if (gbm_ != nullptr) {
gbm_->Configure(cfg_.begin(), cfg_.end());
}
}
monitor.Stop("LazyInitDMatrix");
monitor_.Stop("LazyInitDMatrix");
}

// return whether model is already initialized.
inline bool ModelInitialized() const { return gbm_.get() != nullptr; }
inline bool ModelInitialized() const { return gbm_ != nullptr; }
// lazily initialize the model if it hasn't yet been initialized.
inline void LazyInitModel() {
if (this->ModelInitialized()) return;
// estimate feature bound
unsigned num_feature = 0;
for (size_t i = 0; i < cache_.size(); ++i) {
CHECK(cache_[i] != nullptr);
for (auto & matrix : cache_) {
CHECK(matrix != nullptr);
num_feature = std::max(num_feature,
static_cast<unsigned>(cache_[i]->info().num_col));
static_cast<unsigned>(matrix->Info().num_col_));
}
// run allreduce on num_feature to find the maximum value
rabit::Allreduce<rabit::op::Max>(&num_feature, 1);
if (num_feature > mparam.num_feature) {
mparam.num_feature = num_feature;
if (num_feature > mparam_.num_feature) {
mparam_.num_feature = num_feature;
}
// setup
cfg_["num_feature"] = common::ToString(mparam.num_feature);
CHECK(obj_.get() == nullptr && gbm_.get() == nullptr);
cfg_["num_feature"] = common::ToString(mparam_.num_feature);
CHECK(obj_ == nullptr && gbm_ == nullptr);
obj_.reset(ObjFunction::Create(name_obj_));
obj_->Configure(cfg_.begin(), cfg_.end());
// reset the base score
mparam.base_score = obj_->ProbToMargin(mparam.base_score);
gbm_.reset(GradientBooster::Create(name_gbm_, cache_, mparam.base_score));
mparam_.base_score = obj_->ProbToMargin(mparam_.base_score);
gbm_.reset(GradientBooster::Create(name_gbm_, cache_, mparam_.base_score));
gbm_->Configure(cfg_.begin(), cfg_.end());
}
/*!
@ -549,15 +549,15 @@ class LearnerImpl : public Learner {
*/
inline void PredictRaw(DMatrix* data, HostDeviceVector<bst_float>* out_preds,
unsigned ntree_limit = 0) const {
CHECK(gbm_.get() != nullptr)
CHECK(gbm_ != nullptr)
<< "Predict must happen after Load or InitModel";
gbm_->PredictBatch(data, out_preds, ntree_limit);
}

// model parameter
LearnerModelParam mparam;
LearnerModelParam mparam_;
// training parameter
LearnerTrainParam tparam;
LearnerTrainParam tparam_;
// configurations
std::map<std::string, std::string> cfg_;
// attributes
@ -569,7 +569,7 @@ class LearnerImpl : public Learner {
// temporary storage for predictions
HostDeviceVector<bst_float> preds_;
// gradient pairs
HostDeviceVector<bst_gpair> gpair_;
HostDeviceVector<GradientPair> gpair_;

private:
/*! \brief random number transformation seed. */
@ -577,7 +577,7 @@ class LearnerImpl : public Learner {
// internal cached dmatrix
std::vector<std::shared_ptr<DMatrix> > cache_;

common::Monitor monitor;
common::Monitor monitor_;
};

Learner* Learner::Create(

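The bst_gpair to GradientPair rename used throughout follows the CamelCase rule for type names. For orientation only, a minimal stand-in with the interface the call sites below rely on (the real class carries more, such as device annotations):

// Illustrative sketch only, not the project's actual definition.
class GradientPairSketch {
 public:
  GradientPairSketch() = default;
  GradientPairSketch(float grad, float hess) : grad_(grad), hess_(hess) {}
  float GetGrad() const { return grad_; }
  float GetHess() const { return hess_; }
  GradientPairSketch& operator+=(const GradientPairSketch& rhs) {
    grad_ += rhs.grad_;
    hess_ += rhs.hess_;
    return *this;
  }

 private:
  float grad_ = 0.f;
  float hess_ = 0.f;
};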
@ -62,14 +62,14 @@ inline double CoordinateDeltaBias(double sum_grad, double sum_hess) {
* \return The gradient and diagonal Hessian entry for a given feature.
*/
inline std::pair<double, double> GetGradient(int group_idx, int num_group, int fidx,
const std::vector<bst_gpair> &gpair,
const std::vector<GradientPair> &gpair,
DMatrix *p_fmat) {
double sum_grad = 0.0, sum_hess = 0.0;
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator({static_cast<bst_uint>(fidx)});
while (iter->Next()) {
const ColBatch &batch = iter->Value();
ColBatch::Inst col = batch[0];
const bst_omp_uint ndata = static_cast<bst_omp_uint>(col.length);
const auto ndata = static_cast<bst_omp_uint>(col.length);
for (bst_omp_uint j = 0; j < ndata; ++j) {
const bst_float v = col[j].fvalue;
auto &p = gpair[col[j].index * num_group + group_idx];
@ -93,14 +93,14 @@ inline std::pair<double, double> GetGradient(int group_idx, int num_group, int f
* \return The gradient and diagonal Hessian entry for a given feature.
*/
inline std::pair<double, double> GetGradientParallel(int group_idx, int num_group, int fidx,
const std::vector<bst_gpair> &gpair,
const std::vector<GradientPair> &gpair,
DMatrix *p_fmat) {
double sum_grad = 0.0, sum_hess = 0.0;
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator({static_cast<bst_uint>(fidx)});
while (iter->Next()) {
const ColBatch &batch = iter->Value();
ColBatch::Inst col = batch[0];
const bst_omp_uint ndata = static_cast<bst_omp_uint>(col.length);
const auto ndata = static_cast<bst_omp_uint>(col.length);
#pragma omp parallel for schedule(static) reduction(+ : sum_grad, sum_hess)
for (bst_omp_uint j = 0; j < ndata; ++j) {
const bst_float v = col[j].fvalue;
@ -124,11 +124,11 @@ inline std::pair<double, double> GetGradientParallel(int group_idx, int num_grou
* \return The gradient and diagonal Hessian entry for the bias.
*/
inline std::pair<double, double> GetBiasGradientParallel(int group_idx, int num_group,
const std::vector<bst_gpair> &gpair,
const std::vector<GradientPair> &gpair,
DMatrix *p_fmat) {
const RowSet &rowset = p_fmat->buffered_rowset();
const RowSet &rowset = p_fmat->BufferedRowset();
double sum_grad = 0.0, sum_hess = 0.0;
const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
const auto ndata = static_cast<bst_omp_uint>(rowset.Size());
#pragma omp parallel for schedule(static) reduction(+ : sum_grad, sum_hess)
for (bst_omp_uint i = 0; i < ndata; ++i) {
auto &p = gpair[rowset[i] * num_group + group_idx];
@ -151,7 +151,7 @@ inline std::pair<double, double> GetBiasGradientParallel(int group_idx, int num_
* \param p_fmat The input feature matrix.
*/
inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
float dw, std::vector<bst_gpair> *in_gpair,
float dw, std::vector<GradientPair> *in_gpair,
DMatrix *p_fmat) {
if (dw == 0.0f) return;
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator({static_cast<bst_uint>(fidx)});
@ -159,12 +159,12 @@ inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
const ColBatch &batch = iter->Value();
ColBatch::Inst col = batch[0];
// update grad value
const bst_omp_uint num_row = static_cast<bst_omp_uint>(col.length);
const auto num_row = static_cast<bst_omp_uint>(col.length);
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < num_row; ++j) {
bst_gpair &p = (*in_gpair)[col[j].index * num_group + group_idx];
GradientPair &p = (*in_gpair)[col[j].index * num_group + group_idx];
if (p.GetHess() < 0.0f) continue;
p += bst_gpair(p.GetHess() * col[j].fvalue * dw, 0);
p += GradientPair(p.GetHess() * col[j].fvalue * dw, 0);
}
}
}
@ -179,16 +179,16 @@ inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
* \param p_fmat The input feature matrix.
*/
inline void UpdateBiasResidualParallel(int group_idx, int num_group, float dbias,
std::vector<bst_gpair> *in_gpair,
std::vector<GradientPair> *in_gpair,
DMatrix *p_fmat) {
if (dbias == 0.0f) return;
const RowSet &rowset = p_fmat->buffered_rowset();
const bst_omp_uint ndata = static_cast<bst_omp_uint>(p_fmat->info().num_row);
const RowSet &rowset = p_fmat->BufferedRowset();
const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ndata; ++i) {
bst_gpair &g = (*in_gpair)[rowset[i] * num_group + group_idx];
GradientPair &g = (*in_gpair)[rowset[i] * num_group + group_idx];
if (g.GetHess() < 0.0f) continue;
g += bst_gpair(g.GetHess() * dbias, 0);
g += GradientPair(g.GetHess() * dbias, 0);
}
}

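The residual updates above are a second-order correction: after a weight step $\Delta w$ on feature $j$, each touched row's gradient entry shifts by its Hessian times the change in prediction, $g_i \leftarrow g_i + h_i\, x_{ij}\, \Delta w$ (and $g_i \leftarrow g_i + h_i\, \Delta b$ for a bias step $\Delta b$), while the Hessian entry stays put; that is exactly the p += GradientPair(p.GetHess() * col[j].fvalue * dw, 0) line.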
@ -201,7 +201,7 @@ class FeatureSelector {
/*! \brief factory method */
static FeatureSelector *Create(int choice);
/*! \brief virtual destructor */
virtual ~FeatureSelector() {}
virtual ~FeatureSelector() = default;
/**
* \brief Setting up the selector state prior to looping through features.
*
@ -213,7 +213,7 @@ class FeatureSelector {
* \param param A parameter with algorithm-dependent use.
*/
virtual void Setup(const gbm::GBLinearModel &model,
const std::vector<bst_gpair> &gpair,
const std::vector<GradientPair> &gpair,
DMatrix *p_fmat,
float alpha, float lambda, int param) {}
/**
@ -232,7 +232,7 @@ class FeatureSelector {
virtual int NextFeature(int iteration,
const gbm::GBLinearModel &model,
int group_idx,
const std::vector<bst_gpair> &gpair,
const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda) = 0;
};

@ -242,7 +242,7 @@ class FeatureSelector {
class CyclicFeatureSelector : public FeatureSelector {
public:
int NextFeature(int iteration, const gbm::GBLinearModel &model,
int group_idx, const std::vector<bst_gpair> &gpair,
int group_idx, const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda) override {
return iteration % model.param.num_feature;
}
@ -255,23 +255,23 @@ class CyclicFeatureSelector : public FeatureSelector {
class ShuffleFeatureSelector : public FeatureSelector {
public:
void Setup(const gbm::GBLinearModel &model,
const std::vector<bst_gpair> &gpair,
const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda, int param) override {
if (feat_index.size() == 0) {
feat_index.resize(model.param.num_feature);
std::iota(feat_index.begin(), feat_index.end(), 0);
if (feat_index_.size() == 0) {
feat_index_.resize(model.param.num_feature);
std::iota(feat_index_.begin(), feat_index_.end(), 0);
}
std::shuffle(feat_index.begin(), feat_index.end(), common::GlobalRandom());
std::shuffle(feat_index_.begin(), feat_index_.end(), common::GlobalRandom());
}

int NextFeature(int iteration, const gbm::GBLinearModel &model,
int group_idx, const std::vector<bst_gpair> &gpair,
int group_idx, const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda) override {
return feat_index[iteration % model.param.num_feature];
return feat_index_[iteration % model.param.num_feature];
}

protected:
std::vector<bst_uint> feat_index;
std::vector<bst_uint> feat_index_;
};

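The destructor change in FeatureSelector is modernize-use-equals-default: an empty user-written body becomes = default, which states the intent explicitly and keeps the type as trivial as possible. A sketch:

class SelectorSketch {
 public:
  // Before: virtual ~SelectorSketch() {}
  virtual ~SelectorSketch() = default;
};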
/**
@ -281,7 +281,7 @@ class ShuffleFeatureSelector : public FeatureSelector {
class RandomFeatureSelector : public FeatureSelector {
public:
int NextFeature(int iteration, const gbm::GBLinearModel &model,
int group_idx, const std::vector<bst_gpair> &gpair,
int group_idx, const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda) override {
return common::GlobalRandom()() % model.param.num_feature;
}
@ -299,32 +299,32 @@ class RandomFeatureSelector : public FeatureSelector {
class GreedyFeatureSelector : public FeatureSelector {
public:
void Setup(const gbm::GBLinearModel &model,
const std::vector<bst_gpair> &gpair,
const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda, int param) override {
top_k = static_cast<bst_uint>(param);
top_k_ = static_cast<bst_uint>(param);
const bst_uint ngroup = model.param.num_output_group;
if (param <= 0) top_k = std::numeric_limits<bst_uint>::max();
if (counter.size() == 0) {
counter.resize(ngroup);
gpair_sums.resize(model.param.num_feature * ngroup);
if (param <= 0) top_k_ = std::numeric_limits<bst_uint>::max();
if (counter_.size() == 0) {
counter_.resize(ngroup);
gpair_sums_.resize(model.param.num_feature * ngroup);
}
for (bst_uint gid = 0u; gid < ngroup; ++gid) {
counter[gid] = 0u;
counter_[gid] = 0u;
}
}

int NextFeature(int iteration, const gbm::GBLinearModel &model,
int group_idx, const std::vector<bst_gpair> &gpair,
int group_idx, const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda) override {
// k-th selected feature for a group
auto k = counter[group_idx]++;
auto k = counter_[group_idx]++;
// stop after either reaching top-K or going through all the features in a group
if (k >= top_k || counter[group_idx] == model.param.num_feature) return -1;
if (k >= top_k_ || counter_[group_idx] == model.param.num_feature) return -1;

const int ngroup = model.param.num_output_group;
const bst_omp_uint nfeat = model.param.num_feature;
// Calculate univariate gradient sums
std::fill(gpair_sums.begin(), gpair_sums.end(), std::make_pair(0., 0.));
std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator();
while (iter->Next()) {
const ColBatch &batch = iter->Value();
@ -332,7 +332,7 @@ class GreedyFeatureSelector : public FeatureSelector {
for (bst_omp_uint i = 0; i < nfeat; ++i) {
const ColBatch::Inst col = batch[i];
const bst_uint ndata = col.length;
auto &sums = gpair_sums[group_idx * nfeat + i];
auto &sums = gpair_sums_[group_idx * nfeat + i];
for (bst_uint j = 0u; j < ndata; ++j) {
const bst_float v = col[j].fvalue;
auto &p = gpair[col[j].index * ngroup + group_idx];
@ -346,7 +346,7 @@ class GreedyFeatureSelector : public FeatureSelector {
int best_fidx = 0;
double best_weight_update = 0.0f;
for (bst_omp_uint fidx = 0; fidx < nfeat; ++fidx) {
auto &s = gpair_sums[group_idx * nfeat + fidx];
auto &s = gpair_sums_[group_idx * nfeat + fidx];
float dw = std::abs(static_cast<bst_float>(
CoordinateDelta(s.first, s.second, model[fidx][group_idx], alpha, lambda)));
if (dw > best_weight_update) {
@ -358,9 +358,9 @@ class GreedyFeatureSelector : public FeatureSelector {
}

protected:
bst_uint top_k;
std::vector<bst_uint> counter;
std::vector<std::pair<double, double>> gpair_sums;
bst_uint top_k_;
std::vector<bst_uint> counter_;
std::vector<std::pair<double, double>> gpair_sums_;
};

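CoordinateDelta itself is defined elsewhere in this file set; purely as orientation (an assumption about the usual elastic-net coordinate step, not a quote of the implementation), the magnitude the greedy selector ranks features by typically has the soft-thresholded form $\Delta w \approx -\frac{G + \lambda_2 w \pm \lambda_1}{H + \lambda_2}$, where $G$ and $H$ are the accumulated gradient and Hessian sums, $\lambda_1$ is alpha, $\lambda_2$ is lambda, and the sign of the $\lambda_1$ term pushes the update toward zero.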
/**
@ -377,21 +377,21 @@ class GreedyFeatureSelector : public FeatureSelector {
class ThriftyFeatureSelector : public FeatureSelector {
public:
void Setup(const gbm::GBLinearModel &model,
const std::vector<bst_gpair> &gpair,
const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda, int param) override {
top_k = static_cast<bst_uint>(param);
if (param <= 0) top_k = std::numeric_limits<bst_uint>::max();
top_k_ = static_cast<bst_uint>(param);
if (param <= 0) top_k_ = std::numeric_limits<bst_uint>::max();
const bst_uint ngroup = model.param.num_output_group;
const bst_omp_uint nfeat = model.param.num_feature;

if (deltaw.size() == 0) {
deltaw.resize(nfeat * ngroup);
sorted_idx.resize(nfeat * ngroup);
counter.resize(ngroup);
gpair_sums.resize(nfeat * ngroup);
if (deltaw_.size() == 0) {
deltaw_.resize(nfeat * ngroup);
sorted_idx_.resize(nfeat * ngroup);
counter_.resize(ngroup);
gpair_sums_.resize(nfeat * ngroup);
}
// Calculate univariate gradient sums
std::fill(gpair_sums.begin(), gpair_sums.end(), std::make_pair(0., 0.));
std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator();
while (iter->Next()) {
const ColBatch &batch = iter->Value();
@ -401,7 +401,7 @@ class ThriftyFeatureSelector : public FeatureSelector {
const ColBatch::Inst col = batch[i];
const bst_uint ndata = col.length;
for (bst_uint gid = 0u; gid < ngroup; ++gid) {
auto &sums = gpair_sums[gid * nfeat + i];
auto &sums = gpair_sums_[gid * nfeat + i];
for (bst_uint j = 0u; j < ndata; ++j) {
const bst_float v = col[j].fvalue;
auto &p = gpair[col[j].index * ngroup + gid];
@ -413,45 +413,45 @@ class ThriftyFeatureSelector : public FeatureSelector {
}
}
// rank by descending weight magnitude within the groups
std::fill(deltaw.begin(), deltaw.end(), 0.f);
std::iota(sorted_idx.begin(), sorted_idx.end(), 0);
bst_float *pdeltaw = &deltaw[0];
std::fill(deltaw_.begin(), deltaw_.end(), 0.f);
std::iota(sorted_idx_.begin(), sorted_idx_.end(), 0);
bst_float *pdeltaw = &deltaw_[0];
for (bst_uint gid = 0u; gid < ngroup; ++gid) {
// Calculate univariate weight changes
for (bst_omp_uint i = 0; i < nfeat; ++i) {
auto ii = gid * nfeat + i;
auto &s = gpair_sums[ii];
deltaw[ii] = static_cast<bst_float>(CoordinateDelta(
auto &s = gpair_sums_[ii];
deltaw_[ii] = static_cast<bst_float>(CoordinateDelta(
s.first, s.second, model[i][gid], alpha, lambda));
}
// sort in descending order of deltaw abs values
auto start = sorted_idx.begin() + gid * nfeat;
auto start = sorted_idx_.begin() + gid * nfeat;
std::sort(start, start + nfeat,
[pdeltaw](size_t i, size_t j) {
return std::abs(*(pdeltaw + i)) > std::abs(*(pdeltaw + j));
});
counter[gid] = 0u;
counter_[gid] = 0u;
}
}

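The sorted_idx_ block above is the argsort idiom: indices are ordered by |deltaw| while the values themselves stay in place. A self-contained version of the same idiom:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <numeric>
#include <vector>

std::vector<size_t> ArgsortByMagnitude(const std::vector<float>& deltaw) {
  std::vector<size_t> idx(deltaw.size());
  std::iota(idx.begin(), idx.end(), 0);           // 0, 1, 2, ...
  std::sort(idx.begin(), idx.end(), [&deltaw](size_t i, size_t j) {
    return std::fabs(deltaw[i]) > std::fabs(deltaw[j]);  // descending |value|
  });
  return idx;
}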
int NextFeature(int iteration, const gbm::GBLinearModel &model,
int group_idx, const std::vector<bst_gpair> &gpair,
int group_idx, const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda) override {
// k-th selected feature for a group
auto k = counter[group_idx]++;
auto k = counter_[group_idx]++;
// stop after either reaching top-N or going through all the features in a group
if (k >= top_k || counter[group_idx] == model.param.num_feature) return -1;
if (k >= top_k_ || counter_[group_idx] == model.param.num_feature) return -1;
// note that sorted_idx stores the "long" indices
const size_t grp_offset = group_idx * model.param.num_feature;
return static_cast<int>(sorted_idx[grp_offset + k] - grp_offset);
return static_cast<int>(sorted_idx_[grp_offset + k] - grp_offset);
}

protected:
bst_uint top_k;
std::vector<bst_float> deltaw;
std::vector<size_t> sorted_idx;
std::vector<bst_uint> counter;
std::vector<std::pair<double, double>> gpair_sums;
bst_uint top_k_;
std::vector<bst_float> deltaw_;
std::vector<size_t> sorted_idx_;
std::vector<bst_uint> counter_;
std::vector<std::pair<double, double>> gpair_sums_;
};

/**

@ -85,7 +85,7 @@ class CoordinateUpdater : public LinearUpdater {
monitor.Init("CoordinateUpdater", param.debug_verbose);
}

void Update(std::vector<bst_gpair> *in_gpair, DMatrix *p_fmat,
void Update(std::vector<GradientPair> *in_gpair, DMatrix *p_fmat,
gbm::GBLinearModel *model, double sum_instance_weight) override {
param.DenormalizePenalties(sum_instance_weight);
const int ngroup = model->param.num_output_group;
@ -111,7 +111,7 @@ class CoordinateUpdater : public LinearUpdater {
}
}

inline void UpdateFeature(int fidx, int group_idx, std::vector<bst_gpair> *in_gpair,
inline void UpdateFeature(int fidx, int group_idx, std::vector<GradientPair> *in_gpair,
DMatrix *p_fmat, gbm::GBLinearModel *model) {
const int ngroup = model->param.num_output_group;
bst_float &w = (*model)[fidx][group_idx];

@ -58,59 +58,60 @@ class ShotgunUpdater : public LinearUpdater {
|
||||
public:
|
||||
// set training parameter
|
||||
void Init(const std::vector<std::pair<std::string, std::string> > &args) override {
|
||||
param.InitAllowUnknown(args);
|
||||
selector.reset(FeatureSelector::Create(param.feature_selector));
|
||||
param_.InitAllowUnknown(args);
|
||||
selector_.reset(FeatureSelector::Create(param_.feature_selector));
|
||||
}
|
||||
|
||||
void Update(std::vector<bst_gpair> *in_gpair, DMatrix *p_fmat,
|
||||
void Update(std::vector<GradientPair> *in_gpair, DMatrix *p_fmat,
|
||||
gbm::GBLinearModel *model, double sum_instance_weight) override {
|
||||
param.DenormalizePenalties(sum_instance_weight);
|
||||
std::vector<bst_gpair> &gpair = *in_gpair;
|
||||
param_.DenormalizePenalties(sum_instance_weight);
|
||||
std::vector<GradientPair> &gpair = *in_gpair;
|
||||
const int ngroup = model->param.num_output_group;
|
||||
|
||||
// update bias
|
||||
for (int gid = 0; gid < ngroup; ++gid) {
|
||||
auto grad = GetBiasGradientParallel(gid, ngroup, *in_gpair, p_fmat);
|
||||
auto dbias = static_cast<bst_float>(param.learning_rate *
|
||||
auto dbias = static_cast<bst_float>(param_.learning_rate *
|
||||
CoordinateDeltaBias(grad.first, grad.second));
|
||||
model->bias()[gid] += dbias;
|
||||
UpdateBiasResidualParallel(gid, ngroup, dbias, in_gpair, p_fmat);
|
||||
}
|
||||
|
||||
// lock-free parallel updates of weights
|
||||
selector->Setup(*model, *in_gpair, p_fmat, param.reg_alpha_denorm, param.reg_lambda_denorm, 0);
|
||||
selector_->Setup(*model, *in_gpair, p_fmat, param_.reg_alpha_denorm,
|
||||
param_.reg_lambda_denorm, 0);
|
||||
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator();
|
||||
while (iter->Next()) {
|
||||
const ColBatch &batch = iter->Value();
|
||||
const bst_omp_uint nfeat = static_cast<bst_omp_uint>(batch.size);
|
||||
const auto nfeat = static_cast<bst_omp_uint>(batch.size);
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < nfeat; ++i) {
|
||||
int ii = selector->NextFeature(i, *model, 0, *in_gpair, p_fmat,
|
||||
param.reg_alpha_denorm, param.reg_lambda_denorm);
|
||||
int ii = selector_->NextFeature(i, *model, 0, *in_gpair, p_fmat,
|
||||
param_.reg_alpha_denorm, param_.reg_lambda_denorm);
|
||||
if (ii < 0) continue;
|
||||
const bst_uint fid = batch.col_index[ii];
|
||||
ColBatch::Inst col = batch[ii];
|
||||
for (int gid = 0; gid < ngroup; ++gid) {
|
||||
double sum_grad = 0.0, sum_hess = 0.0;
|
||||
for (bst_uint j = 0; j < col.length; ++j) {
|
||||
bst_gpair &p = gpair[col[j].index * ngroup + gid];
|
||||
GradientPair &p = gpair[col[j].index * ngroup + gid];
|
||||
if (p.GetHess() < 0.0f) continue;
|
||||
const bst_float v = col[j].fvalue;
|
||||
sum_grad += p.GetGrad() * v;
|
||||
sum_hess += p.GetHess() * v * v;
|
||||
}
|
||||
bst_float &w = (*model)[fid][gid];
|
||||
bst_float dw = static_cast<bst_float>(
|
||||
param.learning_rate *
|
||||
CoordinateDelta(sum_grad, sum_hess, w, param.reg_alpha_denorm,
|
||||
param.reg_lambda_denorm));
|
||||
auto dw = static_cast<bst_float>(
|
||||
param_.learning_rate *
|
||||
CoordinateDelta(sum_grad, sum_hess, w, param_.reg_alpha_denorm,
|
||||
param_.reg_lambda_denorm));
|
||||
if (dw == 0.f) continue;
|
||||
w += dw;
|
||||
// update grad values
|
||||
for (bst_uint j = 0; j < col.length; ++j) {
|
||||
bst_gpair &p = gpair[col[j].index * ngroup + gid];
|
||||
GradientPair &p = gpair[col[j].index * ngroup + gid];
|
||||
if (p.GetHess() < 0.0f) continue;
|
||||
p += bst_gpair(p.GetHess() * col[j].fvalue * dw, 0);
|
||||
p += GradientPair(p.GetHess() * col[j].fvalue * dw, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -119,9 +120,9 @@ class ShotgunUpdater : public LinearUpdater {
|
||||
|
||||
protected:
|
||||
// training parameters
|
||||
ShotgunTrainParam param;
|
||||
ShotgunTrainParam param_;
|
||||
|
||||
std::unique_ptr<FeatureSelector> selector;
|
||||
std::unique_ptr<FeatureSelector> selector_;
|
||||
};
|
||||
|
||||
DMLC_REGISTER_PARAMETER(ShotgunTrainParam);
|
||||
|
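Several hunks in this commit also replace a spelled-out type with auto when the initializer is a cast that already names the type (the `auto dw = static_cast<bst_float>(...)` change above); this is clang-tidy's modernize-use-auto check. A small self-contained sketch with hypothetical names:

    #include <cstddef>

    void Sketch(double ratio, std::size_t n) {
      // Before: unsigned ntop = static_cast<unsigned>(ratio * n);
      // the type appears on both sides of the '='.
      auto ntop = static_cast<unsigned>(ratio * n);  // deduces the same type once
      (void)ntop;  // silence unused-variable warnings in this sketch
    }
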
@ -24,16 +24,16 @@ struct EvalEWiseBase : public Metric {
bst_float Eval(const std::vector<bst_float>& preds,
const MetaInfo& info,
bool distributed) const override {
CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.size(), info.labels.size())
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.size(), info.labels_.size())
<< "label and prediction size not match, "
<< "hint: use merror or mlogloss for multi-class classification";
const omp_ulong ndata = static_cast<omp_ulong>(info.labels.size());
const auto ndata = static_cast<omp_ulong>(info.labels_.size());
double sum = 0.0, wsum = 0.0;
#pragma omp parallel for reduction(+: sum, wsum) schedule(static)
for (omp_ulong i = 0; i < ndata; ++i) {
const bst_float wt = info.GetWeight(i);
sum += static_cast<const Derived*>(this)->EvalRow(info.labels[i], preds[i]) * wt;
sum += static_cast<const Derived*>(this)->EvalRow(info.labels_[i], preds[i]) * wt;
wsum += wt;
}
double dat[2]; dat[0] = sum, dat[1] = wsum;

@ -23,23 +23,23 @@ struct EvalMClassBase : public Metric {
bst_float Eval(const std::vector<bst_float> &preds,
const MetaInfo &info,
bool distributed) const override {
CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
CHECK(preds.size() % info.labels.size() == 0)
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK(preds.size() % info.labels_.size() == 0)
<< "label and prediction size not match";
const size_t nclass = preds.size() / info.labels.size();
const size_t nclass = preds.size() / info.labels_.size();
CHECK_GE(nclass, 1U)
<< "mlogloss and merror are only used for multi-class classification,"
<< " use logloss for binary classification";
const bst_omp_uint ndata = static_cast<bst_omp_uint>(info.labels.size());
const auto ndata = static_cast<bst_omp_uint>(info.labels_.size());
double sum = 0.0, wsum = 0.0;
int label_error = 0;
#pragma omp parallel for reduction(+: sum, wsum) schedule(static)
for (bst_omp_uint i = 0; i < ndata; ++i) {
const bst_float wt = info.GetWeight(i);
int label = static_cast<int>(info.labels[i]);
auto label = static_cast<int>(info.labels_[i]);
if (label >= 0 && label < static_cast<int>(nclass)) {
sum += Derived::EvalRow(label,
dmlc::BeginPtr(preds) + i * nclass,
preds.data() + i * nclass,
nclass) * wt;
wsum += wt;
} else {
@ -99,7 +99,7 @@ struct EvalMultiLogLoss : public EvalMClassBase<EvalMultiLogLoss> {
const bst_float *pred,
size_t nclass) {
const bst_float eps = 1e-16f;
size_t k = static_cast<size_t>(label);
auto k = static_cast<size_t>(label);
if (pred[k] > eps) {
return -std::log(pred[k]);
} else {

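The dmlc::BeginPtr(preds) to preds.data() change swaps a pre-C++11 helper for the standard std::vector accessor. A sketch of the equivalent pattern; the function shown is hypothetical:

    #include <cstddef>
    #include <vector>

    float FirstOfRow(const std::vector<float>& preds, std::size_t row, std::size_t nclass) {
      // data() yields a pointer to the underlying array, so pointer
      // arithmetic works exactly as it did with dmlc::BeginPtr.
      const float* base = preds.data() + row * nclass;
      return base[0];  // caller must guarantee the row is in range
    }
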
@ -19,7 +19,7 @@ DMLC_REGISTRY_FILE_TAG(rank_metric);
struct EvalAMS : public Metric {
public:
explicit EvalAMS(const char* param) {
CHECK(param != nullptr)
CHECK(param != nullptr) // NOLINT
<< "AMS must be in format ams@k";
ratio_ = atof(param);
std::ostringstream os;
@ -32,7 +32,7 @@ struct EvalAMS : public Metric {
CHECK(!distributed) << "metric AMS do not support distributed evaluation";
using namespace std; // NOLINT(*)

const bst_omp_uint ndata = static_cast<bst_omp_uint>(info.labels.size());
const auto ndata = static_cast<bst_omp_uint>(info.labels_.size());
std::vector<std::pair<bst_float, unsigned> > rec(ndata);

#pragma omp parallel for schedule(static)
@ -40,7 +40,7 @@ struct EvalAMS : public Metric {
rec[i] = std::make_pair(preds[i], i);
}
std::sort(rec.begin(), rec.end(), common::CmpFirst);
unsigned ntop = static_cast<unsigned>(ratio_ * ndata);
auto ntop = static_cast<unsigned>(ratio_ * ndata);
if (ntop == 0) ntop = ndata;
const double br = 10.0;
unsigned thresindex = 0;
@ -48,7 +48,7 @@ struct EvalAMS : public Metric {
for (unsigned i = 0; i < static_cast<unsigned>(ndata-1) && i < ntop; ++i) {
const unsigned ridx = rec[i].second;
const bst_float wt = info.GetWeight(ridx);
if (info.labels[ridx] > 0.5f) {
if (info.labels_[ridx] > 0.5f) {
s_tp += wt;
} else {
b_fp += wt;
@ -84,16 +84,16 @@ struct EvalAuc : public Metric {
bst_float Eval(const std::vector<bst_float> &preds,
const MetaInfo &info,
bool distributed) const override {
CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.size(), info.labels.size())
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.size(), info.labels_.size())
<< "label size predict size not match";
std::vector<unsigned> tgptr(2, 0);
tgptr[1] = static_cast<unsigned>(info.labels.size());
tgptr[1] = static_cast<unsigned>(info.labels_.size());

const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
CHECK_EQ(gptr.back(), info.labels.size())
const std::vector<unsigned> &gptr = info.group_ptr_.size() == 0 ? tgptr : info.group_ptr_;
CHECK_EQ(gptr.back(), info.labels_.size())
<< "EvalAuc: group structure must match number of prediction";
const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
// sum statistics
bst_float sum_auc = 0.0f;
int auc_error = 0;
@ -102,7 +102,7 @@ struct EvalAuc : public Metric {
for (bst_omp_uint k = 0; k < ngroup; ++k) {
rec.clear();
for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
rec.push_back(std::make_pair(preds[j], j));
rec.emplace_back(preds[j], j);
}
XGBOOST_PARALLEL_SORT(rec.begin(), rec.end(), common::CmpFirst);
// calculate AUC
@ -110,7 +110,7 @@ struct EvalAuc : public Metric {
double sum_npos = 0.0, sum_nneg = 0.0, buf_pos = 0.0, buf_neg = 0.0;
for (size_t j = 0; j < rec.size(); ++j) {
const bst_float wt = info.GetWeight(rec[j].second);
const bst_float ctr = info.labels[rec[j].second];
const bst_float ctr = info.labels_[rec[j].second];
// keep bucketing predictions in same bucket
if (j != 0 && rec[j].first != rec[j - 1].first) {
sum_pospair += buf_neg * (sum_npos + buf_pos *0.5);
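rec.emplace_back(preds[j], j) above is clang-tidy's modernize-use-emplace: the pair is constructed directly inside the vector's storage instead of building a temporary with std::make_pair and copying it in. In isolation:

    #include <utility>
    #include <vector>

    void Collect(const std::vector<float>& preds, std::vector<std::pair<float, unsigned>>* rec) {
      for (unsigned j = 0; j < preds.size(); ++j) {
        // Before: rec->push_back(std::make_pair(preds[j], j));
        rec->emplace_back(preds[j], j);  // constructs the pair in place
      }
    }
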
@ -156,16 +156,16 @@ struct EvalRankList : public Metric {
bst_float Eval(const std::vector<bst_float> &preds,
const MetaInfo &info,
bool distributed) const override {
CHECK_EQ(preds.size(), info.labels.size())
CHECK_EQ(preds.size(), info.labels_.size())
<< "label size predict size not match";
// quick consistency when group is not available
std::vector<unsigned> tgptr(2, 0);
tgptr[1] = static_cast<unsigned>(preds.size());
const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
const std::vector<unsigned> &gptr = info.group_ptr_.size() == 0 ? tgptr : info.group_ptr_;
CHECK_NE(gptr.size(), 0U) << "must specify group when constructing rank file";
CHECK_EQ(gptr.back(), preds.size())
<< "EvalRanklist: group structure must match number of prediction";
const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
// sum statistics
double sum_metric = 0.0f;
#pragma omp parallel reduction(+:sum_metric)
@ -176,7 +176,7 @@ struct EvalRankList : public Metric {
for (bst_omp_uint k = 0; k < ngroup; ++k) {
rec.clear();
for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
rec.push_back(std::make_pair(preds[j], static_cast<int>(info.labels[j])));
rec.emplace_back(preds[j], static_cast<int>(info.labels_[j]));
}
sum_metric += this->EvalMetric(rec);
}
@ -230,7 +230,7 @@ struct EvalPrecision : public EvalRankList{
explicit EvalPrecision(const char *name) : EvalRankList("pre", name) {}

protected:
virtual bst_float EvalMetric(std::vector< std::pair<bst_float, unsigned> > &rec) const {
bst_float EvalMetric(std::vector< std::pair<bst_float, unsigned> > &rec) const override {
// calculate Precision
std::sort(rec.begin(), rec.end(), common::CmpFirst);
unsigned nhit = 0;
@ -279,7 +279,7 @@ struct EvalMAP : public EvalRankList {
explicit EvalMAP(const char *name) : EvalRankList("map", name) {}

protected:
virtual bst_float EvalMetric(std::vector< std::pair<bst_float, unsigned> > &rec) const {
bst_float EvalMetric(std::vector< std::pair<bst_float, unsigned> > &rec) const override {
std::sort(rec.begin(), rec.end(), common::CmpFirst);
unsigned nhits = 0;
double sumap = 0.0;
@ -307,14 +307,14 @@ struct EvalMAP : public EvalRankList {
/*! \brief Cox: Partial likelihood of the Cox proportional hazards model */
struct EvalCox : public Metric {
public:
EvalCox() {}
EvalCox() = default;
bst_float Eval(const std::vector<bst_float> &preds,
const MetaInfo &info,
bool distributed) const override {
CHECK(!distributed) << "Cox metric does not support distributed evaluation";
using namespace std; // NOLINT(*)

const bst_omp_uint ndata = static_cast<bst_omp_uint>(info.labels.size());
const auto ndata = static_cast<bst_omp_uint>(info.labels_.size());
const std::vector<size_t> &label_order = info.LabelAbsSort();

// pre-compute a sum for the denominator
@ -328,7 +328,7 @@ struct EvalCox : public Metric {
bst_omp_uint num_events = 0;
for (bst_omp_uint i = 0; i < ndata; ++i) {
const size_t ind = label_order[i];
const auto label = info.labels[ind];
const auto label = info.labels_[ind];
if (label > 0) {
out -= log(preds[ind]) - log(exp_p_sum);
++num_events;
@ -336,7 +336,7 @@ struct EvalCox : public Metric {

// only update the denominator after we move forward in time (labels are sorted)
accumulated_sum += preds[ind];
if (i == ndata - 1 || std::abs(label) < std::abs(info.labels[label_order[i + 1]])) {
if (i == ndata - 1 || std::abs(label) < std::abs(info.labels_[label_order[i + 1]])) {
exp_p_sum -= accumulated_sum;
accumulated_sum = 0;
}
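EvalCox() {} becoming EvalCox() = default; is modernize-use-equals-default: an explicitly defaulted constructor states the intent directly and keeps the type trivially constructible when its members allow it. A minimal sketch, not taken from the sources:

    struct Accumulator {
      float total{0.0f};

      // Before: Accumulator() {}   // user-provided but empty
      Accumulator() = default;      // compiler-generated, same behaviour
      explicit Accumulator(float t) : total(t) {}
    };
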
@ -358,16 +358,16 @@ struct EvalAucPR : public Metric {

bst_float Eval(const std::vector<bst_float> &preds, const MetaInfo &info,
bool distributed) const override {
CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.size(), info.labels.size())
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.size(), info.labels_.size())
<< "label size predict size not match";
std::vector<unsigned> tgptr(2, 0);
tgptr[1] = static_cast<unsigned>(info.labels.size());
tgptr[1] = static_cast<unsigned>(info.labels_.size());
const std::vector<unsigned> &gptr =
info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
CHECK_EQ(gptr.back(), info.labels.size())
info.group_ptr_.size() == 0 ? tgptr : info.group_ptr_;
CHECK_EQ(gptr.back(), info.labels_.size())
<< "EvalAucPR: group structure must match number of prediction";
const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
// sum statistics
double auc = 0.0;
int auc_error = 0, auc_gt_one = 0;
@ -378,9 +378,9 @@ struct EvalAucPR : public Metric {
double total_neg = 0.0;
rec.clear();
for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
total_pos += info.GetWeight(j) * info.labels[j];
total_neg += info.GetWeight(j) * (1.0f - info.labels[j]);
rec.push_back(std::make_pair(preds[j], j));
total_pos += info.GetWeight(j) * info.labels_[j];
total_neg += info.GetWeight(j) * (1.0f - info.labels_[j]);
rec.emplace_back(preds[j], j);
}
XGBOOST_PARALLEL_SORT(rec.begin(), rec.end(), common::CmpFirst);
// we need pos > 0 && neg > 0
@ -390,11 +390,10 @@
// calculate AUC
double tp = 0.0, prevtp = 0.0, fp = 0.0, prevfp = 0.0, h = 0.0, a = 0.0, b = 0.0;
for (size_t j = 0; j < rec.size(); ++j) {
tp += info.GetWeight(rec[j].second) * info.labels[rec[j].second];
fp += info.GetWeight(rec[j].second) * (1.0f - info.labels[rec[j].second]);
tp += info.GetWeight(rec[j].second) * info.labels_[rec[j].second];
fp += info.GetWeight(rec[j].second) * (1.0f - info.labels_[rec[j].second]);
if ((j < rec.size() - 1 && rec[j].first != rec[j + 1].first) || j == rec.size() - 1) {
if (tp == prevtp) {
h = 1.0;
a = 1.0;
b = 0.0;
} else {

@ -38,15 +38,15 @@ class SoftmaxMultiClassObj : public ObjFunction {
void GetGradient(HostDeviceVector<bst_float>* preds,
const MetaInfo& info,
int iter,
HostDeviceVector<bst_gpair>* out_gpair) override {
CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
CHECK(preds->size() == (static_cast<size_t>(param_.num_class) * info.labels.size()))
HostDeviceVector<GradientPair>* out_gpair) override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK(preds->Size() == (static_cast<size_t>(param_.num_class) * info.labels_.size()))
<< "SoftmaxMultiClassObj: label size and pred size does not match";
std::vector<bst_float>& preds_h = preds->data_h();
out_gpair->resize(preds_h.size());
std::vector<bst_gpair>& gpair = out_gpair->data_h();
std::vector<bst_float>& preds_h = preds->HostVector();
out_gpair->Resize(preds_h.size());
std::vector<GradientPair>& gpair = out_gpair->HostVector();
const int nclass = param_.num_class;
const omp_ulong ndata = static_cast<omp_ulong>(preds_h.size() / nclass);
const auto ndata = static_cast<omp_ulong>(preds_h.size() / nclass);

int label_error = 0;
#pragma omp parallel
@ -58,7 +58,7 @@ class SoftmaxMultiClassObj : public ObjFunction {
rec[k] = preds_h[i * nclass + k];
}
common::Softmax(&rec);
int label = static_cast<int>(info.labels[i]);
auto label = static_cast<int>(info.labels_[i]);
if (label < 0 || label >= nclass) {
label_error = label; label = 0;
}
@ -67,9 +67,9 @@ class SoftmaxMultiClassObj : public ObjFunction {
bst_float p = rec[k];
const bst_float h = 2.0f * p * (1.0f - p) * wt;
if (label == k) {
gpair[i * nclass + k] = bst_gpair((p - 1.0f) * wt, h);
gpair[i * nclass + k] = GradientPair((p - 1.0f) * wt, h);
} else {
gpair[i * nclass + k] = bst_gpair(p* wt, h);
gpair[i * nclass + k] = GradientPair(p* wt, h);
}
}
}
@ -91,10 +91,10 @@ class SoftmaxMultiClassObj : public ObjFunction {

private:
inline void Transform(HostDeviceVector<bst_float> *io_preds, bool prob) {
std::vector<bst_float> &preds = io_preds->data_h();
std::vector<bst_float> &preds = io_preds->HostVector();
std::vector<bst_float> tmp;
const int nclass = param_.num_class;
const omp_ulong ndata = static_cast<omp_ulong>(preds.size() / nclass);
const auto ndata = static_cast<omp_ulong>(preds.size() / nclass);
if (!prob) tmp.resize(ndata);

#pragma omp parallel

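The recurring data_h()/resize()/size() to HostVector()/Resize()/Size() substitutions reflect renaming the HostDeviceVector accessors to Google-style CamelCase method names; every call site in the metrics and objectives then changes mechanically. A hedged sketch of the pattern only; the class below is a stand-in, not the real HostDeviceVector:

    #include <cstddef>
    #include <vector>

    template <typename T>
    class HostBufferSketch {
     public:
      // Methods renamed from lower_case (data_h, size, resize) to CamelCase.
      std::vector<T>& HostVector() { return data_; }
      std::size_t Size() const { return data_.size(); }
      void Resize(std::size_t n) { data_.resize(n); }

     private:
      std::vector<T> data_;
    };
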
@ -40,17 +40,17 @@ class LambdaRankObj : public ObjFunction {
void GetGradient(HostDeviceVector<bst_float>* preds,
const MetaInfo& info,
int iter,
HostDeviceVector<bst_gpair>* out_gpair) override {
CHECK_EQ(preds->size(), info.labels.size()) << "label size predict size not match";
auto& preds_h = preds->data_h();
out_gpair->resize(preds_h.size());
std::vector<bst_gpair>& gpair = out_gpair->data_h();
HostDeviceVector<GradientPair>* out_gpair) override {
CHECK_EQ(preds->Size(), info.labels_.size()) << "label size predict size not match";
auto& preds_h = preds->HostVector();
out_gpair->Resize(preds_h.size());
std::vector<GradientPair>& gpair = out_gpair->HostVector();
// quick consistency when group is not available
std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(info.labels.size());
const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
CHECK(gptr.size() != 0 && gptr.back() == info.labels.size())
std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(info.labels_.size());
const std::vector<unsigned> &gptr = info.group_ptr_.size() == 0 ? tgptr : info.group_ptr_;
CHECK(gptr.size() != 0 && gptr.back() == info.labels_.size())
<< "group structure not consistent with #rows";
const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
#pragma omp parallel
{
// parallel construct, declare random number generator here, so that each
@ -64,8 +64,8 @@ class LambdaRankObj : public ObjFunction {
for (bst_omp_uint k = 0; k < ngroup; ++k) {
lst.clear(); pairs.clear();
for (unsigned j = gptr[k]; j < gptr[k+1]; ++j) {
lst.push_back(ListEntry(preds_h[j], info.labels[j], j));
gpair[j] = bst_gpair(0.0f, 0.0f);
lst.emplace_back(preds_h[j], info.labels_[j], j);
gpair[j] = GradientPair(0.0f, 0.0f);
}
std::sort(lst.begin(), lst.end(), ListEntry::CmpPred);
rec.resize(lst.size());
@ -85,9 +85,9 @@ class LambdaRankObj : public ObjFunction {
for (unsigned pid = i; pid < j; ++pid) {
unsigned ridx = std::uniform_int_distribution<unsigned>(0, nleft + nright - 1)(rnd);
if (ridx < nleft) {
pairs.push_back(LambdaPair(rec[ridx].second, rec[pid].second));
pairs.emplace_back(rec[ridx].second, rec[pid].second);
} else {
pairs.push_back(LambdaPair(rec[pid].second, rec[ridx+j-i].second));
pairs.emplace_back(rec[pid].second, rec[ridx+j-i].second);
}
}
}
@ -101,22 +101,22 @@ class LambdaRankObj : public ObjFunction {
if (param_.fix_list_weight != 0.0f) {
scale *= param_.fix_list_weight / (gptr[k + 1] - gptr[k]);
}
for (size_t i = 0; i < pairs.size(); ++i) {
const ListEntry &pos = lst[pairs[i].pos_index];
const ListEntry &neg = lst[pairs[i].neg_index];
const bst_float w = pairs[i].weight * scale;
for (auto & pair : pairs) {
const ListEntry &pos = lst[pair.pos_index];
const ListEntry &neg = lst[pair.neg_index];
const bst_float w = pair.weight * scale;
const float eps = 1e-16f;
bst_float p = common::Sigmoid(pos.pred - neg.pred);
bst_float g = p - 1.0f;
bst_float h = std::max(p * (1.0f - p), eps);
// accumulate gradient and hessian in both pid, and nid
gpair[pos.rindex] += bst_gpair(g * w, 2.0f*w*h);
gpair[neg.rindex] += bst_gpair(-g * w, 2.0f*w*h);
gpair[pos.rindex] += GradientPair(g * w, 2.0f*w*h);
gpair[neg.rindex] += GradientPair(-g * w, 2.0f*w*h);
}
}
}
}
const char* DefaultEvalMetric(void) const override {
const char* DefaultEvalMetric() const override {
return "map";
}

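The index loops over pairs become range-based for loops (modernize-loop-convert), which removes the repeated pairs[i] subscripting seen in the old lines. The same rewrite in isolation, with a hypothetical element type:

    #include <vector>

    struct PairSketch { unsigned pos_index, neg_index; float weight; };

    void ScaleWeights(std::vector<PairSketch>* io_pairs, float scale) {
      // Before: for (size_t i = 0; i < io_pairs->size(); ++i) (*io_pairs)[i].weight *= scale;
      for (auto& pair : *io_pairs) {
        pair.weight *= scale;  // the reference replaces the repeated indexing
      }
    }
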
@ -177,7 +177,7 @@ class LambdaRankObjNDCG : public LambdaRankObj {
void GetLambdaWeight(const std::vector<ListEntry> &sorted_list,
std::vector<LambdaPair> *io_pairs) override {
std::vector<LambdaPair> &pairs = *io_pairs;
float IDCG;
float IDCG; // NOLINT
{
std::vector<bst_float> labels(sorted_list.size());
for (size_t i = 0; i < sorted_list.size(); ++i) {
@ -187,32 +187,32 @@ class LambdaRankObjNDCG : public LambdaRankObj {
IDCG = CalcDCG(labels);
}
if (IDCG == 0.0) {
for (size_t i = 0; i < pairs.size(); ++i) {
pairs[i].weight = 0.0f;
for (auto & pair : pairs) {
pair.weight = 0.0f;
}
} else {
IDCG = 1.0f / IDCG;
for (size_t i = 0; i < pairs.size(); ++i) {
unsigned pos_idx = pairs[i].pos_index;
unsigned neg_idx = pairs[i].neg_index;
for (auto & pair : pairs) {
unsigned pos_idx = pair.pos_index;
unsigned neg_idx = pair.neg_index;
float pos_loginv = 1.0f / std::log2(pos_idx + 2.0f);
float neg_loginv = 1.0f / std::log2(neg_idx + 2.0f);
int pos_label = static_cast<int>(sorted_list[pos_idx].label);
int neg_label = static_cast<int>(sorted_list[neg_idx].label);
auto pos_label = static_cast<int>(sorted_list[pos_idx].label);
auto neg_label = static_cast<int>(sorted_list[neg_idx].label);
bst_float original =
((1 << pos_label) - 1) * pos_loginv + ((1 << neg_label) - 1) * neg_loginv;
float changed =
((1 << neg_label) - 1) * pos_loginv + ((1 << pos_label) - 1) * neg_loginv;
bst_float delta = (original - changed) * IDCG;
if (delta < 0.0f) delta = - delta;
pairs[i].weight = delta;
pair.weight = delta;
}
}
}
inline static bst_float CalcDCG(const std::vector<bst_float> &labels) {
double sumdcg = 0.0;
for (size_t i = 0; i < labels.size(); ++i) {
const unsigned rel = static_cast<unsigned>(labels[i]);
const auto rel = static_cast<unsigned>(labels[i]);
if (rel != 0) {
sumdcg += ((1 << rel) - 1) / std::log2(static_cast<bst_float>(i + 2));
}
@ -238,7 +238,7 @@ class LambdaRankObjMAP : public LambdaRankObj {
float ap_acc_add;
/* \brief the accumulated positive instance count */
float hits;
MAPStats(void) {}
MAPStats() = default;
MAPStats(float ap_acc, float ap_acc_miss, float ap_acc_add, float hits)
: ap_acc(ap_acc), ap_acc_miss(ap_acc_miss), ap_acc_add(ap_acc_add), hits(hits) {}
};
@ -300,10 +300,10 @@ class LambdaRankObjMAP : public LambdaRankObj {
std::vector<LambdaPair> &pairs = *io_pairs;
std::vector<MAPStats> map_stats;
GetMAPStats(sorted_list, &map_stats);
for (size_t i = 0; i < pairs.size(); ++i) {
pairs[i].weight =
GetLambdaMAP(sorted_list, pairs[i].pos_index,
pairs[i].neg_index, &map_stats);
for (auto & pair : pairs) {
pair.weight =
GetLambdaMAP(sorted_list, pair.pos_index,
pair.neg_index, &map_stats);
}
}
};

@ -32,26 +32,26 @@ struct RegLossParam : public dmlc::Parameter<RegLossParam> {
template <typename Loss>
class RegLossObj : public ObjFunction {
public:
RegLossObj() : labels_checked(false) {}
RegLossObj() = default;

void Configure(
const std::vector<std::pair<std::string, std::string> > &args) override {
param_.InitAllowUnknown(args);
}
void GetGradient(HostDeviceVector<bst_float> *preds, const MetaInfo &info,
int iter, HostDeviceVector<bst_gpair> *out_gpair) override {
CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->size(), info.labels.size())
int iter, HostDeviceVector<GradientPair> *out_gpair) override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->Size(), info.labels_.size())
<< "labels are not correctly provided"
<< "preds.size=" << preds->size()
<< ", label.size=" << info.labels.size();
auto& preds_h = preds->data_h();
<< "preds.size=" << preds->Size()
<< ", label.size=" << info.labels_.size();
auto& preds_h = preds->HostVector();

this->LazyCheckLabels(info.labels);
out_gpair->resize(preds_h.size());
auto& gpair = out_gpair->data_h();
const omp_ulong n = static_cast<omp_ulong>(preds_h.size());
auto gpair_ptr = out_gpair->ptr_h();
this->LazyCheckLabels(info.labels_);
out_gpair->Resize(preds_h.size());
auto& gpair = out_gpair->HostVector();
const auto n = static_cast<omp_ulong>(preds_h.size());
auto gpair_ptr = out_gpair->HostPointer();
avx::Float8 scale(param_.scale_pos_weight);

const omp_ulong remainder = n % 8;
@ -59,10 +59,10 @@ class RegLossObj : public ObjFunction {
// Use a maximum of 8 threads
#pragma omp parallel for schedule(static) num_threads(std::min(8, nthread))
for (omp_ulong i = 0; i < n - remainder; i += 8) {
avx::Float8 y(&info.labels[i]);
avx::Float8 y(&info.labels_[i]);
avx::Float8 p = Loss::PredTransform(avx::Float8(&preds_h[i]));
avx::Float8 w = info.weights.empty() ? avx::Float8(1.0f)
: avx::Float8(&info.weights[i]);
avx::Float8 w = info.weights_.empty() ? avx::Float8(1.0f)
: avx::Float8(&info.weights_[i]);
// Adjust weight
w += y * (scale * w - w);
avx::Float8 grad = Loss::FirstOrderGradient(p, y);
@ -70,11 +70,11 @@ class RegLossObj : public ObjFunction {
avx::StoreGpair(gpair_ptr + i, grad * w, hess * w);
}
for (omp_ulong i = n - remainder; i < n; ++i) {
auto y = info.labels[i];
auto y = info.labels_[i];
bst_float p = Loss::PredTransform(preds_h[i]);
bst_float w = info.GetWeight(i);
w += y * ((param_.scale_pos_weight * w) - w);
gpair[i] = bst_gpair(Loss::FirstOrderGradient(p, y) * w,
gpair[i] = GradientPair(Loss::FirstOrderGradient(p, y) * w,
Loss::SecondOrderGradient(p, y) * w);
}

@ -85,8 +85,8 @@ class RegLossObj : public ObjFunction {
return Loss::DefaultEvalMetric();
}
void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
std::vector<bst_float> &preds = io_preds->data_h();
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
std::vector<bst_float> &preds = io_preds->HostVector();
const auto ndata = static_cast<bst_omp_uint>(preds.size());
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) {
preds[j] = Loss::PredTransform(preds[j]);
@ -98,14 +98,14 @@ class RegLossObj : public ObjFunction {

protected:
void LazyCheckLabels(const std::vector<float> &labels) {
if (labels_checked) return;
if (labels_checked_) return;
for (auto &y : labels) {
CHECK(Loss::CheckLabel(y)) << Loss::LabelErrorMsg();
}
labels_checked = true;
labels_checked_ = true;
}
RegLossParam param_;
bool labels_checked;
bool labels_checked_{false};
};

// register the objective functions
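RegLossObj loses its hand-written constructor because the flag it initialised moves into an in-class default member initializer (bool labels_checked_{false};), letting RegLossObj() = default; take over. The same pattern on its own:

    class LazyChecker {
     public:
      // Before: LazyChecker() : checked_(false) {}
      LazyChecker() = default;  // member below already has its initial value

     private:
      bool checked_{false};  // default member initializer replaces the init list
    };
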
@ -148,12 +148,12 @@ class PoissonRegression : public ObjFunction {
void GetGradient(HostDeviceVector<bst_float> *preds,
const MetaInfo &info,
int iter,
HostDeviceVector<bst_gpair> *out_gpair) override {
CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->size(), info.labels.size()) << "labels are not correctly provided";
auto& preds_h = preds->data_h();
out_gpair->resize(preds->size());
auto& gpair = out_gpair->data_h();
HostDeviceVector<GradientPair> *out_gpair) override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->Size(), info.labels_.size()) << "labels are not correctly provided";
auto& preds_h = preds->HostVector();
out_gpair->Resize(preds->Size());
auto& gpair = out_gpair->HostVector();
// check if label in range
bool label_correct = true;
// start calculating gradient
@ -162,9 +162,9 @@ class PoissonRegression : public ObjFunction {
for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*)
bst_float p = preds_h[i];
bst_float w = info.GetWeight(i);
bst_float y = info.labels[i];
bst_float y = info.labels_[i];
if (y >= 0.0f) {
gpair[i] = bst_gpair((std::exp(p) - y) * w,
gpair[i] = GradientPair((std::exp(p) - y) * w,
std::exp(p + param_.max_delta_step) * w);
} else {
label_correct = false;
@ -173,7 +173,7 @@ class PoissonRegression : public ObjFunction {
CHECK(label_correct) << "PoissonRegression: label must be nonnegative";
}
void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
std::vector<bst_float> &preds = io_preds->data_h();
std::vector<bst_float> &preds = io_preds->HostVector();
const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
#pragma omp parallel for schedule(static)
for (long j = 0; j < ndata; ++j) { // NOLINT(*)
@ -186,7 +186,7 @@ class PoissonRegression : public ObjFunction {
bst_float ProbToMargin(bst_float base_score) const override {
return std::log(base_score);
}
const char* DefaultEvalMetric(void) const override {
const char* DefaultEvalMetric() const override {
return "poisson-nloglik";
}

@ -209,12 +209,12 @@ class CoxRegression : public ObjFunction {
void GetGradient(HostDeviceVector<bst_float> *preds,
const MetaInfo &info,
int iter,
HostDeviceVector<bst_gpair> *out_gpair) override {
CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->size(), info.labels.size()) << "labels are not correctly provided";
auto& preds_h = preds->data_h();
out_gpair->resize(preds_h.size());
auto& gpair = out_gpair->data_h();
HostDeviceVector<GradientPair> *out_gpair) override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->Size(), info.labels_.size()) << "labels are not correctly provided";
auto& preds_h = preds->HostVector();
out_gpair->Resize(preds_h.size());
auto& gpair = out_gpair->HostVector();
const std::vector<size_t> &label_order = info.LabelAbsSort();

const omp_ulong ndata = static_cast<omp_ulong>(preds_h.size()); // NOLINT(*)
@ -236,7 +236,7 @@ class CoxRegression : public ObjFunction {
const double p = preds_h[ind];
const double exp_p = std::exp(p);
const double w = info.GetWeight(ind);
const double y = info.labels[ind];
const double y = info.labels_[ind];
const double abs_y = std::abs(y);

// only update the denominator after we move forward in time (labels are sorted)
@ -257,14 +257,14 @@ class CoxRegression : public ObjFunction {

const double grad = exp_p*r_k - static_cast<bst_float>(y > 0);
const double hess = exp_p*r_k - exp_p*exp_p * s_k;
gpair.at(ind) = bst_gpair(grad * w, hess * w);
gpair.at(ind) = GradientPair(grad * w, hess * w);

last_abs_y = abs_y;
last_exp_p = exp_p;
}
}
void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
std::vector<bst_float> &preds = io_preds->data_h();
std::vector<bst_float> &preds = io_preds->HostVector();
const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
#pragma omp parallel for schedule(static)
for (long j = 0; j < ndata; ++j) { // NOLINT(*)
@ -277,7 +277,7 @@ class CoxRegression : public ObjFunction {
bst_float ProbToMargin(bst_float base_score) const override {
return std::log(base_score);
}
const char* DefaultEvalMetric(void) const override {
const char* DefaultEvalMetric() const override {
return "cox-nloglik";
}
};
@ -297,12 +297,12 @@ class GammaRegression : public ObjFunction {
void GetGradient(HostDeviceVector<bst_float> *preds,
const MetaInfo &info,
int iter,
HostDeviceVector<bst_gpair> *out_gpair) override {
CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->size(), info.labels.size()) << "labels are not correctly provided";
auto& preds_h = preds->data_h();
out_gpair->resize(preds_h.size());
auto& gpair = out_gpair->data_h();
HostDeviceVector<GradientPair> *out_gpair) override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->Size(), info.labels_.size()) << "labels are not correctly provided";
auto& preds_h = preds->HostVector();
out_gpair->Resize(preds_h.size());
auto& gpair = out_gpair->HostVector();
// check if label in range
bool label_correct = true;
// start calculating gradient
@ -311,9 +311,9 @@ class GammaRegression : public ObjFunction {
for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*)
bst_float p = preds_h[i];
bst_float w = info.GetWeight(i);
bst_float y = info.labels[i];
bst_float y = info.labels_[i];
if (y >= 0.0f) {
gpair[i] = bst_gpair((1 - y / std::exp(p)) * w, y / std::exp(p) * w);
gpair[i] = GradientPair((1 - y / std::exp(p)) * w, y / std::exp(p) * w);
} else {
label_correct = false;
}
@ -321,7 +321,7 @@ class GammaRegression : public ObjFunction {
CHECK(label_correct) << "GammaRegression: label must be positive";
}
void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
std::vector<bst_float> &preds = io_preds->data_h();
std::vector<bst_float> &preds = io_preds->HostVector();
const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
#pragma omp parallel for schedule(static)
for (long j = 0; j < ndata; ++j) { // NOLINT(*)
@ -334,7 +334,7 @@ class GammaRegression : public ObjFunction {
bst_float ProbToMargin(bst_float base_score) const override {
return std::log(base_score);
}
const char* DefaultEvalMetric(void) const override {
const char* DefaultEvalMetric() const override {
return "gamma-nloglik";
}
};
@ -364,27 +364,27 @@ class TweedieRegression : public ObjFunction {
void GetGradient(HostDeviceVector<bst_float> *preds,
const MetaInfo &info,
int iter,
HostDeviceVector<bst_gpair> *out_gpair) override {
CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->size(), info.labels.size()) << "labels are not correctly provided";
auto& preds_h = preds->data_h();
out_gpair->resize(preds->size());
auto& gpair = out_gpair->data_h();
HostDeviceVector<GradientPair> *out_gpair) override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->Size(), info.labels_.size()) << "labels are not correctly provided";
auto& preds_h = preds->HostVector();
out_gpair->Resize(preds->Size());
auto& gpair = out_gpair->HostVector();
// check if label in range
bool label_correct = true;
// start calculating gradient
const omp_ulong ndata = static_cast<omp_ulong>(preds->size()); // NOLINT(*)
const omp_ulong ndata = static_cast<omp_ulong>(preds->Size()); // NOLINT(*)
#pragma omp parallel for schedule(static)
for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*)
bst_float p = preds_h[i];
bst_float w = info.GetWeight(i);
bst_float y = info.labels[i];
bst_float y = info.labels_[i];
float rho = param_.tweedie_variance_power;
if (y >= 0.0f) {
bst_float grad = -y * std::exp((1 - rho) * p) + std::exp((2 - rho) * p);
bst_float hess = -y * (1 - rho) * \
std::exp((1 - rho) * p) + (2 - rho) * std::exp((2 - rho) * p);
gpair[i] = bst_gpair(grad * w, hess * w);
gpair[i] = GradientPair(grad * w, hess * w);
} else {
label_correct = false;
}
@ -392,14 +392,14 @@ class TweedieRegression : public ObjFunction {
CHECK(label_correct) << "TweedieRegression: label must be nonnegative";
}
void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
std::vector<bst_float> &preds = io_preds->data_h();
std::vector<bst_float> &preds = io_preds->HostVector();
const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
#pragma omp parallel for schedule(static)
for (long j = 0; j < ndata; ++j) { // NOLINT(*)
preds[j] = std::exp(preds[j]);
}
}
const char* DefaultEvalMetric(void) const override {
const char* DefaultEvalMetric() const override {
std::ostringstream os;
os << "tweedie-nloglik@" << param_.tweedie_variance_power;
std::string metric = os.str();

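Each DefaultEvalMetric(void) above becomes DefaultEvalMetric(); this is modernize-redundant-void-arg. In C++ an empty parameter list already means no arguments, so the C-style void adds nothing:

    struct MetricName {
      // Before: const char* Name(void) const;
      const char* Name() const { return "example"; }  // identical signature
    };
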
@ -16,11 +16,12 @@
#include "../common/host_device_vector.h"
#include "./regression_loss.h"

using namespace dh;

namespace xgboost {
namespace obj {

using dh::DVec;

DMLC_REGISTRY_FILE_TAG(regression_obj_gpu);

struct GPURegLossParam : public dmlc::Parameter<GPURegLossParam> {
@ -43,7 +44,7 @@ struct GPURegLossParam : public dmlc::Parameter<GPURegLossParam> {
// GPU kernel for gradient computation
template<typename Loss>
__global__ void get_gradient_k
(bst_gpair *__restrict__ out_gpair, unsigned int *__restrict__ label_correct,
(GradientPair *__restrict__ out_gpair, unsigned int *__restrict__ label_correct,
const float * __restrict__ preds, const float * __restrict__ labels,
const float * __restrict__ weights, int n, float scale_pos_weight) {
int i = threadIdx.x + blockIdx.x * blockDim.x;
@ -56,7 +57,7 @@ __global__ void get_gradient_k
w *= scale_pos_weight;
if (!Loss::CheckLabel(label))
atomicAnd(label_correct, 0);
out_gpair[i] = bst_gpair
out_gpair[i] = GradientPair
(Loss::FirstOrderGradient(p, label) * w, Loss::SecondOrderGradient(p, label) * w);
}

@ -75,40 +76,40 @@ class GPURegLossObj : public ObjFunction {
protected:
// manages device data
struct DeviceData {
dvec<float> labels, weights;
dvec<unsigned int> label_correct;
DVec<float> labels, weights;
DVec<unsigned int> label_correct;

// allocate everything on device
DeviceData(bulk_allocator<memory_type::DEVICE>* ba, int device_idx, size_t n) {
ba->allocate(device_idx, false,
DeviceData(dh::BulkAllocator<dh::MemoryType::kDevice>* ba, int device_idx, size_t n) {
ba->Allocate(device_idx, false,
&labels, n,
&weights, n,
&label_correct, 1);
}
size_t size() const { return labels.size(); }
size_t Size() const { return labels.Size(); }
};


bool copied_;
std::unique_ptr<bulk_allocator<memory_type::DEVICE>> ba_;
std::unique_ptr<dh::BulkAllocator<dh::MemoryType::kDevice>> ba_;
std::unique_ptr<DeviceData> data_;
HostDeviceVector<bst_float> preds_d_;
HostDeviceVector<bst_gpair> out_gpair_d_;
HostDeviceVector<GradientPair> out_gpair_d_;

// allocate device data for n elements, do nothing if enough memory is allocated already
void LazyResize(int n) {
if (data_.get() != nullptr && data_->size() >= n)
if (data_.get() != nullptr && data_->Size() >= n)
return;
copied_ = false;
// free the old data and allocate the new data
ba_.reset(new bulk_allocator<memory_type::DEVICE>());
ba_.reset(new dh::BulkAllocator<dh::MemoryType::kDevice>());
data_.reset(new DeviceData(ba_.get(), 0, n));
preds_d_.resize(n, 0.0f, param_.gpu_id);
out_gpair_d_.resize(n, bst_gpair(), param_.gpu_id);
preds_d_.Resize(n, 0.0f, param_.gpu_id);
out_gpair_d_.Resize(n, GradientPair(), param_.gpu_id);
}

public:
GPURegLossObj() : copied_(false), preds_d_(0, -1), out_gpair_d_(0, -1) {}
GPURegLossObj() : copied_(false), preds_d_(0, -1), out_gpair_d_({}, -1) {}

void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
param_.InitAllowUnknown(args);
@ -118,32 +119,32 @@ class GPURegLossObj : public ObjFunction {
void GetGradient(HostDeviceVector<float>* preds,
const MetaInfo &info,
int iter,
HostDeviceVector<bst_gpair>* out_gpair) override {
CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->size(), info.labels.size())
HostDeviceVector<GradientPair>* out_gpair) override {
CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds->Size(), info.labels_.size())
<< "labels are not correctly provided"
<< "preds.size=" << preds->size() << ", label.size=" << info.labels.size();
size_t ndata = preds->size();
out_gpair->resize(ndata, bst_gpair(), param_.gpu_id);
<< "preds.size=" << preds->Size() << ", label.size=" << info.labels_.size();
size_t ndata = preds->Size();
out_gpair->Resize(ndata, GradientPair(), param_.gpu_id);
LazyResize(ndata);
GetGradientDevice(preds->ptr_d(param_.gpu_id), info, iter,
out_gpair->ptr_d(param_.gpu_id), ndata);
GetGradientDevice(preds->DevicePointer(param_.gpu_id), info, iter,
out_gpair->DevicePointer(param_.gpu_id), ndata);
}

private:
void GetGradientDevice(float* preds,
const MetaInfo &info,
int iter,
bst_gpair* out_gpair, size_t n) {
safe_cuda(cudaSetDevice(param_.gpu_id));
GradientPair* out_gpair, size_t n) {
dh::safe_cuda(cudaSetDevice(param_.gpu_id));
DeviceData& d = *data_;
d.label_correct.fill(1);
d.label_correct.Fill(1);
// only copy the labels and weights once, similar to how the data is copied
if (!copied_) {
thrust::copy(info.labels.begin(), info.labels.begin() + n,
thrust::copy(info.labels_.begin(), info.labels_.begin() + n,
d.labels.tbegin());
if (info.weights.size() > 0) {
thrust::copy(info.weights.begin(), info.weights.begin() + n,
if (info.weights_.size() > 0) {
thrust::copy(info.weights_.begin(), info.weights_.begin() + n,
d.weights.tbegin());
}
copied_ = true;
@ -151,11 +152,11 @@ class GPURegLossObj : public ObjFunction {

// run the kernel
const int block = 256;
get_gradient_k<Loss><<<div_round_up(n, block), block>>>
(out_gpair, d.label_correct.data(), preds,
d.labels.data(), info.weights.size() > 0 ? d.weights.data() : nullptr,
get_gradient_k<Loss><<<dh::DivRoundUp(n, block), block>>>
(out_gpair, d.label_correct.Data(), preds,
d.labels.Data(), info.weights_.size() > 0 ? d.weights.Data() : nullptr,
n, param_.scale_pos_weight);
safe_cuda(cudaGetLastError());
dh::safe_cuda(cudaGetLastError());

// copy output data from the GPU
unsigned int label_correct_h;
@ -173,15 +174,15 @@ class GPURegLossObj : public ObjFunction {
}

void PredTransform(HostDeviceVector<float> *io_preds) override {
PredTransformDevice(io_preds->ptr_d(param_.gpu_id), io_preds->size());
PredTransformDevice(io_preds->DevicePointer(param_.gpu_id), io_preds->Size());
}

void PredTransformDevice(float* preds, size_t n) {
safe_cuda(cudaSetDevice(param_.gpu_id));
dh::safe_cuda(cudaSetDevice(param_.gpu_id));
const int block = 256;
pred_transform_k<Loss><<<div_round_up(n, block), block>>>(preds, n);
safe_cuda(cudaGetLastError());
safe_cuda(cudaDeviceSynchronize());
pred_transform_k<Loss><<<dh::DivRoundUp(n, block), block>>>(preds, n);
dh::safe_cuda(cudaGetLastError());
dh::safe_cuda(cudaDeviceSynchronize());
}


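The GPU objective drops using namespace dh; in favour of the single declaration using dh::DVec; plus fully qualified calls such as dh::safe_cuda; this satisfies the google-build-using-namespace check, which forbids using-directives. A sketch with a stand-in namespace:

    namespace helpers {  // hypothetical namespace for the sketch
    inline int DivRoundUp(int a, int b) { return (a + b - 1) / b; }
    }  // namespace helpers

    // Before: using namespace helpers;   // imports every name
    using helpers::DivRoundUp;            // imports exactly one name

    int NumBlocks(int n, int block) { return DivRoundUp(n, block); }
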
@ -24,7 +24,7 @@ class CPUPredictor : public Predictor {
for (size_t i = tree_begin; i < tree_end; ++i) {
if (tree_info[i] == bst_group) {
int tid = trees[i]->GetLeafIndex(*p_feats, root_index);
psum += (*trees[i])[tid].leaf_value();
psum += (*trees[i])[tid].LeafValue();
}
}
p_feats->Drop(inst);
@ -45,35 +45,35 @@ class CPUPredictor : public Predictor {
std::vector<bst_float>* out_preds,
const gbm::GBTreeModel& model, int num_group,
unsigned tree_begin, unsigned tree_end) {
const MetaInfo& info = p_fmat->info();
const MetaInfo& info = p_fmat->Info();
const int nthread = omp_get_max_threads();
InitThreadTemp(nthread, model.param.num_feature);
std::vector<bst_float>& preds = *out_preds;
CHECK_EQ(model.param.size_leaf_vector, 0)
<< "size_leaf_vector is enforced to 0 so far";
CHECK_EQ(preds.size(), p_fmat->info().num_row * num_group);
CHECK_EQ(preds.size(), p_fmat->Info().num_row_ * num_group);
// start collecting the prediction
dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator();
iter->BeforeFirst();
while (iter->Next()) {
const RowBatch& batch = iter->Value();
// parallel over local batch
const int K = 8;
const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
const bst_omp_uint rest = nsize % K;
constexpr int kUnroll = 8;
const auto nsize = static_cast<bst_omp_uint>(batch.size);
const bst_omp_uint rest = nsize % kUnroll;
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nsize - rest; i += K) {
for (bst_omp_uint i = 0; i < nsize - rest; i += kUnroll) {
const int tid = omp_get_thread_num();
RegTree::FVec& feats = thread_temp[tid];
int64_t ridx[K];
RowBatch::Inst inst[K];
for (int k = 0; k < K; ++k) {
int64_t ridx[kUnroll];
RowBatch::Inst inst[kUnroll];
for (int k = 0; k < kUnroll; ++k) {
ridx[k] = static_cast<int64_t>(batch.base_rowid + i + k);
}
for (int k = 0; k < K; ++k) {
for (int k = 0; k < kUnroll; ++k) {
inst[k] = batch[i + k];
}
for (int k = 0; k < K; ++k) {
for (int k = 0; k < kUnroll; ++k) {
for (int gid = 0; gid < num_group; ++gid) {
const size_t offset = ridx[k] * num_group + gid;
preds[offset] += this->PredValue(
@ -84,7 +84,7 @@ class CPUPredictor : public Predictor {
}
for (bst_omp_uint i = nsize - rest; i < nsize; ++i) {
RegTree::FVec& feats = thread_temp[0];
const int64_t ridx = static_cast<int64_t>(batch.base_rowid + i);
const auto ridx = static_cast<int64_t>(batch.base_rowid + i);
const RowBatch::Inst inst = batch[i];
for (int gid = 0; gid < num_group; ++gid) {
const size_t offset = ridx * num_group + gid;
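const int K = 8 becoming constexpr int kUnroll = 8 combines two rules: the unroll factor is genuinely a compile-time constant, and constexpr variables take CamelCase with a k prefix under the configured naming style; array bounds like ridx[kUnroll] then read as deliberate constants. A standalone sketch:

    constexpr int kUnroll = 8;  // compile-time constant, k-prefixed name

    void CopyBlocks(const float* in, float* out, int n) {
      for (int i = 0; i + kUnroll <= n; i += kUnroll) {
        float buf[kUnroll];  // constexpr bound keeps this a plain stack array
        for (int k = 0; k < kUnroll; ++k) buf[k] = in[i + k];
        for (int k = 0; k < kUnroll; ++k) out[i + k] = buf[k];
      }
    }
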
@ -113,10 +113,10 @@ class CPUPredictor : public Predictor {
|
||||
auto it = cache_.find(dmat);
|
||||
if (it != cache_.end()) {
|
||||
HostDeviceVector<bst_float>& y = it->second.predictions;
|
||||
if (y.size() != 0) {
|
||||
out_preds->resize(y.size());
|
||||
std::copy(y.data_h().begin(), y.data_h().end(),
|
||||
out_preds->data_h().begin());
|
||||
if (y.Size() != 0) {
|
||||
out_preds->Resize(y.Size());
|
||||
std::copy(y.HostVector().begin(), y.HostVector().end(),
|
||||
out_preds->HostVector().begin());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -127,12 +127,12 @@ class CPUPredictor : public Predictor {
|
||||
void InitOutPredictions(const MetaInfo& info,
|
||||
HostDeviceVector<bst_float>* out_preds,
|
||||
const gbm::GBTreeModel& model) const {
|
||||
size_t n = model.param.num_output_group * info.num_row;
|
||||
const std::vector<bst_float>& base_margin = info.base_margin;
|
||||
out_preds->resize(n);
|
||||
std::vector<bst_float>& out_preds_h = out_preds->data_h();
|
||||
size_t n = model.param.num_output_group * info.num_row_;
|
||||
const std::vector<bst_float>& base_margin = info.base_margin_;
|
||||
out_preds->Resize(n);
|
||||
std::vector<bst_float>& out_preds_h = out_preds->HostVector();
|
||||
if (base_margin.size() != 0) {
|
||||
CHECK_EQ(out_preds->size(), n);
|
||||
CHECK_EQ(out_preds->Size(), n);
|
||||
std::copy(base_margin.begin(), base_margin.end(), out_preds_h.begin());
|
||||
} else {
|
||||
std::fill(out_preds_h.begin(), out_preds_h.end(), model.base_margin);
|
||||
@ -147,14 +147,14 @@ class CPUPredictor : public Predictor {
|
||||
return;
|
||||
}
|
||||
|
||||
this->InitOutPredictions(dmat->info(), out_preds, model);
|
||||
this->InitOutPredictions(dmat->Info(), out_preds, model);
|
||||
|
||||
ntree_limit *= model.param.num_output_group;
|
||||
if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
|
||||
ntree_limit = static_cast<unsigned>(model.trees.size());
|
||||
}
|
||||
|
||||
this->PredLoopInternal(dmat, &out_preds->data_h(), model,
|
||||
this->PredLoopInternal(dmat, &out_preds->HostVector(), model,
|
||||
tree_begin, ntree_limit);
|
||||
}
|
||||
|
||||
@ -167,9 +167,9 @@ class CPUPredictor : public Predictor {
|
||||
for (auto& kv : cache_) {
|
||||
PredictionCacheEntry& e = kv.second;
|
||||
|
||||
if (e.predictions.size() == 0) {
|
||||
InitOutPredictions(e.data->info(), &(e.predictions), model);
|
||||
PredLoopInternal(e.data.get(), &(e.predictions.data_h()), model, 0,
|
||||
if (e.predictions.Size() == 0) {
|
||||
InitOutPredictions(e.data->Info(), &(e.predictions), model);
|
||||
PredLoopInternal(e.data.get(), &(e.predictions.HostVector()), model, 0,
|
||||
model.trees.size());
|
||||
} else if (model.param.num_output_group == 1 && updaters->size() > 0 &&
|
||||
num_new_trees == 1 &&
|
||||
@ -177,7 +177,7 @@ class CPUPredictor : public Predictor {
|
||||
&(e.predictions))) {
|
||||
{} // do nothing
|
||||
} else {
|
||||
PredLoopInternal(e.data.get(), &(e.predictions.data_h()), model, old_ntree,
|
||||
PredLoopInternal(e.data.get(), &(e.predictions.HostVector()), model, old_ntree,
|
||||
model.trees.size());
|
||||
}
|
||||
}
|
||||
@ -209,25 +209,25 @@ class CPUPredictor : public Predictor {
|
||||
const gbm::GBTreeModel& model, unsigned ntree_limit) override {
|
||||
const int nthread = omp_get_max_threads();
InitThreadTemp(nthread, model.param.num_feature);
const MetaInfo& info = p_fmat->info();
const MetaInfo& info = p_fmat->Info();
// number of valid trees
ntree_limit *= model.param.num_output_group;
if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
ntree_limit = static_cast<unsigned>(model.trees.size());
}
std::vector<bst_float>& preds = *out_preds;
preds.resize(info.num_row * ntree_limit);
preds.resize(info.num_row_ * ntree_limit);
// start collecting the prediction
dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator();
iter->BeforeFirst();
while (iter->Next()) {
const RowBatch& batch = iter->Value();
// parallel over local batch
const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
const auto nsize = static_cast<bst_omp_uint>(batch.size);
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nsize; ++i) {
const int tid = omp_get_thread_num();
size_t ridx = static_cast<size_t>(batch.base_rowid + i);
auto ridx = static_cast<size_t>(batch.base_rowid + i);
RegTree::FVec& feats = thread_temp[tid];
feats.Fill(batch[i]);
for (unsigned j = 0; j < ntree_limit; ++j) {
@ -246,7 +246,7 @@ class CPUPredictor : public Predictor {
unsigned condition_feature) override {
const int nthread = omp_get_max_threads();
InitThreadTemp(nthread, model.param.num_feature);
const MetaInfo& info = p_fmat->info();
const MetaInfo& info = p_fmat->Info();
// number of valid trees
ntree_limit *= model.param.num_output_group;
if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
@ -256,7 +256,7 @@ class CPUPredictor : public Predictor {
size_t ncolumns = model.param.num_feature + 1;
// allocate space for (number of features + bias) times the number of rows
std::vector<bst_float>& contribs = *out_contribs;
contribs.resize(info.num_row * ncolumns * model.param.num_output_group);
contribs.resize(info.num_row_ * ncolumns * model.param.num_output_group);
// make sure contributions is zeroed, we could be reusing a previously
// allocated one
std::fill(contribs.begin(), contribs.end(), 0);
@ -267,15 +267,15 @@ class CPUPredictor : public Predictor {
}
// start collecting the contributions
dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator();
const std::vector<bst_float>& base_margin = info.base_margin;
const std::vector<bst_float>& base_margin = info.base_margin_;
iter->BeforeFirst();
while (iter->Next()) {
const RowBatch& batch = iter->Value();
// parallel over local batch
const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
const auto nsize = static_cast<bst_omp_uint>(batch.size);
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nsize; ++i) {
size_t row_idx = static_cast<size_t>(batch.base_rowid + i);
auto row_idx = static_cast<size_t>(batch.base_rowid + i);
unsigned root_id = info.GetRoot(row_idx);
RegTree::FVec& feats = thread_temp[omp_get_thread_num()];
// loop over all classes
@ -310,7 +310,7 @@ class CPUPredictor : public Predictor {
void PredictInteractionContributions(DMatrix* p_fmat, std::vector<bst_float>* out_contribs,
const gbm::GBTreeModel& model, unsigned ntree_limit,
bool approximate) override {
const MetaInfo& info = p_fmat->info();
const MetaInfo& info = p_fmat->Info();
const int ngroup = model.param.num_output_group;
size_t ncolumns = model.param.num_feature;
const unsigned row_chunk = ngroup * (ncolumns + 1) * (ncolumns + 1);
@ -319,10 +319,10 @@ class CPUPredictor : public Predictor {

// allocate space for (number of features^2) times the number of rows and tmp off/on contribs
std::vector<bst_float>& contribs = *out_contribs;
contribs.resize(info.num_row * ngroup * (ncolumns + 1) * (ncolumns + 1));
std::vector<bst_float> contribs_off(info.num_row * ngroup * (ncolumns + 1));
std::vector<bst_float> contribs_on(info.num_row * ngroup * (ncolumns + 1));
std::vector<bst_float> contribs_diag(info.num_row * ngroup * (ncolumns + 1));
contribs.resize(info.num_row_ * ngroup * (ncolumns + 1) * (ncolumns + 1));
std::vector<bst_float> contribs_off(info.num_row_ * ngroup * (ncolumns + 1));
std::vector<bst_float> contribs_on(info.num_row_ * ngroup * (ncolumns + 1));
std::vector<bst_float> contribs_diag(info.num_row_ * ngroup * (ncolumns + 1));

// Compute the difference in effects when conditioning on each of the features on and off
// see: Axiomatic characterizations of probabilistic and
@ -332,7 +332,7 @@ class CPUPredictor : public Predictor {
PredictContribution(p_fmat, &contribs_off, model, ntree_limit, approximate, -1, i);
PredictContribution(p_fmat, &contribs_on, model, ntree_limit, approximate, 1, i);

for (size_t j = 0; j < info.num_row; ++j) {
for (size_t j = 0; j < info.num_row_; ++j) {
for (int l = 0; l < ngroup; ++l) {
const unsigned o_offset = j * row_chunk + l * mrow_chunk + i * (ncolumns + 1);
const unsigned c_offset = j * crow_chunk + l * (ncolumns + 1);

@ -36,8 +36,8 @@ struct GPUPredictionParam : public dmlc::Parameter<GPUPredictionParam> {
};
DMLC_REGISTER_PARAMETER(GPUPredictionParam);

template <typename iter_t>
void increment_offset(iter_t begin_itr, iter_t end_itr, size_t amount) {
template <typename IterT>
void IncrementOffset(IterT begin_itr, IterT end_itr, size_t amount) {
thrust::transform(begin_itr, end_itr, begin_itr,
[=] __device__(size_t elem) { return elem + amount; });
}
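For context, IncrementOffset shifts a range of row-pointer offsets in place so batch-local indices address the global data array. A minimal host-side sketch of the same element-wise transform, using std::transform instead of thrust and a hypothetical row_ptr fragment:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // Hypothetical row_ptr fragment for a batch whose base_rowid is 100.
  std::vector<size_t> row_ptr = {0, 3, 7, 12};
  const size_t base_rowid = 100;
  // Same "elem + amount" transform that IncrementOffset runs on the device.
  std::transform(row_ptr.begin(), row_ptr.end(), row_ptr.begin(),
                 [=](size_t elem) { return elem + base_rowid; });
  for (size_t v : row_ptr) std::cout << v << ' ';  // prints: 100 103 107 112
}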
@ -50,16 +50,16 @@ void increment_offset(iter_t begin_itr, iter_t end_itr, size_t amount) {

struct DeviceMatrix {
DMatrix* p_mat; // Pointer to the original matrix on the host
dh::bulk_allocator<dh::memory_type::DEVICE> ba;
dh::dvec<size_t> row_ptr;
dh::dvec<SparseBatch::Entry> data;
dh::BulkAllocator<dh::MemoryType::kDevice> ba;
dh::DVec<size_t> row_ptr;
dh::DVec<SparseBatch::Entry> data;
thrust::device_vector<float> predictions;

DeviceMatrix(DMatrix* dmat, int device_idx, bool silent) : p_mat(dmat) {
dh::safe_cuda(cudaSetDevice(device_idx));
auto info = dmat->info();
ba.allocate(device_idx, silent, &row_ptr, info.num_row + 1, &data,
info.num_nonzero);
auto info = dmat->Info();
ba.Allocate(device_idx, silent, &row_ptr, info.num_row_ + 1, &data,
info.num_nonzero_);
auto iter = dmat->RowIterator();
iter->BeforeFirst();
size_t data_offset = 0;
@ -71,7 +71,7 @@ struct DeviceMatrix {
if (batch.base_rowid > 0) {
auto begin_itr = row_ptr.tbegin() + batch.base_rowid;
auto end_itr = begin_itr + batch.size + 1;
increment_offset(begin_itr, end_itr, batch.base_rowid);
IncrementOffset(begin_itr, end_itr, batch.base_rowid);
}
// Copy data
thrust::copy(batch.data_ptr, batch.data_ptr + batch.ind_ptr[batch.size],
@ -103,17 +103,17 @@ struct DevicePredictionNode {
NodeValue val;

DevicePredictionNode(const RegTree::Node& n) { // NOLINT
this->left_child_idx = n.cleft();
this->right_child_idx = n.cright();
this->fidx = n.split_index();
if (n.default_left()) {
this->left_child_idx = n.LeftChild();
this->right_child_idx = n.RightChild();
this->fidx = n.SplitIndex();
if (n.DefaultLeft()) {
fidx |= (1U << 31);
}

if (n.is_leaf()) {
this->val.leaf_weight = n.leaf_value();
if (n.IsLeaf()) {
this->val.leaf_weight = n.LeafValue();
} else {
this->val.fvalue = n.split_cond();
this->val.fvalue = n.SplitCond();
}
}

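The constructor above folds the default-left flag into the top bit of fidx, leaving the lower 31 bits for the feature index. A standalone round-trip sketch of that packing, with hypothetical values rather than the predictor's own state:

#include <cstdint>
#include <iostream>

int main() {
  uint32_t fidx = 42;            // feature index lives in the low 31 bits
  bool default_left = true;
  if (default_left) {
    fidx |= (1U << 31);          // flag stored in the top bit
  }
  bool decoded_default_left = (fidx >> 31) != 0;
  uint32_t decoded_index = fidx & ((1U << 31) - 1U);
  std::cout << decoded_index << ' ' << decoded_default_left;  // 42 1
}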
@ -155,7 +155,7 @@ struct ElementLoader {
if (use_shared) {
bst_uint global_idx = blockDim.x * blockIdx.x + threadIdx.x;
int shared_elements = blockDim.x * num_features;
dh::block_fill(smem, shared_elements, nanf(""));
dh::BlockFill(smem, shared_elements, nanf(""));
__syncthreads();
if (global_idx < num_rows) {
bst_uint elem_begin = d_row_ptr[global_idx];
@ -309,16 +309,16 @@ class GPUPredictor : public xgboost::Predictor {
thrust::copy(model.tree_info.begin(), model.tree_info.end(),
tree_group.begin());

device_matrix->predictions.resize(out_preds->size());
device_matrix->predictions.resize(out_preds->Size());
thrust::copy(out_preds->tbegin(param.gpu_id), out_preds->tend(param.gpu_id),
device_matrix->predictions.begin());

const int BLOCK_THREADS = 128;
const int GRID_SIZE = static_cast<int>(
dh::div_round_up(device_matrix->row_ptr.size() - 1, BLOCK_THREADS));
dh::DivRoundUp(device_matrix->row_ptr.Size() - 1, BLOCK_THREADS));

int shared_memory_bytes = static_cast<int>(
sizeof(float) * device_matrix->p_mat->info().num_col * BLOCK_THREADS);
sizeof(float) * device_matrix->p_mat->Info().num_col_ * BLOCK_THREADS);
bool use_shared = true;
if (shared_memory_bytes > max_shared_memory_bytes) {
shared_memory_bytes = 0;
@ -327,11 +327,11 @@ class GPUPredictor : public xgboost::Predictor {

PredictKernel<BLOCK_THREADS>
<<<GRID_SIZE, BLOCK_THREADS, shared_memory_bytes>>>(
dh::raw(nodes), dh::raw(device_matrix->predictions),
dh::raw(tree_segments), dh::raw(tree_group),
device_matrix->row_ptr.data(), device_matrix->data.data(),
tree_begin, tree_end, device_matrix->p_mat->info().num_col,
device_matrix->p_mat->info().num_row, use_shared,
dh::Raw(nodes), dh::Raw(device_matrix->predictions),
dh::Raw(tree_segments), dh::Raw(tree_group),
device_matrix->row_ptr.Data(), device_matrix->data.Data(),
tree_begin, tree_end, device_matrix->p_mat->Info().num_col_,
device_matrix->p_mat->Info().num_row_, use_shared,
model.param.num_output_group);

dh::safe_cuda(cudaDeviceSynchronize());
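dh::DivRoundUp in the launch configuration above is a ceiling division, so the grid always covers every row at one thread per row. A host-side sketch of the same arithmetic (the block size and row count here are illustrative, not taken from the code):

#include <cstddef>
#include <iostream>

// Ceiling division: smallest g such that g * b >= n.
size_t DivRoundUpSketch(size_t n, size_t b) { return (n + b - 1) / b; }

int main() {
  const size_t kBlockThreads = 128;  // mirrors BLOCK_THREADS above
  const size_t num_rows = 1000;      // hypothetical row count
  std::cout << DivRoundUpSketch(num_rows, kBlockThreads);  // 8 blocks
}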
@ -349,7 +349,7 @@ class GPUPredictor : public xgboost::Predictor {
if (this->PredictFromCache(dmat, out_preds, model, ntree_limit)) {
return;
}
this->InitOutPredictions(dmat->info(), out_preds, model);
this->InitOutPredictions(dmat->Info(), out_preds, model);

int tree_end = ntree_limit * model.param.num_output_group;

@ -364,11 +364,11 @@ class GPUPredictor : public xgboost::Predictor {
void InitOutPredictions(const MetaInfo& info,
HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model) const {
size_t n = model.param.num_output_group * info.num_row;
const std::vector<bst_float>& base_margin = info.base_margin;
out_preds->resize(n, 0.0f, param.gpu_id);
size_t n = model.param.num_output_group * info.num_row_;
const std::vector<bst_float>& base_margin = info.base_margin_;
out_preds->Resize(n, 0.0f, param.gpu_id);
if (base_margin.size() != 0) {
CHECK_EQ(out_preds->size(), n);
CHECK_EQ(out_preds->Size(), n);
thrust::copy(base_margin.begin(), base_margin.end(),
out_preds->tbegin(param.gpu_id));
} else {
@ -384,12 +384,12 @@ class GPUPredictor : public xgboost::Predictor {
auto it = cache_.find(dmat);
if (it != cache_.end()) {
HostDeviceVector<bst_float>& y = it->second.predictions;
if (y.size() != 0) {
if (y.Size() != 0) {
dh::safe_cuda(cudaSetDevice(param.gpu_id));
out_preds->resize(y.size(), 0.0f, param.gpu_id);
out_preds->Resize(y.Size(), 0.0f, param.gpu_id);
dh::safe_cuda(cudaMemcpy(
out_preds->ptr_d(param.gpu_id), y.ptr_d(param.gpu_id),
out_preds->size() * sizeof(bst_float), cudaMemcpyDefault));
out_preds->DevicePointer(param.gpu_id), y.DevicePointer(param.gpu_id),
out_preds->Size() * sizeof(bst_float), cudaMemcpyDefault));
return true;
}
}
@ -409,9 +409,9 @@ class GPUPredictor : public xgboost::Predictor {
DMatrix* dmat = kv.first;
HostDeviceVector<bst_float>& predictions = e.predictions;

if (predictions.size() == 0) {
if (predictions.Size() == 0) {
// ensure that the device in predictions is correct
predictions.resize(0, 0.0f, param.gpu_id);
predictions.Resize(0, 0.0f, param.gpu_id);
cpu_predictor->PredictBatch(dmat, &predictions, model, 0,
static_cast<bst_uint>(model.trees.size()));
} else if (model.param.num_output_group == 1 && updaters->size() > 0 &&
@ -462,7 +462,7 @@ class GPUPredictor : public xgboost::Predictor {
Predictor::Init(cfg, cache);
cpu_predictor->Init(cfg, cache);
param.InitAllowUnknown(cfg);
max_shared_memory_bytes = dh::max_shared_memory(param.gpu_id);
max_shared_memory_bytes = dh::MaxSharedMemory(param.gpu_id);
}

private:

@ -11,8 +11,9 @@ namespace xgboost {
void Predictor::Init(
const std::vector<std::pair<std::string, std::string>>& cfg,
const std::vector<std::shared_ptr<DMatrix>>& cache) {
for (const std::shared_ptr<DMatrix>& d : cache)
for (const std::shared_ptr<DMatrix>& d : cache) {
cache_[d.get()].data = d;
}
}
Predictor* Predictor::Create(std::string name) {
auto* e = ::dmlc::Registry<PredictorReg>::Get()->Find(name);

@ -13,7 +13,7 @@ namespace tree {
/*! \brief training parameters for histogram-based training */
struct FastHistParam : public dmlc::Parameter<FastHistParam> {
// integral data type to be used with columnar data storage
enum class DataType { uint8 = 1, uint16 = 2, uint32 = 4 };
enum class DataType { uint8 = 1, uint16 = 2, uint32 = 4 }; // NOLINT
int colmat_dtype;
// percentage threshold for treating a feature as sparse
// e.g. 0.2 indicates a feature with fewer than 20% nonzeros is considered sparse

102
src/tree/param.h
@ -190,26 +190,26 @@ struct TrainParam : public dmlc::Parameter<TrainParam> {
DMLC_DECLARE_ALIAS(learning_rate, eta);
}
/*! \brief whether need forward small to big search: default right */
inline bool need_forward_search(float col_density, bool indicator) const {
inline bool NeedForwardSearch(float col_density, bool indicator) const {
return this->default_direction == 2 ||
(default_direction == 0 && (col_density < opt_dense_col) &&
!indicator);
}
/*! \brief whether need backward big to small search: default left */
inline bool need_backward_search(float col_density, bool indicator) const {
inline bool NeedBackwardSearch(float col_density, bool indicator) const {
return this->default_direction != 2;
}
/*! \brief given the loss change, whether we need to invoke pruning */
inline bool need_prune(double loss_chg, int depth) const {
inline bool NeedPrune(double loss_chg, int depth) const {
return loss_chg < this->min_split_loss;
}
/*! \brief whether we can split with current hessian */
inline bool cannot_split(double sum_hess, int depth) const {
inline bool CannotSplit(double sum_hess, int depth) const {
return sum_hess < this->min_child_weight * 2.0;
}
/*! \brief maximum sketch size */
inline unsigned max_sketch_size() const {
unsigned ret = static_cast<unsigned>(sketch_ratio / sketch_eps);
inline unsigned MaxSketchSize() const {
auto ret = static_cast<unsigned>(sketch_ratio / sketch_eps);
CHECK_GT(ret, 0U);
return ret;
}
@ -220,10 +220,12 @@ struct TrainParam : public dmlc::Parameter<TrainParam> {
// functions for L1 cost
template <typename T1, typename T2>
XGBOOST_DEVICE inline static T1 ThresholdL1(T1 w, T2 lambda) {
if (w > +lambda)
if (w > +lambda) {
return w - lambda;
if (w < -lambda)
}
if (w < -lambda) {
return w + lambda;
}
return 0.0;
}

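ThresholdL1 is the usual L1 soft-thresholding operator: it shrinks w toward zero by lambda and clips anything inside [-lambda, +lambda] to exactly zero. A quick worked check of the same shape of function, minus the device qualifiers:

#include <iostream>

double ThresholdL1Sketch(double w, double lambda) {
  if (w > +lambda) { return w - lambda; }
  if (w < -lambda) { return w + lambda; }
  return 0.0;
}

int main() {
  std::cout << ThresholdL1Sketch(3.0, 1.0) << ' '   // 2  (shrunk down)
            << ThresholdL1Sketch(-3.0, 1.0) << ' '  // -2 (shrunk up)
            << ThresholdL1Sketch(0.5, 1.0);         // 0  (clipped to zero)
}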
@ -240,8 +242,9 @@ XGBOOST_DEVICE inline T CalcGainGivenWeight(const TrainingParams &p, T sum_grad,
// calculate the cost of loss function
template <typename TrainingParams, typename T>
XGBOOST_DEVICE inline T CalcGain(const TrainingParams &p, T sum_grad, T sum_hess) {
if (sum_hess < p.min_child_weight)
if (sum_hess < p.min_child_weight) {
return T(0.0);
}
if (p.max_delta_step == 0.0f) {
if (p.reg_alpha == 0.0f) {
return Sqr(sum_grad) / (sum_hess + p.reg_lambda);
@ -276,8 +279,9 @@ XGBOOST_DEVICE inline T CalcGain(const TrainingParams &p, T sum_grad, T sum_hess
template <typename TrainingParams, typename T>
XGBOOST_DEVICE inline T CalcWeight(const TrainingParams &p, T sum_grad,
T sum_hess) {
if (sum_hess < p.min_child_weight)
if (sum_hess < p.min_child_weight) {
return 0.0;
}
T dw;
if (p.reg_alpha == 0.0f) {
dw = -sum_grad / (sum_hess + p.reg_lambda);
@ -285,16 +289,18 @@ XGBOOST_DEVICE inline T CalcWeight(const TrainingParams &p, T sum_grad,
dw = -ThresholdL1(sum_grad, p.reg_alpha) / (sum_hess + p.reg_lambda);
}
if (p.max_delta_step != 0.0f) {
if (dw > p.max_delta_step)
if (dw > p.max_delta_step) {
dw = p.max_delta_step;
if (dw < -p.max_delta_step)
}
if (dw < -p.max_delta_step) {
dw = -p.max_delta_step;
}
}
return dw;
}

template <typename TrainingParams, typename gpair_t>
XGBOOST_DEVICE inline float CalcWeight(const TrainingParams &p, gpair_t sum_grad) {
template <typename TrainingParams, typename GpairT>
XGBOOST_DEVICE inline float CalcWeight(const TrainingParams &p, GpairT sum_grad) {
return CalcWeight(p, sum_grad.GetGrad(), sum_grad.GetHess());
}

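With no L1 term, CalcWeight reduces to the familiar closed-form leaf weight w = -sum_grad / (sum_hess + reg_lambda), optionally clamped to +/- max_delta_step. A minimal numeric sketch of that path, with plain doubles standing in for the TrainingParams fields:

#include <algorithm>
#include <iostream>

// Leaf weight with L2 regularisation only, clamped like the code above.
double CalcWeightSketch(double sum_grad, double sum_hess,
                        double reg_lambda, double max_delta_step) {
  double dw = -sum_grad / (sum_hess + reg_lambda);
  if (max_delta_step != 0.0) {
    dw = std::min(std::max(dw, -max_delta_step), max_delta_step);
  }
  return dw;
}

int main() {
  // G = 10, H = 4, lambda = 1 -> w = -2; clamped to -0.7 with max_delta_step = 0.7.
  std::cout << CalcWeightSketch(10.0, 4.0, 1.0, 0.0) << ' '
            << CalcWeightSketch(10.0, 4.0, 1.0, 0.7);
}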
@ -312,8 +318,8 @@ struct XGBOOST_ALIGNAS(16) GradStats {
/*! \brief constructor, the object must be cleared during construction */
explicit GradStats(const TrainParam& param) { this->Clear(); }

template <typename gpair_t>
XGBOOST_DEVICE explicit GradStats(const gpair_t &sum)
template <typename GpairT>
XGBOOST_DEVICE explicit GradStats(const GpairT &sum)
: sum_grad(sum.GetGrad()), sum_hess(sum.GetHess()) {}
/*! \brief clear the statistics */
inline void Clear() { sum_grad = sum_hess = 0.0f; }
@ -323,26 +329,26 @@ struct XGBOOST_ALIGNAS(16) GradStats {
* \brief accumulate statistics
* \param p the gradient pair
*/
inline void Add(bst_gpair p) { this->Add(p.GetGrad(), p.GetHess()); }
inline void Add(GradientPair p) { this->Add(p.GetGrad(), p.GetHess()); }
/*!
* \brief accumulate statistics, more complicated version
* \param gpair the vector storing the gradient statistics
* \param info the additional information
* \param ridx instance index of this instance
*/
inline void Add(const std::vector<bst_gpair>& gpair, const MetaInfo& info,
inline void Add(const std::vector<GradientPair>& gpair, const MetaInfo& info,
bst_uint ridx) {
const bst_gpair& b = gpair[ridx];
const GradientPair& b = gpair[ridx];
this->Add(b.GetGrad(), b.GetHess());
}
/*! \brief calculate leaf weight */
template <typename param_t>
XGBOOST_DEVICE inline double CalcWeight(const param_t &param) const {
template <typename ParamT>
XGBOOST_DEVICE inline double CalcWeight(const ParamT &param) const {
return xgboost::tree::CalcWeight(param, sum_grad, sum_hess);
}
/*! \brief calculate gain of the solution */
template <typename param_t>
inline double CalcGain(const param_t& param) const {
template <typename ParamT>
inline double CalcGain(const ParamT& param) const {
return xgboost::tree::CalcGain(param, sum_grad, sum_hess);
}
/*! \brief add statistics to the data */
@ -364,7 +370,7 @@ template <typename param_t>
/*! \brief set leaf vector value based on statistics */
inline void SetLeafVec(const TrainParam& param, bst_float* vec) const {}
// constructor to allow inheritance
GradStats() {}
GradStats() = default;
/*! \brief add statistics to the data */
inline void Add(double grad, double hess) {
sum_grad += grad;
@ -400,8 +406,8 @@ struct ValueConstraint {
inline static void Init(TrainParam *param, unsigned num_feature) {
param->monotone_constraints.resize(num_feature, 0);
}
template <typename param_t>
XGBOOST_DEVICE inline double CalcWeight(const param_t &param, GradStats stats) const {
template <typename ParamT>
XGBOOST_DEVICE inline double CalcWeight(const ParamT &param, GradStats stats) const {
double w = stats.CalcWeight(param);
if (w < lower_bound) {
return lower_bound;
@ -412,14 +418,14 @@ template <typename param_t>
return w;
}

template <typename param_t>
XGBOOST_DEVICE inline double CalcGain(const param_t &param, GradStats stats) const {
template <typename ParamT>
XGBOOST_DEVICE inline double CalcGain(const ParamT &param, GradStats stats) const {
return CalcGainGivenWeight(param, stats.sum_grad, stats.sum_hess,
CalcWeight(param, stats));
}

template <typename param_t>
XGBOOST_DEVICE inline double CalcSplitGain(const param_t &param, int constraint,
template <typename ParamT>
XGBOOST_DEVICE inline double CalcSplitGain(const ParamT &param, int constraint,
GradStats left, GradStats right) const {
const double negative_infinity = -std::numeric_limits<double>::infinity();
double wleft = CalcWeight(param, left);
@ -442,8 +448,9 @@ template <typename param_t>
int c = param.monotone_constraints.at(split_index);
*cleft = *this;
*cright = *this;
if (c == 0)
if (c == 0) {
return;
}
double wleft = CalcWeight(param, left);
double wright = CalcWeight(param, right);
double mid = (wleft + wright) / 2;
@ -464,13 +471,13 @@ template <typename param_t>
*/
struct SplitEntry {
/*! \brief loss change after split this node */
bst_float loss_chg;
bst_float loss_chg{0.0f};
/*! \brief split index */
unsigned sindex;
unsigned sindex{0};
/*! \brief split value */
bst_float split_value;
bst_float split_value{0.0f};
/*! \brief constructor */
SplitEntry() : loss_chg(0.0f), sindex(0), split_value(0.0f) {}
SplitEntry() = default;
/*!
* \brief decides whether we can replace current entry with the given
* statistics
@ -482,7 +489,7 @@ struct SplitEntry {
* \param split_index the feature index where the split is on
*/
inline bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const {
if (this->split_index() <= split_index) {
if (this->SplitIndex() <= split_index) {
return new_loss_chg > this->loss_chg;
} else {
return !(this->loss_chg > new_loss_chg);
@ -494,7 +501,7 @@ struct SplitEntry {
* \return whether the proposed split is better and can replace current split
*/
inline bool Update(const SplitEntry &e) {
if (this->NeedReplace(e.loss_chg, e.split_index())) {
if (this->NeedReplace(e.loss_chg, e.SplitIndex())) {
this->loss_chg = e.loss_chg;
this->sindex = e.sindex;
this->split_value = e.split_value;
@ -515,8 +522,9 @@ struct SplitEntry {
bst_float new_split_value, bool default_left) {
if (this->NeedReplace(new_loss_chg, split_index)) {
this->loss_chg = new_loss_chg;
if (default_left)
if (default_left) {
split_index |= (1U << 31);
}
this->sindex = split_index;
this->split_value = new_split_value;
return true;
@ -530,9 +538,9 @@ struct SplitEntry {
dst.Update(src);
}
/*!\return feature index to split on */
inline unsigned split_index() const { return sindex & ((1U << 31) - 1U); }
inline unsigned SplitIndex() const { return sindex & ((1U << 31) - 1U); }
/*!\return whether missing value goes to left branch */
inline bool default_left() const { return (sindex >> 31) != 0; }
inline bool DefaultLeft() const { return (sindex >> 31) != 0; }
};

} // namespace tree
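One behaviour worth noting in the struct above: NeedReplace breaks ties deterministically, so an equal-gain candidate only displaces the incumbent when it splits on a lower feature index. A reduced sketch (SplitEntrySketch is a stripped-down stand-in, not the real class) demonstrating the rule:

#include <iostream>

// Keeps the best (loss_chg, feature) seen so far, with the same
// tie-breaking as NeedReplace above.
struct SplitEntrySketch {
  float loss_chg{0.0f};
  unsigned sindex{0};
  bool NeedReplace(float new_loss_chg, unsigned split_index) const {
    if ((sindex & ((1U << 31) - 1U)) <= split_index) {
      return new_loss_chg > loss_chg;   // require a strict improvement
    }
    return !(loss_chg > new_loss_chg);  // lower feature index wins ties
  }
};

int main() {
  SplitEntrySketch best;
  best.loss_chg = 1.5f;
  best.sindex = 3;
  std::cout << best.NeedReplace(1.5f, 5) << ' '  // 0: tie, higher feature index
            << best.NeedReplace(1.5f, 1) << ' '  // 1: tie, lower feature index
            << best.NeedReplace(2.0f, 5);        // 1: strictly better gain
}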
@ -542,14 +550,16 @@ struct SplitEntry {
namespace std {
inline std::ostream &operator<<(std::ostream &os, const std::vector<int> &t) {
os << '(';
for (std::vector<int>::const_iterator it = t.begin(); it != t.end(); ++it) {
if (it != t.begin())
for (auto it = t.begin(); it != t.end(); ++it) {
if (it != t.begin()) {
os << ',';
}
os << *it;
}
// python style tuple
if (t.size() == 1)
if (t.size() == 1) {
os << ',';
}
os << ')';
return os;
}
@ -566,8 +576,9 @@ inline std::istream &operator>>(std::istream &is, std::vector<int> &t) {
return is;
}
is.get();
if (ch == '(')
if (ch == '(') {
break;
}
if (!isspace(ch)) {
is.setstate(std::ios::failbit);
return is;
@ -597,8 +608,9 @@ inline std::istream &operator>>(std::istream &is, std::vector<int> &t) {
}
break;
}
if (ch == ')')
if (ch == ')') {
break;
}
} else if (ch == ')') {
break;
} else {

@ -21,45 +21,53 @@ void DumpRegTree(std::stringstream& fo, // NOLINT(*)
int nid, int depth, int add_comma,
bool with_stats, std::string format) {
if (format == "json") {
if (add_comma) fo << ",";
if (depth != 0) fo << std::endl;
for (int i = 0; i < depth+1; ++i) fo << " ";
} else {
for (int i = 0; i < depth; ++i) fo << '\t';
if (add_comma) {
fo << ",";
}
if (tree[nid].is_leaf()) {
if (depth != 0) {
fo << std::endl;
}
for (int i = 0; i < depth + 1; ++i) {
fo << " ";
}
} else {
for (int i = 0; i < depth; ++i) {
fo << '\t';
}
}
if (tree[nid].IsLeaf()) {
if (format == "json") {
fo << "{ \"nodeid\": " << nid
<< ", \"leaf\": " << tree[nid].leaf_value();
<< ", \"leaf\": " << tree[nid].LeafValue();
if (with_stats) {
fo << ", \"cover\": " << tree.stat(nid).sum_hess;
fo << ", \"cover\": " << tree.Stat(nid).sum_hess;
}
fo << " }";
} else {
fo << nid << ":leaf=" << tree[nid].leaf_value();
fo << nid << ":leaf=" << tree[nid].LeafValue();
if (with_stats) {
fo << ",cover=" << tree.stat(nid).sum_hess;
fo << ",cover=" << tree.Stat(nid).sum_hess;
}
fo << '\n';
}
} else {
// right then left,
bst_float cond = tree[nid].split_cond();
const unsigned split_index = tree[nid].split_index();
if (split_index < fmap.size()) {
bst_float cond = tree[nid].SplitCond();
const unsigned split_index = tree[nid].SplitIndex();
if (split_index < fmap.Size()) {
switch (fmap.type(split_index)) {
case FeatureMap::kIndicator: {
int nyes = tree[nid].default_left() ?
tree[nid].cright() : tree[nid].cleft();
int nyes = tree[nid].DefaultLeft() ?
tree[nid].RightChild() : tree[nid].LeftChild();
if (format == "json") {
fo << "{ \"nodeid\": " << nid
<< ", \"depth\": " << depth
<< ", \"split\": \"" << fmap.name(split_index) << "\""
<< ", \"split\": \"" << fmap.Name(split_index) << "\""
<< ", \"yes\": " << nyes
<< ", \"no\": " << tree[nid].cdefault();
<< ", \"no\": " << tree[nid].DefaultChild();
} else {
fo << nid << ":[" << fmap.name(split_index) << "] yes=" << nyes
<< ",no=" << tree[nid].cdefault();
fo << nid << ":[" << fmap.Name(split_index) << "] yes=" << nyes
<< ",no=" << tree[nid].DefaultChild();
}
break;
}
@ -67,17 +75,17 @@ void DumpRegTree(std::stringstream& fo, // NOLINT(*)
if (format == "json") {
fo << "{ \"nodeid\": " << nid
<< ", \"depth\": " << depth
<< ", \"split\": \"" << fmap.name(split_index) << "\""
<< ", \"split\": \"" << fmap.Name(split_index) << "\""
<< ", \"split_condition\": " << int(cond + 1.0)
<< ", \"yes\": " << tree[nid].cleft()
<< ", \"no\": " << tree[nid].cright()
<< ", \"missing\": " << tree[nid].cdefault();
<< ", \"yes\": " << tree[nid].LeftChild()
<< ", \"no\": " << tree[nid].RightChild()
<< ", \"missing\": " << tree[nid].DefaultChild();
} else {
fo << nid << ":[" << fmap.name(split_index) << "<"
fo << nid << ":[" << fmap.Name(split_index) << "<"
<< int(cond + 1.0)
<< "] yes=" << tree[nid].cleft()
<< ",no=" << tree[nid].cright()
<< ",missing=" << tree[nid].cdefault();
<< "] yes=" << tree[nid].LeftChild()
<< ",no=" << tree[nid].RightChild()
<< ",missing=" << tree[nid].DefaultChild();
}
break;
}
@ -86,16 +94,16 @@ void DumpRegTree(std::stringstream& fo, // NOLINT(*)
if (format == "json") {
fo << "{ \"nodeid\": " << nid
<< ", \"depth\": " << depth
<< ", \"split\": \"" << fmap.name(split_index) << "\""
<< ", \"split\": \"" << fmap.Name(split_index) << "\""
<< ", \"split_condition\": " << cond
<< ", \"yes\": " << tree[nid].cleft()
<< ", \"no\": " << tree[nid].cright()
<< ", \"missing\": " << tree[nid].cdefault();
<< ", \"yes\": " << tree[nid].LeftChild()
<< ", \"no\": " << tree[nid].RightChild()
<< ", \"missing\": " << tree[nid].DefaultChild();
} else {
fo << nid << ":[" << fmap.name(split_index) << "<" << cond
<< "] yes=" << tree[nid].cleft()
<< ",no=" << tree[nid].cright()
<< ",missing=" << tree[nid].cdefault();
fo << nid << ":[" << fmap.Name(split_index) << "<" << cond
<< "] yes=" << tree[nid].LeftChild()
<< ",no=" << tree[nid].RightChild()
<< ",missing=" << tree[nid].DefaultChild();
}
break;
}
@ -107,22 +115,22 @@ void DumpRegTree(std::stringstream& fo, // NOLINT(*)
<< ", \"depth\": " << depth
<< ", \"split\": " << split_index
<< ", \"split_condition\": " << cond
<< ", \"yes\": " << tree[nid].cleft()
<< ", \"no\": " << tree[nid].cright()
<< ", \"missing\": " << tree[nid].cdefault();
<< ", \"yes\": " << tree[nid].LeftChild()
<< ", \"no\": " << tree[nid].RightChild()
<< ", \"missing\": " << tree[nid].DefaultChild();
} else {
fo << nid << ":[f" << split_index << "<"<< cond
<< "] yes=" << tree[nid].cleft()
<< ",no=" << tree[nid].cright()
<< ",missing=" << tree[nid].cdefault();
<< "] yes=" << tree[nid].LeftChild()
<< ",no=" << tree[nid].RightChild()
<< ",missing=" << tree[nid].DefaultChild();
}
}
if (with_stats) {
if (format == "json") {
fo << ", \"gain\": " << tree.stat(nid).loss_chg
<< ", \"cover\": " << tree.stat(nid).sum_hess;
fo << ", \"gain\": " << tree.Stat(nid).loss_chg
<< ", \"cover\": " << tree.Stat(nid).sum_hess;
} else {
fo << ",gain=" << tree.stat(nid).loss_chg << ",cover=" << tree.stat(nid).sum_hess;
fo << ",gain=" << tree.Stat(nid).loss_chg << ",cover=" << tree.Stat(nid).sum_hess;
}
}
if (format == "json") {
@ -130,11 +138,13 @@ void DumpRegTree(std::stringstream& fo, // NOLINT(*)
} else {
fo << '\n';
}
DumpRegTree(fo, tree, fmap, tree[nid].cleft(), depth + 1, false, with_stats, format);
DumpRegTree(fo, tree, fmap, tree[nid].cright(), depth + 1, true, with_stats, format);
DumpRegTree(fo, tree, fmap, tree[nid].LeftChild(), depth + 1, false, with_stats, format);
DumpRegTree(fo, tree, fmap, tree[nid].RightChild(), depth + 1, true, with_stats, format);
if (format == "json") {
fo << std::endl;
for (int i = 0; i < depth+1; ++i) fo << " ";
for (int i = 0; i < depth + 1; ++i) {
fo << " ";
}
fo << "]}";
}
}

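For reference, the text branch of DumpRegTree emits one line per node in the pattern nid:[feature<cond] yes=l,no=r,missing=d, with leaves as nid:leaf=value. A hypothetical single-split tree would dump roughly as:

0:[f0<0.5] yes=1,no=2,missing=1
1:leaf=0.4
2:leaf=-0.4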
@ -29,7 +29,7 @@ namespace tree {
class BaseMaker: public TreeUpdater {
public:
void Init(const std::vector<std::pair<std::string, std::string> >& args) override {
param.InitAllowUnknown(args);
param_.InitAllowUnknown(args);
}

protected:
@ -39,8 +39,8 @@ class BaseMaker: public TreeUpdater {
/*! \brief find type of each feature, use column format */
inline void InitByCol(DMatrix* p_fmat,
const RegTree& tree) {
fminmax.resize(tree.param.num_feature * 2);
std::fill(fminmax.begin(), fminmax.end(),
fminmax_.resize(tree.param.num_feature * 2);
std::fill(fminmax_.begin(), fminmax_.end(),
-std::numeric_limits<bst_float>::max());
// start accumulating statistics
dmlc::DataIter<ColBatch>* iter = p_fmat->ColIterator();
@ -51,22 +51,22 @@ class BaseMaker: public TreeUpdater {
const bst_uint fid = batch.col_index[i];
const ColBatch::Inst& c = batch[i];
if (c.length != 0) {
fminmax[fid * 2 + 0] = std::max(-c[0].fvalue, fminmax[fid * 2 + 0]);
fminmax[fid * 2 + 1] = std::max(c[c.length - 1].fvalue, fminmax[fid * 2 + 1]);
fminmax_[fid * 2 + 0] = std::max(-c[0].fvalue, fminmax_[fid * 2 + 0]);
fminmax_[fid * 2 + 1] = std::max(c[c.length - 1].fvalue, fminmax_[fid * 2 + 1]);
}
}
}
}
/*! \brief synchronize the information */
inline void SyncInfo() {
rabit::Allreduce<rabit::op::Max>(dmlc::BeginPtr(fminmax), fminmax.size());
rabit::Allreduce<rabit::op::Max>(dmlc::BeginPtr(fminmax_), fminmax_.size());
}
// get feature type, 0:empty 1:binary 2:real
inline int Type(bst_uint fid) const {
CHECK_LT(fid * 2 + 1, fminmax.size())
CHECK_LT(fid * 2 + 1, fminmax_.size())
<< "FeatHelper fid exceed query bound ";
bst_float a = fminmax[fid * 2];
bst_float b = fminmax[fid * 2 + 1];
bst_float a = fminmax_[fid * 2];
bst_float b = fminmax_[fid * 2 + 1];
if (a == -std::numeric_limits<bst_float>::max()) return 0;
if (-a == b) {
return 1;
@ -75,16 +75,16 @@ class BaseMaker: public TreeUpdater {
}
}
inline bst_float MaxValue(bst_uint fid) const {
return fminmax[fid *2 + 1];
return fminmax_[fid *2 + 1];
}
inline void SampleCol(float p, std::vector<bst_uint> *p_findex) const {
std::vector<bst_uint> &findex = *p_findex;
findex.clear();
for (size_t i = 0; i < fminmax.size(); i += 2) {
const bst_uint fid = static_cast<bst_uint>(i / 2);
for (size_t i = 0; i < fminmax_.size(); i += 2) {
const auto fid = static_cast<bst_uint>(i / 2);
if (this->Type(fid) != 0) findex.push_back(fid);
}
unsigned n = static_cast<unsigned>(p * findex.size());
auto n = static_cast<unsigned>(p * findex.size());
std::shuffle(findex.begin(), findex.end(), common::GlobalRandom());
findex.resize(n);
// sync the findex if it is subsample
@ -99,64 +99,64 @@ class BaseMaker: public TreeUpdater {
}

private:
std::vector<bst_float> fminmax;
std::vector<bst_float> fminmax_;
};
// ------static helper functions ------
// helper function to get to next level of the tree
/*! \brief this is helper function for row based data*/
inline static int NextLevel(const RowBatch::Inst &inst, const RegTree &tree, int nid) {
const RegTree::Node &n = tree[nid];
bst_uint findex = n.split_index();
bst_uint findex = n.SplitIndex();
for (unsigned i = 0; i < inst.length; ++i) {
if (findex == inst[i].index) {
if (inst[i].fvalue < n.split_cond()) {
return n.cleft();
if (inst[i].fvalue < n.SplitCond()) {
return n.LeftChild();
} else {
return n.cright();
return n.RightChild();
}
}
}
return n.cdefault();
return n.DefaultChild();
}
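NextLevel routes one sparse row through a single node: if the split feature is present the row goes left or right on the comparison, otherwise it takes the default child. A self-contained sketch with a hypothetical node (split on feature 7 at 0.5, children 1 and 2, default left), not the real RegTree types:

#include <iostream>
#include <vector>

struct EntrySketch { unsigned index; float fvalue; };

int NextLevelSketch(const std::vector<EntrySketch>& inst) {
  const unsigned split_index = 7;
  const float split_cond = 0.5f;
  for (const EntrySketch& e : inst) {
    if (e.index == split_index) {
      return e.fvalue < split_cond ? 1 : 2;  // present: compare and branch
    }
  }
  return 1;  // feature missing: follow the default (left) child
}

int main() {
  std::cout << NextLevelSketch({{7, 0.1f}}) << ' '   // 1: goes left
            << NextLevelSketch({{7, 0.9f}}) << ' '   // 2: goes right
            << NextLevelSketch({{3, 0.9f}});         // 1: default path
}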
// ------class member helpers---------
/*! \brief initialize temp data structure */
inline void InitData(const std::vector<bst_gpair> &gpair,
inline void InitData(const std::vector<GradientPair> &gpair,
const DMatrix &fmat,
const RegTree &tree) {
CHECK_EQ(tree.param.num_nodes, tree.param.num_roots)
<< "TreeMaker: can only grow new tree";
const std::vector<unsigned> &root_index = fmat.info().root_index;
const std::vector<unsigned> &root_index = fmat.Info().root_index_;
{
// setup position
position.resize(gpair.size());
position_.resize(gpair.size());
if (root_index.size() == 0) {
std::fill(position.begin(), position.end(), 0);
std::fill(position_.begin(), position_.end(), 0);
} else {
for (size_t i = 0; i < position.size(); ++i) {
position[i] = root_index[i];
for (size_t i = 0; i < position_.size(); ++i) {
position_[i] = root_index[i];
CHECK_LT(root_index[i], (unsigned)tree.param.num_roots)
<< "root index exceed setting";
}
}
// mark delete for the deleted data
for (size_t i = 0; i < position.size(); ++i) {
if (gpair[i].GetHess() < 0.0f) position[i] = ~position[i];
for (size_t i = 0; i < position_.size(); ++i) {
if (gpair[i].GetHess() < 0.0f) position_[i] = ~position_[i];
}
// mark subsample
if (param.subsample < 1.0f) {
std::bernoulli_distribution coin_flip(param.subsample);
if (param_.subsample < 1.0f) {
std::bernoulli_distribution coin_flip(param_.subsample);
auto& rnd = common::GlobalRandom();
for (size_t i = 0; i < position.size(); ++i) {
for (size_t i = 0; i < position_.size(); ++i) {
if (gpair[i].GetHess() < 0.0f) continue;
if (!coin_flip(rnd)) position[i] = ~position[i];
if (!coin_flip(rnd)) position_[i] = ~position_[i];
}
}
}
{
// expand query
qexpand.reserve(256); qexpand.clear();
qexpand_.reserve(256); qexpand_.clear();
for (int i = 0; i < tree.param.num_roots; ++i) {
qexpand.push_back(i);
qexpand_.push_back(i);
}
this->UpdateNode2WorkIndex(tree);
}
@ -164,28 +164,27 @@ class BaseMaker: public TreeUpdater {
/*! \brief update queue expand add in new leaves */
inline void UpdateQueueExpand(const RegTree &tree) {
std::vector<int> newnodes;
for (size_t i = 0; i < qexpand.size(); ++i) {
const int nid = qexpand[i];
if (!tree[nid].is_leaf()) {
newnodes.push_back(tree[nid].cleft());
newnodes.push_back(tree[nid].cright());
for (int nid : qexpand_) {
if (!tree[nid].IsLeaf()) {
newnodes.push_back(tree[nid].LeftChild());
newnodes.push_back(tree[nid].RightChild());
}
}
// use new nodes for qexpand
qexpand = newnodes;
qexpand_ = newnodes;
this->UpdateNode2WorkIndex(tree);
}
// return decoded position
inline int DecodePosition(bst_uint ridx) const {
const int pid = position[ridx];
const int pid = position_[ridx];
return pid < 0 ? ~pid : pid;
}
// encode the encoded position value for ridx
inline void SetEncodePosition(bst_uint ridx, int nid) {
if (position[ridx] < 0) {
position[ridx] = ~nid;
if (position_[ridx] < 0) {
position_[ridx] = ~nid;
} else {
position[ridx] = nid;
position_[ridx] = nid;
}
}
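DecodePosition and SetEncodePosition rely on the bitwise complement to mark a row inactive without losing its node id: ~nid of a non-negative nid is always negative, and ~(~nid) restores the original value. A round-trip sketch:

#include <iostream>

int main() {
  int nid = 5;
  int encoded = ~nid;                       // -6: negative marks the row inactive
  int decoded = encoded < 0 ? ~encoded : encoded;
  std::cout << encoded << ' ' << decoded;   // prints: -6 5
}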
/*!
@ -211,27 +210,27 @@ class BaseMaker: public TreeUpdater {
inline void SetDefaultPostion(DMatrix *p_fmat,
const RegTree &tree) {
// set rest of instances to default position
const RowSet &rowset = p_fmat->buffered_rowset();
const RowSet &rowset = p_fmat->BufferedRowset();
// set default direct nodes to default
// for leaf nodes that are not fresh, mark them to ~nid,
// so that they are ignored in future statistics collection
const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
const auto ndata = static_cast<bst_omp_uint>(rowset.Size());

#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ndata; ++i) {
const bst_uint ridx = rowset[i];
const int nid = this->DecodePosition(ridx);
if (tree[nid].is_leaf()) {
if (tree[nid].IsLeaf()) {
// mark finish when it is not a fresh leaf
if (tree[nid].cright() == -1) {
position[ridx] = ~nid;
if (tree[nid].RightChild() == -1) {
position_[ridx] = ~nid;
}
} else {
// push to default branch
if (tree[nid].default_left()) {
this->SetEncodePosition(ridx, tree[nid].cleft());
if (tree[nid].DefaultLeft()) {
this->SetEncodePosition(ridx, tree[nid].LeftChild());
} else {
this->SetEncodePosition(ridx, tree[nid].cright());
this->SetEncodePosition(ridx, tree[nid].RightChild());
}
}
}
@ -254,21 +253,21 @@ class BaseMaker: public TreeUpdater {
auto it = std::lower_bound(sorted_split_set.begin(), sorted_split_set.end(), fid);

if (it != sorted_split_set.end() && *it == fid) {
const bst_omp_uint ndata = static_cast<bst_omp_uint>(col.length);
const auto ndata = static_cast<bst_omp_uint>(col.length);
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) {
const bst_uint ridx = col[j].index;
const bst_float fvalue = col[j].fvalue;
const int nid = this->DecodePosition(ridx);
CHECK(tree[nid].is_leaf());
int pid = tree[nid].parent();
CHECK(tree[nid].IsLeaf());
int pid = tree[nid].Parent();

// go back to parent, correct those who are not default
if (!tree[nid].is_root() && tree[pid].split_index() == fid) {
if (fvalue < tree[pid].split_cond()) {
this->SetEncodePosition(ridx, tree[pid].cleft());
if (!tree[nid].IsRoot() && tree[pid].SplitIndex() == fid) {
if (fvalue < tree[pid].SplitCond()) {
this->SetEncodePosition(ridx, tree[pid].LeftChild());
} else {
this->SetEncodePosition(ridx, tree[pid].cright());
this->SetEncodePosition(ridx, tree[pid].RightChild());
}
}
}
@ -287,10 +286,9 @@ class BaseMaker: public TreeUpdater {
std::vector<unsigned>& fsplits = *out_split_set;
fsplits.clear();
// step 1, classify the non-default data into right places
for (size_t i = 0; i < nodes.size(); ++i) {
const int nid = nodes[i];
if (!tree[nid].is_leaf()) {
fsplits.push_back(tree[nid].split_index());
for (int nid : nodes) {
if (!tree[nid].IsLeaf()) {
fsplits.push_back(tree[nid].SplitIndex());
}
}
std::sort(fsplits.begin(), fsplits.end());
@ -314,18 +312,18 @@ class BaseMaker: public TreeUpdater {
for (size_t i = 0; i < batch.size; ++i) {
ColBatch::Inst col = batch[i];
const bst_uint fid = batch.col_index[i];
const bst_omp_uint ndata = static_cast<bst_omp_uint>(col.length);
const auto ndata = static_cast<bst_omp_uint>(col.length);
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) {
const bst_uint ridx = col[j].index;
const bst_float fvalue = col[j].fvalue;
const int nid = this->DecodePosition(ridx);
// go back to parent, correct those who are not default
if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) {
if (fvalue < tree[nid].split_cond()) {
this->SetEncodePosition(ridx, tree[nid].cleft());
if (!tree[nid].IsLeaf() && tree[nid].SplitIndex() == fid) {
if (fvalue < tree[nid].SplitCond()) {
this->SetEncodePosition(ridx, tree[nid].LeftChild());
} else {
this->SetEncodePosition(ridx, tree[nid].cright());
this->SetEncodePosition(ridx, tree[nid].RightChild());
}
}
}
@ -334,39 +332,37 @@ class BaseMaker: public TreeUpdater {
}
/*! \brief helper function to get statistics from a tree */
template<typename TStats>
inline void GetNodeStats(const std::vector<bst_gpair> &gpair,
inline void GetNodeStats(const std::vector<GradientPair> &gpair,
const DMatrix &fmat,
const RegTree &tree,
std::vector< std::vector<TStats> > *p_thread_temp,
std::vector<TStats> *p_node_stats) {
std::vector< std::vector<TStats> > &thread_temp = *p_thread_temp;
const MetaInfo &info = fmat.info();
const MetaInfo &info = fmat.Info();
thread_temp.resize(omp_get_max_threads());
p_node_stats->resize(tree.param.num_nodes);
#pragma omp parallel
{
const int tid = omp_get_thread_num();
thread_temp[tid].resize(tree.param.num_nodes, TStats(param));
for (size_t i = 0; i < qexpand.size(); ++i) {
const unsigned nid = qexpand[i];
thread_temp[tid].resize(tree.param.num_nodes, TStats(param_));
for (unsigned int nid : qexpand_) {
thread_temp[tid][nid].Clear();
}
}
const RowSet &rowset = fmat.buffered_rowset();
const RowSet &rowset = fmat.BufferedRowset();
// setup position
const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
const auto ndata = static_cast<bst_omp_uint>(rowset.Size());
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ndata; ++i) {
const bst_uint ridx = rowset[i];
const int nid = position[ridx];
const int nid = position_[ridx];
const int tid = omp_get_thread_num();
if (nid >= 0) {
thread_temp[tid][nid].Add(gpair, info, ridx);
}
}
// sum the per thread statistics together
for (size_t j = 0; j < qexpand.size(); ++j) {
const int nid = qexpand[j];
for (int nid : qexpand_) {
TStats &s = (*p_node_stats)[nid];
s.Clear();
for (size_t tid = 0; tid < thread_temp.size(); ++tid) {
@ -461,28 +457,28 @@ class BaseMaker: public TreeUpdater {
}
};
/*! \brief training parameter of tree grower */
TrainParam param;
TrainParam param_;
/*! \brief queue of nodes to be expanded */
std::vector<int> qexpand;
std::vector<int> qexpand_;
/*!
* \brief map active node to its working index offset in qexpand,
* can be -1, which means the node is not actively expanding
*/
std::vector<int> node2workindex;
std::vector<int> node2workindex_;
/*!
* \brief position of each instance in the tree
* can be negative, which means this position is no longer expanding
* see also Decode/EncodePosition
*/
std::vector<int> position;
std::vector<int> position_;

private:
inline void UpdateNode2WorkIndex(const RegTree &tree) {
// update the node2workindex
std::fill(node2workindex.begin(), node2workindex.end(), -1);
node2workindex.resize(tree.param.num_nodes);
for (size_t i = 0; i < qexpand.size(); ++i) {
node2workindex[qexpand[i]] = static_cast<int>(i);
std::fill(node2workindex_.begin(), node2workindex_.end(), -1);
node2workindex_.resize(tree.param.num_nodes);
for (size_t i = 0; i < qexpand_.size(); ++i) {
node2workindex_[qexpand_[i]] = static_cast<int>(i);
}
}
};

File diff suppressed because it is too large
@ -7,6 +7,7 @@
#include <dmlc/timer.h>
#include <xgboost/tree_updater.h>
#include <cmath>
#include <memory>
#include <vector>
#include <algorithm>
#include <queue>
@ -50,47 +51,47 @@ class FastHistMaker: public TreeUpdater {
pruner_.reset(TreeUpdater::Create("prune"));
}
pruner_->Init(args);
param.InitAllowUnknown(args);
fhparam.InitAllowUnknown(args);
param_.InitAllowUnknown(args);
fhparam_.InitAllowUnknown(args);
is_gmat_initialized_ = false;
}

void Update(HostDeviceVector<bst_gpair>* gpair,
void Update(HostDeviceVector<GradientPair>* gpair,
DMatrix* dmat,
const std::vector<RegTree*>& trees) override {
TStats::CheckInfo(dmat->info());
TStats::CheckInfo(dmat->Info());
if (is_gmat_initialized_ == false) {
double tstart = dmlc::GetTime();
hmat_.Init(dmat, static_cast<uint32_t>(param.max_bin));
hmat_.Init(dmat, static_cast<uint32_t>(param_.max_bin));
gmat_.cut = &hmat_;
gmat_.Init(dmat);
column_matrix_.Init(gmat_, fhparam);
if (fhparam.enable_feature_grouping > 0) {
gmatb_.Init(gmat_, column_matrix_, fhparam);
column_matrix_.Init(gmat_, fhparam_);
if (fhparam_.enable_feature_grouping > 0) {
gmatb_.Init(gmat_, column_matrix_, fhparam_);
}
is_gmat_initialized_ = true;
if (param.debug_verbose > 0) {
if (param_.debug_verbose > 0) {
LOG(INFO) << "Generating gmat: " << dmlc::GetTime() - tstart << " sec";
}
}
// rescale learning rate according to size of trees
float lr = param.learning_rate;
param.learning_rate = lr / trees.size();
TConstraint::Init(&param, dmat->info().num_col);
float lr = param_.learning_rate;
param_.learning_rate = lr / trees.size();
TConstraint::Init(&param_, dmat->Info().num_col_);
// build tree
if (!builder_) {
builder_.reset(new Builder(param, fhparam, std::move(pruner_)));
builder_.reset(new Builder(param_, fhparam_, std::move(pruner_)));
}
for (size_t i = 0; i < trees.size(); ++i) {
for (auto tree : trees) {
builder_->Update
(gmat_, gmatb_, column_matrix_, gpair, dmat, trees[i]);
(gmat_, gmatb_, column_matrix_, gpair, dmat, tree);
}
param.learning_rate = lr;
param_.learning_rate = lr;
}

bool UpdatePredictionCache(const DMatrix* data,
HostDeviceVector<bst_float>* out_preds) override {
if (!builder_ || param.subsample < 1.0f) {
if (!builder_ || param_.subsample < 1.0f) {
return false;
} else {
return builder_->UpdatePredictionCache(data, out_preds);
@ -99,8 +100,8 @@ class FastHistMaker: public TreeUpdater {

protected:
// training parameter
TrainParam param;
FastHistParam fhparam;
TrainParam param_;
FastHistParam fhparam_;
// data sketch
HistCutMatrix hmat_;
// quantized data matrix
@ -134,13 +135,13 @@ class FastHistMaker: public TreeUpdater {
explicit Builder(const TrainParam& param,
const FastHistParam& fhparam,
std::unique_ptr<TreeUpdater> pruner)
: param(param), fhparam(fhparam), pruner_(std::move(pruner)),
: param_(param), fhparam_(fhparam), pruner_(std::move(pruner)),
p_last_tree_(nullptr), p_last_fmat_(nullptr) {}
// update one tree, growing
virtual void Update(const GHistIndexMatrix& gmat,
const GHistIndexBlockMatrix& gmatb,
const ColumnMatrix& column_matrix,
HostDeviceVector<bst_gpair>* gpair,
HostDeviceVector<GradientPair>* gpair,
DMatrix* p_fmat,
RegTree* p_tree) {
double gstart = dmlc::GetTime();
@ -155,11 +156,11 @@ class FastHistMaker: public TreeUpdater {
double time_evaluate_split = 0;
double time_apply_split = 0;

std::vector<bst_gpair>& gpair_h = gpair->data_h();
std::vector<GradientPair>& gpair_h = gpair->HostVector();

tstart = dmlc::GetTime();
this->InitData(gmat, gpair_h, *p_fmat, *p_tree);
std::vector<bst_uint> feat_set = feat_index;
std::vector<bst_uint> feat_set = feat_index_;
time_init_data = dmlc::GetTime() - tstart;

// FIXME(hcho3): this code is broken when param.num_roots > 1. Please fix it
@ -179,7 +180,7 @@ class FastHistMaker: public TreeUpdater {
this->EvaluateSplit(nid, gmat, hist_, *p_fmat, *p_tree, feat_set);
time_evaluate_split += dmlc::GetTime() - tstart;
qexpand_->push(ExpandEntry(nid, p_tree->GetDepth(nid),
snode[nid].best.loss_chg,
snode_[nid].best.loss_chg,
timestamp++));
++num_leaves;
}
@ -188,21 +189,21 @@ class FastHistMaker: public TreeUpdater {
const ExpandEntry candidate = qexpand_->top();
const int nid = candidate.nid;
qexpand_->pop();
if (candidate.loss_chg <= rt_eps
|| (param.max_depth > 0 && candidate.depth == param.max_depth)
|| (param.max_leaves > 0 && num_leaves == param.max_leaves) ) {
(*p_tree)[nid].set_leaf(snode[nid].weight * param.learning_rate);
if (candidate.loss_chg <= kRtEps
|| (param_.max_depth > 0 && candidate.depth == param_.max_depth)
|| (param_.max_leaves > 0 && num_leaves == param_.max_leaves) ) {
(*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate);
} else {
tstart = dmlc::GetTime();
this->ApplySplit(nid, gmat, column_matrix, hist_, *p_fmat, p_tree);
time_apply_split += dmlc::GetTime() - tstart;

tstart = dmlc::GetTime();
const int cleft = (*p_tree)[nid].cleft();
const int cright = (*p_tree)[nid].cright();
const int cleft = (*p_tree)[nid].LeftChild();
const int cright = (*p_tree)[nid].RightChild();
hist_.AddHistRow(cleft);
hist_.AddHistRow(cright);
if (row_set_collection_[cleft].size() < row_set_collection_[cright].size()) {
if (row_set_collection_[cleft].Size() < row_set_collection_[cright].Size()) {
BuildHist(gpair_h, row_set_collection_[cleft], gmat, gmatb, feat_set, hist_[cleft]);
SubtractionTrick(hist_[cright], hist_[cleft], hist_[nid]);
} else {
@ -222,10 +223,10 @@ class FastHistMaker: public TreeUpdater {
time_evaluate_split += dmlc::GetTime() - tstart;

qexpand_->push(ExpandEntry(cleft, p_tree->GetDepth(cleft),
snode[cleft].best.loss_chg,
snode_[cleft].best.loss_chg,
timestamp++));
qexpand_->push(ExpandEntry(cright, p_tree->GetDepth(cright),
snode[cright].best.loss_chg,
snode_[cright].best.loss_chg,
timestamp++));

++num_leaves; // give two and take one, as parent is no longer a leaf
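The SubtractionTrick call above exploits that a parent's histogram is the element-wise sum of its two children's histograms: build the cheaper (smaller) child directly, then derive the sibling by subtraction instead of a second pass over the rows. A minimal sketch over plain per-bin gradient sums (the bin values are hypothetical):

#include <iostream>
#include <vector>

int main() {
  // Parent histogram and the smaller child's, one gradient sum per bin.
  std::vector<double> parent = {4.0, 2.5, 1.0};
  std::vector<double> small_child = {1.5, 0.5, 1.0};
  std::vector<double> sibling(parent.size());
  for (size_t i = 0; i < parent.size(); ++i) {
    sibling[i] = parent[i] - small_child[i];  // sibling = parent - child
  }
  for (double v : sibling) std::cout << v << ' ';  // prints: 2.5 2 0
}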
@ -238,19 +239,19 @@ class FastHistMaker: public TreeUpdater {
|
||||
while (!qexpand_->empty()) {
|
||||
const int nid = qexpand_->top().nid;
|
||||
qexpand_->pop();
|
||||
(*p_tree)[nid].set_leaf(snode[nid].weight * param.learning_rate);
|
||||
(*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate);
|
||||
}
|
||||
// remember auxiliary statistics in the tree node
|
||||
for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) {
|
||||
p_tree->stat(nid).loss_chg = snode[nid].best.loss_chg;
|
||||
p_tree->stat(nid).base_weight = snode[nid].weight;
|
||||
p_tree->stat(nid).sum_hess = static_cast<float>(snode[nid].stats.sum_hess);
|
||||
snode[nid].stats.SetLeafVec(param, p_tree->leafvec(nid));
|
||||
p_tree->Stat(nid).loss_chg = snode_[nid].best.loss_chg;
p_tree->Stat(nid).base_weight = snode_[nid].weight;
p_tree->Stat(nid).sum_hess = static_cast<float>(snode_[nid].stats.sum_hess);
snode_[nid].stats.SetLeafVec(param_, p_tree->Leafvec(nid));
}

pruner_->Update(gpair, p_fmat, std::vector<RegTree*>{p_tree});

if (param.debug_verbose > 0) {
if (param_.debug_verbose > 0) {
double total_time = dmlc::GetTime() - gstart;
LOG(INFO) << "\nInitData: "
<< std::fixed << std::setw(6) << std::setprecision(4) << time_init_data
@ -278,13 +279,13 @@ class FastHistMaker: public TreeUpdater {
}
}

inline void BuildHist(const std::vector<bst_gpair>& gpair,
inline void BuildHist(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat,
const GHistIndexBlockMatrix& gmatb,
const std::vector<bst_uint>& feat_set,
GHistRow hist) {
if (fhparam.enable_feature_grouping > 0) {
if (fhparam_.enable_feature_grouping > 0) {
hist_builder_.BuildBlockHist(gpair, row_indices, gmatb, feat_set, hist);
} else {
hist_builder_.BuildHist(gpair, row_indices, gmat, feat_set, hist);
@ -297,7 +298,7 @@ class FastHistMaker: public TreeUpdater {

inline bool UpdatePredictionCache(const DMatrix* data,
HostDeviceVector<bst_float>* p_out_preds) {
std::vector<bst_float>& out_preds = p_out_preds->data_h();
std::vector<bst_float>& out_preds = p_out_preds->HostVector();

// p_last_fmat_ is a valid pointer as long as UpdatePredictionCache() is called in
// conjunction with Update().
@ -318,13 +319,13 @@ class FastHistMaker: public TreeUpdater {
bst_float leaf_value;
// if a node is marked as deleted by the pruner, traverse upward to locate
// a non-deleted leaf.
if ((*p_last_tree_)[nid].is_deleted()) {
while ((*p_last_tree_)[nid].is_deleted()) {
nid = (*p_last_tree_)[nid].parent();
if ((*p_last_tree_)[nid].IsDeleted()) {
while ((*p_last_tree_)[nid].IsDeleted()) {
nid = (*p_last_tree_)[nid].Parent();
}
CHECK((*p_last_tree_)[nid].is_leaf());
CHECK((*p_last_tree_)[nid].IsLeaf());
}
leaf_value = (*p_last_tree_)[nid].leaf_value();
leaf_value = (*p_last_tree_)[nid].LeafValue();

for (const size_t* it = rowset.begin; it < rowset.end; ++it) {
out_preds[*it] += leaf_value;
@ -338,19 +339,19 @@ class FastHistMaker: public TreeUpdater {
protected:
// initialize temp data structure
inline void InitData(const GHistIndexMatrix& gmat,
const std::vector<bst_gpair>& gpair,
const std::vector<GradientPair>& gpair,
const DMatrix& fmat,
const RegTree& tree) {
CHECK_EQ(tree.param.num_nodes, tree.param.num_roots)
<< "ColMakerHist: can only grow new tree";
CHECK((param.max_depth > 0 || param.max_leaves > 0))
CHECK((param_.max_depth > 0 || param_.max_leaves > 0))
<< "max_depth or max_leaves cannot be both 0 (unlimited); "
<< "at least one should be a positive quantity.";
if (param.grow_policy == TrainParam::kDepthWise) {
CHECK(param.max_depth > 0) << "max_depth cannot be 0 (unlimited) "
if (param_.grow_policy == TrainParam::kDepthWise) {
CHECK(param_.max_depth > 0) << "max_depth cannot be 0 (unlimited) "
<< "when grow_policy is depthwise.";
}
const auto& info = fmat.info();
const auto& info = fmat.Info();

{
// initialize the row set
@ -364,23 +365,23 @@ class FastHistMaker: public TreeUpdater {
// initialize histogram builder
#pragma omp parallel
{
this->nthread = omp_get_num_threads();
this->nthread_ = omp_get_num_threads();
}
hist_builder_.Init(this->nthread, nbins);
hist_builder_.Init(this->nthread_, nbins);

CHECK_EQ(info.root_index.size(), 0U);
CHECK_EQ(info.root_index_.size(), 0U);
std::vector<size_t>& row_indices = row_set_collection_.row_indices_;
// mark subsample and build list of member rows
if (param.subsample < 1.0f) {
std::bernoulli_distribution coin_flip(param.subsample);
if (param_.subsample < 1.0f) {
std::bernoulli_distribution coin_flip(param_.subsample);
auto& rnd = common::GlobalRandom();
for (size_t i = 0; i < info.num_row; ++i) {
for (size_t i = 0; i < info.num_row_; ++i) {
if (gpair[i].GetHess() >= 0.0f && coin_flip(rnd)) {
row_indices.push_back(i);
}
}
} else {
for (size_t i = 0; i < info.num_row; ++i) {
for (size_t i = 0; i < info.num_row_; ++i) {
if (gpair[i].GetHess() >= 0.0f) {
row_indices.push_back(i);
}
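The subsampling branch above pairs a Bernoulli coin flip with a hessian check. As a standalone illustration of that pattern (the plain-float hessian vector and all names are ours, not the updater's API):

#include <random>
#include <vector>

// Keep row i with probability `subsample`, but never keep a row whose
// hessian is negative (such rows are treated as deleted). Sketch only.
std::vector<size_t> SampleRows(const std::vector<float>& hess,
                               float subsample, std::mt19937* rnd) {
  std::bernoulli_distribution coin_flip(subsample);
  std::vector<size_t> row_indices;
  for (size_t i = 0; i < hess.size(); ++i) {
    if (hess[i] >= 0.0f && coin_flip(*rnd)) {
      row_indices.push_back(i);
    }
  }
  return row_indices;
}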
@ -391,9 +392,9 @@ class FastHistMaker: public TreeUpdater {

{
/* determine layout of data */
const size_t nrow = info.num_row;
const size_t ncol = info.num_col;
const size_t nnz = info.num_nonzero;
const size_t nrow = info.num_row_;
const size_t ncol = info.num_col_;
const size_t nnz = info.num_nonzero_;
// number of discrete bins for feature 0
const uint32_t nbins_f0 = gmat.cut->row_ptr[1] - gmat.cut->row_ptr[0];
if (nrow * ncol == nnz) {
@ -413,23 +414,23 @@ class FastHistMaker: public TreeUpdater {
// store a pointer to training data
p_last_fmat_ = &fmat;
// initialize feature index
bst_uint ncol = static_cast<bst_uint>(info.num_col);
feat_index.clear();
auto ncol = static_cast<bst_uint>(info.num_col_);
feat_index_.clear();
if (data_layout_ == kDenseDataOneBased) {
for (bst_uint i = 1; i < ncol; ++i) {
feat_index.push_back(i);
feat_index_.push_back(i);
}
} else {
for (bst_uint i = 0; i < ncol; ++i) {
feat_index.push_back(i);
feat_index_.push_back(i);
}
}
bst_uint n = std::max(static_cast<bst_uint>(1),
static_cast<bst_uint>(param.colsample_bytree * feat_index.size()));
std::shuffle(feat_index.begin(), feat_index.end(), common::GlobalRandom());
CHECK_GT(param.colsample_bytree, 0U)
static_cast<bst_uint>(param_.colsample_bytree * feat_index_.size()));
std::shuffle(feat_index_.begin(), feat_index_.end(), common::GlobalRandom());
CHECK_GT(param_.colsample_bytree, 0U)
<< "colsample_bytree cannot be zero.";
feat_index.resize(n);
feat_index_.resize(n);
}
if (data_layout_ == kDenseDataZeroBased || data_layout_ == kDenseDataOneBased) {
/* specialized code for dense data:
@ -437,7 +438,7 @@ class FastHistMaker: public TreeUpdater {
For dense data (with no missing value),
the sum of gradient histogram is equal to snode[nid] */
const std::vector<uint32_t>& row_ptr = gmat.cut->row_ptr;
const bst_uint nfeature = static_cast<bst_uint>(row_ptr.size() - 1);
const auto nfeature = static_cast<bst_uint>(row_ptr.size() - 1);
uint32_t min_nbins_per_feature = 0;
for (bst_uint i = 0; i < nfeature; ++i) {
const uint32_t nbins = row_ptr[i + 1] - row_ptr[i];
@ -451,14 +452,14 @@ class FastHistMaker: public TreeUpdater {
CHECK_GT(min_nbins_per_feature, 0U);
}
{
snode.reserve(256);
snode.clear();
snode_.reserve(256);
snode_.clear();
}
{
if (param.grow_policy == TrainParam::kLossGuide) {
qexpand_.reset(new ExpandQueue(loss_guide));
if (param_.grow_policy == TrainParam::kLossGuide) {
qexpand_.reset(new ExpandQueue(LossGuide));
} else {
qexpand_.reset(new ExpandQueue(depth_wise));
qexpand_.reset(new ExpandQueue(DepthWise));
}
}
}
@ -470,25 +471,25 @@ class FastHistMaker: public TreeUpdater {
const RegTree& tree,
const std::vector<bst_uint>& feat_set) {
// start enumeration
const MetaInfo& info = fmat.info();
const bst_uint nfeature = static_cast<bst_uint>(feat_set.size());
const bst_omp_uint nthread = static_cast<bst_omp_uint>(this->nthread);
const MetaInfo& info = fmat.Info();
const auto nfeature = static_cast<bst_uint>(feat_set.size());
const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
best_split_tloc_.resize(nthread);
#pragma omp parallel for schedule(static) num_threads(nthread)
for (bst_omp_uint tid = 0; tid < nthread; ++tid) {
best_split_tloc_[tid] = snode[nid].best;
best_split_tloc_[tid] = snode_[nid].best;
}
#pragma omp parallel for schedule(dynamic) num_threads(nthread)
for (bst_omp_uint i = 0; i < nfeature; ++i) {
const bst_uint fid = feat_set[i];
const unsigned tid = omp_get_thread_num();
this->EnumerateSplit(-1, gmat, hist[nid], snode[nid], constraints_[nid], info,
this->EnumerateSplit(-1, gmat, hist[nid], snode_[nid], constraints_[nid], info,
&best_split_tloc_[tid], fid);
this->EnumerateSplit(+1, gmat, hist[nid], snode[nid], constraints_[nid], info,
this->EnumerateSplit(+1, gmat, hist[nid], snode_[nid], constraints_[nid], info,
&best_split_tloc_[tid], fid);
}
for (unsigned tid = 0; tid < nthread; ++tid) {
snode[nid].best.Update(best_split_tloc_[tid]);
snode_[nid].best.Update(best_split_tloc_[tid]);
}
}

@ -499,12 +500,13 @@ class FastHistMaker: public TreeUpdater {
const DMatrix& fmat,
RegTree* p_tree) {
XGBOOST_TYPE_SWITCH(column_matrix.dtype, {
ApplySplit_<DType>(nid, gmat, column_matrix, hist, fmat, p_tree);
ApplySplitSpecialize<DType>(nid, gmat, column_matrix, hist, fmat,
p_tree);
});
}

template <typename T>
inline void ApplySplit_(int nid,
inline void ApplySplitSpecialize(int nid,
const GHistIndexMatrix& gmat,
const ColumnMatrix& column_matrix,
const HistCollection& hist,
@ -513,26 +515,26 @@ class FastHistMaker: public TreeUpdater {
// TODO(hcho3): support feature sampling by levels

/* 1. Create child nodes */
NodeEntry& e = snode[nid];
NodeEntry& e = snode_[nid];

p_tree->AddChilds(nid);
(*p_tree)[nid].set_split(e.best.split_index(), e.best.split_value, e.best.default_left());
(*p_tree)[nid].SetSplit(e.best.SplitIndex(), e.best.split_value, e.best.DefaultLeft());
// mark right child as 0, to indicate fresh leaf
int cleft = (*p_tree)[nid].cleft();
int cright = (*p_tree)[nid].cright();
(*p_tree)[cleft].set_leaf(0.0f, 0);
(*p_tree)[cright].set_leaf(0.0f, 0);
int cleft = (*p_tree)[nid].LeftChild();
int cright = (*p_tree)[nid].RightChild();
(*p_tree)[cleft].SetLeaf(0.0f, 0);
(*p_tree)[cright].SetLeaf(0.0f, 0);

/* 2. Categorize member rows */
const bst_omp_uint nthread = static_cast<bst_omp_uint>(this->nthread);
const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
row_split_tloc_.resize(nthread);
for (bst_omp_uint i = 0; i < nthread; ++i) {
row_split_tloc_[i].left.clear();
row_split_tloc_[i].right.clear();
}
const bool default_left = (*p_tree)[nid].default_left();
const bst_uint fid = (*p_tree)[nid].split_index();
const bst_float split_pt = (*p_tree)[nid].split_cond();
const bool default_left = (*p_tree)[nid].DefaultLeft();
const bst_uint fid = (*p_tree)[nid].SplitIndex();
const bst_float split_pt = (*p_tree)[nid].SplitCond();
const uint32_t lower_bound = gmat.cut->row_ptr[fid];
const uint32_t upper_bound = gmat.cut->row_ptr[fid + 1];
int32_t split_cond = -1;
@ -558,7 +560,7 @@ class FastHistMaker: public TreeUpdater {
}

row_set_collection_.AddSplit(
nid, row_split_tloc_, (*p_tree)[nid].cleft(), (*p_tree)[nid].cright());
nid, row_split_tloc_, (*p_tree)[nid].LeftChild(), (*p_tree)[nid].RightChild());
}

template<typename T>
@ -569,24 +571,24 @@ class FastHistMaker: public TreeUpdater {
bst_int split_cond,
bool default_left) {
std::vector<RowSetCollection::Split>& row_split_tloc = *p_row_split_tloc;
const int K = 8; // loop unrolling factor
constexpr int kUnroll = 8; // loop unrolling factor
const size_t nrows = rowset.end - rowset.begin;
const size_t rest = nrows % K;
const size_t rest = nrows % kUnroll;

#pragma omp parallel for num_threads(nthread) schedule(static)
for (bst_omp_uint i = 0; i < nrows - rest; i += K) {
#pragma omp parallel for num_threads(nthread_) schedule(static)
for (bst_omp_uint i = 0; i < nrows - rest; i += kUnroll) {
const bst_uint tid = omp_get_thread_num();
auto& left = row_split_tloc[tid].left;
auto& right = row_split_tloc[tid].right;
size_t rid[K];
T rbin[K];
for (int k = 0; k < K; ++k) {
size_t rid[kUnroll];
T rbin[kUnroll];
for (int k = 0; k < kUnroll; ++k) {
rid[k] = rowset.begin[i + k];
}
for (int k = 0; k < K; ++k) {
for (int k = 0; k < kUnroll; ++k) {
rbin[k] = column.index[rid[k]];
}
for (int k = 0; k < K; ++k) {
for (int k = 0; k < kUnroll; ++k) { // NOLINT
if (rbin[k] == std::numeric_limits<T>::max()) { // missing value
if (default_left) {
left.push_back(rid[k]);
@ -605,8 +607,8 @@ class FastHistMaker: public TreeUpdater {
}
}
for (size_t i = nrows - rest; i < nrows; ++i) {
auto& left = row_split_tloc[nthread-1].left;
auto& right = row_split_tloc[nthread-1].right;
auto& left = row_split_tloc[nthread_-1].left;
auto& right = row_split_tloc[nthread_-1].right;
const size_t rid = rowset.begin[i];
const T rbin = column.index[rid];
if (rbin == std::numeric_limits<T>::max()) { // missing value
@ -635,27 +637,27 @@ class FastHistMaker: public TreeUpdater {
bst_int split_cond,
bool default_left) {
std::vector<RowSetCollection::Split>& row_split_tloc = *p_row_split_tloc;
const int K = 8; // loop unrolling factor
constexpr int kUnroll = 8; // loop unrolling factor
const size_t nrows = rowset.end - rowset.begin;
const size_t rest = nrows % K;
#pragma omp parallel for num_threads(nthread) schedule(static)
for (bst_omp_uint i = 0; i < nrows - rest; i += K) {
size_t rid[K];
GHistIndexRow row[K];
const uint32_t* p[K];
const size_t rest = nrows % kUnroll;
#pragma omp parallel for num_threads(nthread_) schedule(static)
for (bst_omp_uint i = 0; i < nrows - rest; i += kUnroll) {
size_t rid[kUnroll];
GHistIndexRow row[kUnroll];
const uint32_t* p[kUnroll];
bst_uint tid = omp_get_thread_num();
auto& left = row_split_tloc[tid].left;
auto& right = row_split_tloc[tid].right;
for (int k = 0; k < K; ++k) {
for (int k = 0; k < kUnroll; ++k) {
rid[k] = rowset.begin[i + k];
}
for (int k = 0; k < K; ++k) {
for (int k = 0; k < kUnroll; ++k) {
row[k] = gmat[rid[k]];
}
for (int k = 0; k < K; ++k) {
for (int k = 0; k < kUnroll; ++k) {
p[k] = std::lower_bound(row[k].index, row[k].index + row[k].size, lower_bound);
}
for (int k = 0; k < K; ++k) {
for (int k = 0; k < kUnroll; ++k) {
if (p[k] != row[k].index + row[k].size && *p[k] < upper_bound) {
CHECK_LT(*p[k],
static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
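Both partition kernels above use the same manual 8-way unrolling: gather indices, gather bins, then branch, each in its own short loop so the loads are not interleaved with the branches. A minimal self-contained sketch of the shape (all names hypothetical):

#include <cstddef>
#include <cstdint>
#include <vector>

constexpr int kUnroll = 8;  // same unrolling factor as above

// Partition `nrows` row ids into left/right by comparing each row's bin
// against `split`. The main loop runs in blocks of kUnroll; the tail
// handles the remainder one row at a time. Sketch only.
void Partition(const size_t* begin, size_t nrows,
               const uint16_t* bins, uint16_t split,
               std::vector<size_t>* left, std::vector<size_t>* right) {
  const size_t rest = nrows % kUnroll;
  for (size_t i = 0; i < nrows - rest; i += kUnroll) {
    size_t rid[kUnroll];
    uint16_t rbin[kUnroll];
    for (int k = 0; k < kUnroll; ++k) rid[k] = begin[i + k];     // loads
    for (int k = 0; k < kUnroll; ++k) rbin[k] = bins[rid[k]];    // loads
    for (int k = 0; k < kUnroll; ++k) {                          // branches
      (rbin[k] <= split ? left : right)->push_back(rid[k]);
    }
  }
  for (size_t i = nrows - rest; i < nrows; ++i) {
    (bins[begin[i]] <= split ? left : right)->push_back(begin[i]);
  }
}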
@ -708,11 +710,11 @@ class FastHistMaker: public TreeUpdater {
std::vector<RowSetCollection::Split>& row_split_tloc = *p_row_split_tloc;
const size_t nrows = rowset.end - rowset.begin;

#pragma omp parallel num_threads(nthread)
#pragma omp parallel num_threads(nthread_)
{
const size_t tid = static_cast<size_t>(omp_get_thread_num());
const size_t ibegin = tid * nrows / nthread;
const size_t iend = (tid + 1) * nrows / nthread;
const auto tid = static_cast<size_t>(omp_get_thread_num());
const size_t ibegin = tid * nrows / nthread_;
const size_t iend = (tid + 1) * nrows / nthread_;
if (ibegin < iend) { // ensure that [ibegin, iend) is nonempty range
// search first nonzero row with index >= rowset[ibegin]
const size_t* p = std::lower_bound(column.row_ind,
@ -769,17 +771,17 @@ class FastHistMaker: public TreeUpdater {

inline void InitNewNode(int nid,
const GHistIndexMatrix& gmat,
const std::vector<bst_gpair>& gpair,
const std::vector<GradientPair>& gpair,
const DMatrix& fmat,
const RegTree& tree) {
{
snode.resize(tree.param.num_nodes, NodeEntry(param));
snode_.resize(tree.param.num_nodes, NodeEntry(param_));
constraints_.resize(tree.param.num_nodes);
}

// setup constraints before calculating the weight
{
auto& stats = snode[nid].stats;
auto& stats = snode_[nid].stats;
if (data_layout_ == kDenseDataZeroBased || data_layout_ == kDenseDataOneBased) {
/* specialized code for dense data
For dense data (with no missing value),
@ -799,22 +801,22 @@ class FastHistMaker: public TreeUpdater {
stats.Add(gpair[*it]);
}
}
if (!tree[nid].is_root()) {
const int pid = tree[nid].parent();
constraints_[pid].SetChild(param, tree[pid].split_index(),
snode[tree[pid].cleft()].stats,
snode[tree[pid].cright()].stats,
&constraints_[tree[pid].cleft()],
&constraints_[tree[pid].cright()]);
if (!tree[nid].IsRoot()) {
const int pid = tree[nid].Parent();
constraints_[pid].SetChild(param_, tree[pid].SplitIndex(),
snode_[tree[pid].LeftChild()].stats,
snode_[tree[pid].RightChild()].stats,
&constraints_[tree[pid].LeftChild()],
&constraints_[tree[pid].RightChild()]);
}
}

// calculating the weights
{
snode[nid].root_gain = static_cast<float>(
constraints_[nid].CalcGain(param, snode[nid].stats));
snode[nid].weight = static_cast<float>(
constraints_[nid].CalcWeight(param, snode[nid].stats));
snode_[nid].root_gain = static_cast<float>(
constraints_[nid].CalcGain(param_, snode_[nid].stats));
snode_[nid].weight = static_cast<float>(
constraints_[nid].CalcWeight(param_, snode_[nid].stats));
}
}

@ -834,8 +836,8 @@ class FastHistMaker: public TreeUpdater {
const std::vector<bst_float>& cut_val = gmat.cut->cut;

// statistics on both sides of split
TStats c(param);
TStats e(param);
TStats c(param_);
TStats e(param_);
// best split so far
SplitEntry best;

@ -846,7 +848,7 @@ class FastHistMaker: public TreeUpdater {
static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
// imin: index (offset) of the minimum value for feature fid
// need this for backward enumeration
const int32_t imin = static_cast<int32_t>(cut_ptr[fid]);
const auto imin = static_cast<int32_t>(cut_ptr[fid]);
// ibegin, iend: smallest/largest cut points for feature fid
// use int to allow for value -1
int32_t ibegin, iend;
@ -862,21 +864,21 @@ class FastHistMaker: public TreeUpdater {
// start working
// try to find a split
e.Add(hist.begin[i].sum_grad, hist.begin[i].sum_hess);
if (e.sum_hess >= param.min_child_weight) {
if (e.sum_hess >= param_.min_child_weight) {
c.SetSubstract(snode.stats, e);
if (c.sum_hess >= param.min_child_weight) {
if (c.sum_hess >= param_.min_child_weight) {
bst_float loss_chg;
bst_float split_pt;
if (d_step > 0) {
// forward enumeration: split at right bound of each bin
loss_chg = static_cast<bst_float>(
constraint.CalcSplitGain(param, param.monotone_constraints[fid], e, c) -
constraint.CalcSplitGain(param_, param_.monotone_constraints[fid], e, c) -
snode.root_gain);
split_pt = cut_val[i];
} else {
// backward enumeration: split at left bound of each bin
loss_chg = static_cast<bst_float>(
constraint.CalcSplitGain(param, param.monotone_constraints[fid], c, e) -
constraint.CalcSplitGain(param_, param_.monotone_constraints[fid], c, e) -
snode.root_gain);
if (i == imin) {
// for leftmost bin, left bound is the smallest feature value
@ -901,14 +903,14 @@ class FastHistMaker: public TreeUpdater {
ExpandEntry(int nid, int depth, bst_float loss_chg, unsigned tstmp)
: nid(nid), depth(depth), loss_chg(loss_chg), timestamp(tstmp) {}
};
inline static bool depth_wise(ExpandEntry lhs, ExpandEntry rhs) {
inline static bool DepthWise(ExpandEntry lhs, ExpandEntry rhs) {
if (lhs.depth == rhs.depth) {
return lhs.timestamp > rhs.timestamp; // favor small timestamp
} else {
return lhs.depth > rhs.depth; // favor small depth
}
}
inline static bool loss_guide(ExpandEntry lhs, ExpandEntry rhs) {
inline static bool LossGuide(ExpandEntry lhs, ExpandEntry rhs) {
if (lhs.loss_chg == rhs.loss_chg) {
return lhs.timestamp > rhs.timestamp; // favor small timestamp
} else {
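DepthWise and LossGuide are strict-weak-order comparators for a max-style std::priority_queue: the entry that the comparator ranks everything else above is popped first. A compact, self-contained illustration of the depth-wise ordering:

#include <functional>
#include <queue>
#include <vector>

struct Entry { int depth; float loss_chg; unsigned timestamp; };

// Returning true when lhs is deeper makes the queue treat deeper entries
// as "smaller", so shallow nodes pop first; ties go to the older timestamp.
bool ByDepth(Entry lhs, Entry rhs) {
  if (lhs.depth == rhs.depth) return lhs.timestamp > rhs.timestamp;
  return lhs.depth > rhs.depth;
}

int main() {
  using Queue = std::priority_queue<Entry, std::vector<Entry>,
                                    std::function<bool(Entry, Entry)>>;
  Queue q(ByDepth);
  q.push({2, 0.5f, 1});
  q.push({1, 0.1f, 2});
  // q.top() is now the depth-1 entry: shallow nodes are expanded first.
  return 0;
}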
@ -917,19 +919,19 @@ class FastHistMaker: public TreeUpdater {
}

// --data fields--
const TrainParam& param;
const FastHistParam& fhparam;
const TrainParam& param_;
const FastHistParam& fhparam_;
// number of omp threads used during training
int nthread;
int nthread_;
// Per feature: shuffle index of each feature index
std::vector<bst_uint> feat_index;
std::vector<bst_uint> feat_index_;
// the internal row sets
RowSetCollection row_set_collection_;
// the temp space for split
std::vector<RowSetCollection::Split> row_split_tloc_;
std::vector<SplitEntry> best_split_tloc_;
/*! \brief TreeNode Data: statistics for each constructed node */
std::vector<NodeEntry> snode;
std::vector<NodeEntry> snode_;
/*! \brief cumulative histogram of gradients. */
HistCollection hist_;
/*! \brief feature with least # of bins. to be used for dense specialization
@ -948,9 +950,9 @@ class FastHistMaker: public TreeUpdater {
// constraint value
std::vector<TConstraint> constraints_;

typedef std::priority_queue<ExpandEntry,
std::vector<ExpandEntry>,
std::function<bool(ExpandEntry, ExpandEntry)>> ExpandQueue;
using ExpandQueue =
std::priority_queue<ExpandEntry, std::vector<ExpandEntry>,
std::function<bool(ExpandEntry, ExpandEntry)>>;
std::unique_ptr<ExpandQueue> qexpand_;

enum DataLayout { kDenseDataZeroBased, kDenseDataOneBased, kSparseData };
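The typedef-to-using rewrite above is behavior-preserving (modernize-use-using); the alias form also generalizes to templates, which typedef cannot express. A minimal illustration with a stand-in element type:

#include <functional>
#include <queue>
#include <vector>

struct Item { float priority; };

typedef std::priority_queue<Item, std::vector<Item>,
    std::function<bool(Item, Item)>> OldStyleQueue;   // typedef form

using NewStyleQueue = std::priority_queue<Item, std::vector<Item>,
    std::function<bool(Item, Item)>>;                 // equivalent alias

template <typename T>  // alias templates have no typedef equivalent
using MinHeap = std::priority_queue<T, std::vector<T>, std::greater<T>>;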
@ -964,14 +966,14 @@ class FastHistMaker: public TreeUpdater {
// simple switch to defer implementation.
class FastHistTreeUpdaterSwitch : public TreeUpdater {
public:
FastHistTreeUpdaterSwitch() : monotone_(false) {}
FastHistTreeUpdaterSwitch() = default;
void Init(const std::vector<std::pair<std::string, std::string> >& args) override {
for (auto &kv : args) {
if (kv.first == "monotone_constraints" && kv.second.length() != 0) {
monotone_ = true;
}
}
if (inner_.get() == nullptr) {
if (inner_ == nullptr) {
if (monotone_) {
inner_.reset(new FastHistMaker<GradStats, ValueConstraint>());
} else {
@ -982,7 +984,7 @@ class FastHistTreeUpdaterSwitch : public TreeUpdater {
inner_->Init(args);
}

void Update(HostDeviceVector<bst_gpair>* gpair,
void Update(HostDeviceVector<GradientPair>* gpair,
DMatrix* data,
const std::vector<RegTree*>& trees) override {
CHECK(inner_ != nullptr);
@ -991,7 +993,7 @@ class FastHistTreeUpdaterSwitch : public TreeUpdater {

private:
// monotone constraints
bool monotone_;
bool monotone_{false};
// internal implementation
std::unique_ptr<TreeUpdater> inner_;
};

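The monotone_ change above is the usual modernize-use-default-member-init pair: the constant moves onto the member declaration and the constructor becomes = default. Side-by-side sketch with stand-in class names:

class Before {
 public:
  Before() : monotone_(false) {}
 private:
  bool monotone_;
};

class After {
 public:
  After() = default;
 private:
  bool monotone_{false};  // the initializer lives with the declaration
};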
@ -22,25 +22,25 @@ DMLC_REGISTRY_FILE_TAG(updater_gpu);
* @return the uniq key
*/

static HOST_DEV_INLINE node_id_t abs2uniqKey(int tid, const node_id_t* abs,
static HOST_DEV_INLINE NodeIdT abs2uniqKey(int tid, const NodeIdT* abs,
const int* colIds,
node_id_t nodeStart, int nKeys) {
NodeIdT nodeStart, int nKeys) {
int a = abs[tid];
if (a == UNUSED_NODE) return a;
if (a == kUnusedNode) return a;
return ((a - nodeStart) + (colIds[tid] * nKeys));
}

/**
* @struct Pair
* @brief Pair used for key based scan operations on bst_gpair
* @brief Pair used for key based scan operations on GradientPair
*/
struct Pair {
int key;
bst_gpair value;
GradientPair value;
};

/** define a key that's not used at all in the entire boosting process */
static const int NONE_KEY = -100;
static const int kNoneKey = -100;

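For intuition, abs2uniqKey lays keys out node-major within each column, so every (column, node) pair forms its own scan segment. A host-side restatement with one worked value (hypothetical helper, not part of the file):

#include <cassert>

// key = (absolute node id - first node id of the level) + column * nKeys
int UniqKey(int absNode, int colId, int nodeStart, int nKeys) {
  return (absNode - nodeStart) + colId * nKeys;
}

int main() {
  // 4 active nodes starting at absolute id 3; column 2, node 5:
  assert(UniqKey(5, 2, 3, 4) == 10);  // (5 - 3) + 2 * 4
  return 0;
}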
/**
* @brief Allocate temporary buffers needed for scan operations
@ -49,9 +49,9 @@ static const int NONE_KEY = -100;
* @param size number of elements that will be scanned
*/
template <int BLKDIM_L1L3 = 256>
int scanTempBufferSize(int size) {
int nBlks = dh::div_round_up(size, BLKDIM_L1L3);
return nBlks;
int ScanTempBufferSize(int size) {
int num_blocks = dh::DivRoundUp(size, BLKDIM_L1L3);
return num_blocks;
}

struct AddByKey {
@ -76,21 +76,21 @@ struct AddByKey {
* @param instIds instance index buffer
* @return the expected gradient value
*/
HOST_DEV_INLINE bst_gpair get(int id, const bst_gpair* vals,
HOST_DEV_INLINE GradientPair get(int id, const GradientPair* vals,
const int* instIds) {
id = instIds[id];
return vals[id];
}

template <int BLKDIM_L1L3>
__global__ void cubScanByKeyL1(bst_gpair* scans, const bst_gpair* vals,
const int* instIds, bst_gpair* mScans,
int* mKeys, const node_id_t* keys, int nUniqKeys,
const int* colIds, node_id_t nodeStart,
__global__ void cubScanByKeyL1(GradientPair* scans, const GradientPair* vals,
const int* instIds, GradientPair* mScans,
int* mKeys, const NodeIdT* keys, int nUniqKeys,
const int* colIds, NodeIdT nodeStart,
const int size) {
Pair rootPair = {NONE_KEY, bst_gpair(0.f, 0.f)};
Pair rootPair = {kNoneKey, GradientPair(0.f, 0.f)};
int myKey;
bst_gpair myValue;
GradientPair myValue;
typedef cub::BlockScan<Pair, BLKDIM_L1L3> BlockScan;
__shared__ typename BlockScan::TempStorage temp_storage;
Pair threadData;
@ -99,8 +99,8 @@ __global__ void cubScanByKeyL1(bst_gpair* scans, const bst_gpair* vals,
myKey = abs2uniqKey(tid, keys, colIds, nodeStart, nUniqKeys);
myValue = get(tid, vals, instIds);
} else {
myKey = NONE_KEY;
myValue = 0.f;
myKey = kNoneKey;
myValue = {};
}
threadData.key = myKey;
threadData.value = myValue;
@ -119,14 +119,14 @@ __global__ void cubScanByKeyL1(bst_gpair* scans, const bst_gpair* vals,
}
if (threadIdx.x == BLKDIM_L1L3 - 1) {
threadData.value =
(myKey == previousKey) ? threadData.value : bst_gpair(0.0f, 0.0f);
(myKey == previousKey) ? threadData.value : GradientPair(0.0f, 0.0f);
mKeys[blockIdx.x] = myKey;
mScans[blockIdx.x] = threadData.value + myValue;
}
}

template <int BLKSIZE>
__global__ void cubScanByKeyL2(bst_gpair* mScans, int* mKeys, int mLength) {
__global__ void cubScanByKeyL2(GradientPair* mScans, int* mKeys, int mLength) {
typedef cub::BlockScan<Pair, BLKSIZE, cub::BLOCK_SCAN_WARP_SCANS> BlockScan;
Pair threadData;
__shared__ typename BlockScan::TempStorage temp_storage;
@ -140,31 +140,31 @@ __global__ void cubScanByKeyL2(bst_gpair* mScans, int* mKeys, int mLength) {
}

template <int BLKDIM_L1L3>
__global__ void cubScanByKeyL3(bst_gpair* sums, bst_gpair* scans,
const bst_gpair* vals, const int* instIds,
const bst_gpair* mScans, const int* mKeys,
const node_id_t* keys, int nUniqKeys,
const int* colIds, node_id_t nodeStart,
__global__ void cubScanByKeyL3(GradientPair* sums, GradientPair* scans,
const GradientPair* vals, const int* instIds,
const GradientPair* mScans, const int* mKeys,
const NodeIdT* keys, int nUniqKeys,
const int* colIds, NodeIdT nodeStart,
const int size) {
int relId = threadIdx.x;
int tid = (blockIdx.x * BLKDIM_L1L3) + relId;
// to avoid the following warning from nvcc:
// __shared__ memory variable with non-empty constructor or destructor
// (potential race between threads)
__shared__ char gradBuff[sizeof(bst_gpair)];
__shared__ char gradBuff[sizeof(GradientPair)];
__shared__ int s_mKeys;
bst_gpair* s_mScans = reinterpret_cast<bst_gpair*>(gradBuff);
GradientPair* s_mScans = reinterpret_cast<GradientPair*>(gradBuff);
if (tid >= size) return;
// cache block-wide partial scan info
if (relId == 0) {
s_mKeys = (blockIdx.x > 0) ? mKeys[blockIdx.x - 1] : NONE_KEY;
s_mScans[0] = (blockIdx.x > 0) ? mScans[blockIdx.x - 1] : bst_gpair();
s_mKeys = (blockIdx.x > 0) ? mKeys[blockIdx.x - 1] : kNoneKey;
s_mScans[0] = (blockIdx.x > 0) ? mScans[blockIdx.x - 1] : GradientPair();
}
int myKey = abs2uniqKey(tid, keys, colIds, nodeStart, nUniqKeys);
int previousKey =
tid == 0 ? NONE_KEY
tid == 0 ? kNoneKey
: abs2uniqKey(tid - 1, keys, colIds, nodeStart, nUniqKeys);
bst_gpair myValue = scans[tid];
GradientPair myValue = scans[tid];
__syncthreads();
if (blockIdx.x > 0 && s_mKeys == previousKey) {
myValue += s_mScans[0];
@ -174,7 +174,7 @@ __global__ void cubScanByKeyL3(bst_gpair* sums, bst_gpair* scans,
}
if ((previousKey != myKey) && (previousKey >= 0)) {
sums[previousKey] = myValue;
myValue = bst_gpair(0.0f, 0.0f);
myValue = GradientPair(0.0f, 0.0f);
}
scans[tid] = myValue;
}
@ -200,12 +200,12 @@ __global__ void cubScanByKeyL3(bst_gpair* sums, bst_gpair* scans,
* @param nodeStart index of the leftmost node in the current level
*/
template <int BLKDIM_L1L3 = 256, int BLKDIM_L2 = 512>
void reduceScanByKey(bst_gpair* sums, bst_gpair* scans, const bst_gpair* vals,
const int* instIds, const node_id_t* keys, int size,
int nUniqKeys, int nCols, bst_gpair* tmpScans,
int* tmpKeys, const int* colIds, node_id_t nodeStart) {
int nBlks = dh::div_round_up(size, BLKDIM_L1L3);
cudaMemset(sums, 0, nUniqKeys * nCols * sizeof(bst_gpair));
void reduceScanByKey(GradientPair* sums, GradientPair* scans, const GradientPair* vals,
const int* instIds, const NodeIdT* keys, int size,
int nUniqKeys, int nCols, GradientPair* tmpScans,
int* tmpKeys, const int* colIds, NodeIdT nodeStart) {
int nBlks = dh::DivRoundUp(size, BLKDIM_L1L3);
cudaMemset(sums, 0, nUniqKeys * nCols * sizeof(GradientPair));
cubScanByKeyL1<BLKDIM_L1L3>
<<<nBlks, BLKDIM_L1L3>>>(scans, vals, instIds, tmpScans, tmpKeys, keys,
nUniqKeys, colIds, nodeStart, size);
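For reference, here is a rough host-side picture of what the three kernels above compute together: a segmented exclusive prefix sum over vals that restarts at each key change, plus a per-key total. This is a simplification (GradientPair collapsed to a float, the gather through instIds omitted), not the kernels' exact contract:

#include <vector>

void ScanByKeyReference(const std::vector<int>& keys,   // one key per element
                        const std::vector<float>& vals,
                        std::vector<float>* scans,      // exclusive prefix sums
                        std::vector<float>* sums,       // per-key totals
                        int nUniqKeys) {
  scans->assign(vals.size(), 0.f);
  sums->assign(nUniqKeys, 0.f);
  float running = 0.f;
  for (size_t i = 0; i < vals.size(); ++i) {
    if (i == 0 || keys[i] != keys[i - 1]) {
      if (i > 0 && keys[i - 1] >= 0) (*sums)[keys[i - 1]] = running;
      running = 0.f;  // new segment: the exclusive scan restarts at zero
    }
    (*scans)[i] = running;
    running += vals[i];
  }
  if (!vals.empty() && keys.back() >= 0) (*sums)[keys.back()] = running;
}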
@ -243,13 +243,13 @@ struct ExactSplitCandidate {
*/
enum ArgMaxByKeyAlgo {
/** simplest, use gmem-atomics for all updates */
ABK_GMEM = 0,
kAbkGmem = 0,
/** use smem-atomics for updates (when number of keys are less) */
ABK_SMEM
kAbkSmem
};

/** max depth until which to use shared mem based atomics for argmax */
static const int MAX_ABK_LEVELS = 3;
static const int kMaxAbkLevels = 3;

HOST_DEV_INLINE ExactSplitCandidate maxSplit(ExactSplitCandidate a,
ExactSplitCandidate b) {
@ -281,27 +281,27 @@ DEV_INLINE void atomicArgMax(ExactSplitCandidate* address,
}

DEV_INLINE void argMaxWithAtomics(
int id, ExactSplitCandidate* nodeSplits, const bst_gpair* gradScans,
const bst_gpair* gradSums, const float* vals, const int* colIds,
const node_id_t* nodeAssigns, const DeviceNodeStats* nodes, int nUniqKeys,
node_id_t nodeStart, int len, const GPUTrainingParam& param) {
int id, ExactSplitCandidate* nodeSplits, const GradientPair* gradScans,
const GradientPair* gradSums, const float* vals, const int* colIds,
const NodeIdT* nodeAssigns, const DeviceNodeStats* nodes, int nUniqKeys,
NodeIdT nodeStart, int len, const GPUTrainingParam& param) {
int nodeId = nodeAssigns[id];
// @todo: this is really a bad check! but will be fixed when we move
// to key-based reduction
if ((id == 0) ||
!((nodeId == nodeAssigns[id - 1]) && (colIds[id] == colIds[id - 1]) &&
(vals[id] == vals[id - 1]))) {
if (nodeId != UNUSED_NODE) {
if (nodeId != kUnusedNode) {
int sumId = abs2uniqKey(id, nodeAssigns, colIds, nodeStart, nUniqKeys);
bst_gpair colSum = gradSums[sumId];
GradientPair colSum = gradSums[sumId];
int uid = nodeId - nodeStart;
DeviceNodeStats n = nodes[nodeId];
bst_gpair parentSum = n.sum_gradients;
GradientPair parentSum = n.sum_gradients;
float parentGain = n.root_gain;
bool tmp;
ExactSplitCandidate s;
bst_gpair missing = parentSum - colSum;
s.score = loss_chg_missing(gradScans[id], missing, parentSum, parentGain,
GradientPair missing = parentSum - colSum;
s.score = LossChangeMissing(gradScans[id], missing, parentSum, parentGain,
param, tmp);
s.index = id;
atomicArgMax(nodeSplits + uid, s);
@ -310,10 +310,10 @@ DEV_INLINE void argMaxWithAtomics(
}

__global__ void atomicArgMaxByKeyGmem(
ExactSplitCandidate* nodeSplits, const bst_gpair* gradScans,
const bst_gpair* gradSums, const float* vals, const int* colIds,
const node_id_t* nodeAssigns, const DeviceNodeStats* nodes, int nUniqKeys,
node_id_t nodeStart, int len, const TrainParam param) {
ExactSplitCandidate* nodeSplits, const GradientPair* gradScans,
const GradientPair* gradSums, const float* vals, const int* colIds,
const NodeIdT* nodeAssigns, const DeviceNodeStats* nodes, int nUniqKeys,
NodeIdT nodeStart, int len, const TrainParam param) {
int id = threadIdx.x + (blockIdx.x * blockDim.x);
const int stride = blockDim.x * gridDim.x;
for (; id < len; id += stride) {
@ -324,10 +324,10 @@ __global__ void atomicArgMaxByKeyGmem(
}

__global__ void atomicArgMaxByKeySmem(
ExactSplitCandidate* nodeSplits, const bst_gpair* gradScans,
const bst_gpair* gradSums, const float* vals, const int* colIds,
const node_id_t* nodeAssigns, const DeviceNodeStats* nodes, int nUniqKeys,
node_id_t nodeStart, int len, const TrainParam param) {
ExactSplitCandidate* nodeSplits, const GradientPair* gradScans,
const GradientPair* gradSums, const float* vals, const int* colIds,
const NodeIdT* nodeAssigns, const DeviceNodeStats* nodes, int nUniqKeys,
NodeIdT nodeStart, int len, const GPUTrainingParam param) {
extern __shared__ char sArr[];
ExactSplitCandidate* sNodeSplits =
reinterpret_cast<ExactSplitCandidate*>(sArr);
@ -368,27 +368,27 @@ __global__ void atomicArgMaxByKeySmem(
* @param algo which algorithm to use for argmax_by_key
*/
template <int BLKDIM = 256, int ITEMS_PER_THREAD = 4>
void argMaxByKey(ExactSplitCandidate* nodeSplits, const bst_gpair* gradScans,
const bst_gpair* gradSums, const float* vals,
const int* colIds, const node_id_t* nodeAssigns,
void argMaxByKey(ExactSplitCandidate* nodeSplits, const GradientPair* gradScans,
const GradientPair* gradSums, const float* vals,
const int* colIds, const NodeIdT* nodeAssigns,
const DeviceNodeStats* nodes, int nUniqKeys,
node_id_t nodeStart, int len, const TrainParam param,
NodeIdT nodeStart, int len, const TrainParam param,
ArgMaxByKeyAlgo algo) {
dh::fillConst<ExactSplitCandidate, BLKDIM, ITEMS_PER_THREAD>(
dh::get_device_idx(param.gpu_id), nodeSplits, nUniqKeys,
dh::FillConst<ExactSplitCandidate, BLKDIM, ITEMS_PER_THREAD>(
dh::GetDeviceIdx(param.gpu_id), nodeSplits, nUniqKeys,
ExactSplitCandidate());
int nBlks = dh::div_round_up(len, ITEMS_PER_THREAD * BLKDIM);
int nBlks = dh::DivRoundUp(len, ITEMS_PER_THREAD * BLKDIM);
switch (algo) {
case ABK_GMEM:
case kAbkGmem:
atomicArgMaxByKeyGmem<<<nBlks, BLKDIM>>>(
nodeSplits, gradScans, gradSums, vals, colIds, nodeAssigns, nodes,
nUniqKeys, nodeStart, len, param);
break;
case ABK_SMEM:
case kAbkSmem:
atomicArgMaxByKeySmem<<<nBlks, BLKDIM,
sizeof(ExactSplitCandidate) * nUniqKeys>>>(
nodeSplits, gradScans, gradSums, vals, colIds, nodeAssigns, nodes,
nUniqKeys, nodeStart, len, param);
nUniqKeys, nodeStart, len, GPUTrainingParam(param));
break;
default:
throw std::runtime_error("argMaxByKey: Bad algo passed!");
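A serial analogue of the argmax-by-key above may help: for every node (key), retain the best-scoring candidate. The two kernels perform exactly this update concurrently, using global-memory or shared-memory atomics instead of a loop. Sketch with hypothetical types:

#include <cstddef>
#include <vector>

struct Candidate { float score = -1e30f; int index = -1; };

// `best` must hold one slot per node; after the loop, best[k] is the
// highest-scoring element assigned to node k. Illustrative only.
void ArgMaxByKeyReference(const std::vector<int>& nodeOfElem,
                          const std::vector<float>& score,
                          std::vector<Candidate>* best) {
  for (size_t i = 0; i < score.size(); ++i) {
    Candidate& b = (*best)[nodeOfElem[i]];
    if (score[i] > b.score) b = {score[i], static_cast<int>(i)};
  }
}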
@ -404,22 +404,22 @@ __global__ void assignColIds(int* colIds, const int* colOffsets) {
}
}

__global__ void fillDefaultNodeIds(node_id_t* nodeIdsPerInst,
__global__ void fillDefaultNodeIds(NodeIdT* nodeIdsPerInst,
const DeviceNodeStats* nodes, int nRows) {
int id = threadIdx.x + (blockIdx.x * blockDim.x);
if (id >= nRows) {
return;
}
// if this element belongs to none of the currently active node-id's
node_id_t nId = nodeIdsPerInst[id];
if (nId == UNUSED_NODE) {
NodeIdT nId = nodeIdsPerInst[id];
if (nId == kUnusedNode) {
return;
}
const DeviceNodeStats n = nodes[nId];
node_id_t result;
NodeIdT result;
if (n.IsLeaf() || n.IsUnused()) {
result = UNUSED_NODE;
} else if (n.dir == LeftDir) {
result = kUnusedNode;
} else if (n.dir == kLeftDir) {
result = (2 * n.idx) + 1;
} else {
result = (2 * n.idx) + 2;
@ -427,8 +427,8 @@ __global__ void fillDefaultNodeIds(node_id_t* nodeIdsPerInst,
nodeIdsPerInst[id] = result;
}

__global__ void assignNodeIds(node_id_t* nodeIdsPerInst, int* nodeLocations,
const node_id_t* nodeIds, const int* instId,
__global__ void assignNodeIds(NodeIdT* nodeIdsPerInst, int* nodeLocations,
const NodeIdT* nodeIds, const int* instId,
const DeviceNodeStats* nodes,
const int* colOffsets, const float* vals,
int nVals, int nCols) {
@ -441,7 +441,7 @@ __global__ void assignNodeIds(node_id_t* nodeIdsPerInst, int* nodeLocations,
// the nodeIdsPerInst with all default assignments
int nId = nodeIds[id];
// if this element belongs to none of the currently active node-id's
if (nId != UNUSED_NODE) {
if (nId != kUnusedNode) {
const DeviceNodeStats n = nodes[nId];
int colId = n.fidx;
// printf("nid=%d colId=%d id=%d\n", nId, colId, id);
@ -449,7 +449,7 @@ __global__ void assignNodeIds(node_id_t* nodeIdsPerInst, int* nodeLocations,
int end = colOffsets[colId + 1];
// @todo: too many wasteful threads!!
if ((id >= start) && (id < end) && !(n.IsLeaf() || n.IsUnused())) {
node_id_t result = (2 * n.idx) + 1 + (vals[id] >= n.fvalue);
NodeIdT result = (2 * n.idx) + 1 + (vals[id] >= n.fvalue);
nodeIdsPerInst[instId[id]] = result;
}
}
@ -475,31 +475,31 @@ class GPUMaker : public TreeUpdater {
/** whether we have initialized memory already (so as not to repeat!) */
bool allocated;
/** feature values stored in column-major compressed format */
dh::dvec2<float> vals;
dh::dvec<float> vals_cached;
dh::DVec2<float> vals;
dh::DVec<float> vals_cached;
/** corresponding instance id's of these feature values */
dh::dvec2<int> instIds;
dh::dvec<int> instIds_cached;
dh::DVec2<int> instIds;
dh::DVec<int> instIds_cached;
/** column offsets for these feature values */
dh::dvec<int> colOffsets;
dh::dvec<bst_gpair> gradsInst;
dh::dvec2<node_id_t> nodeAssigns;
dh::dvec2<int> nodeLocations;
dh::dvec<DeviceNodeStats> nodes;
dh::dvec<node_id_t> nodeAssignsPerInst;
dh::dvec<bst_gpair> gradSums;
dh::dvec<bst_gpair> gradScans;
dh::dvec<ExactSplitCandidate> nodeSplits;
dh::DVec<int> colOffsets;
dh::DVec<GradientPair> gradsInst;
dh::DVec2<NodeIdT> nodeAssigns;
dh::DVec2<int> nodeLocations;
dh::DVec<DeviceNodeStats> nodes;
dh::DVec<NodeIdT> nodeAssignsPerInst;
dh::DVec<GradientPair> gradSums;
dh::DVec<GradientPair> gradScans;
dh::DVec<ExactSplitCandidate> nodeSplits;
int nVals;
int nRows;
int nCols;
int maxNodes;
int maxLeaves;
dh::CubMemory tmp_mem;
dh::dvec<bst_gpair> tmpScanGradBuff;
dh::dvec<int> tmpScanKeyBuff;
dh::dvec<int> colIds;
dh::bulk_allocator<dh::memory_type::DEVICE> ba;
dh::DVec<GradientPair> tmpScanGradBuff;
dh::DVec<int> tmpScanKeyBuff;
dh::DVec<int> colIds;
dh::BulkAllocator<dh::MemoryType::kDevice> ba;

public:
GPUMaker() : allocated(false) {}
@ -512,9 +512,9 @@ class GPUMaker : public TreeUpdater {
maxLeaves = 1 << param.max_depth;
}

void Update(HostDeviceVector<bst_gpair>* gpair, DMatrix* dmat,
void Update(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
const std::vector<RegTree*>& trees) override {
GradStats::CheckInfo(dmat->info());
GradStats::CheckInfo(dmat->Info());
// rescale learning rate according to size of trees
float lr = param.learning_rate;
param.learning_rate = lr / trees.size();
@ -530,7 +530,7 @@ class GPUMaker : public TreeUpdater {
param.learning_rate = lr;
}
/// @note: Update should be called only after Init!
void UpdateTree(HostDeviceVector<bst_gpair>* gpair, DMatrix* dmat,
void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
RegTree* hTree) {
if (!allocated) {
setupOneTimeData(dmat);
@ -538,33 +538,33 @@ class GPUMaker : public TreeUpdater {
for (int i = 0; i < param.max_depth; ++i) {
if (i == 0) {
// make sure to start on a fresh tree with sorted values!
vals.current_dvec() = vals_cached;
instIds.current_dvec() = instIds_cached;
vals.CurrentDVec() = vals_cached;
instIds.CurrentDVec() = instIds_cached;
transferGrads(gpair);
}
int nNodes = 1 << i;
node_id_t nodeStart = nNodes - 1;
NodeIdT nodeStart = nNodes - 1;
initNodeData(i, nodeStart, nNodes);
findSplit(i, nodeStart, nNodes);
}
// mark all the used nodes with unused children as leaf nodes
markLeaves();
dense2sparse_tree(hTree, nodes, param);
Dense2SparseTree(hTree, nodes, param);
}

void split2node(int nNodes, node_id_t nodeStart) {
auto d_nodes = nodes.data();
auto d_gradScans = gradScans.data();
auto d_gradSums = gradSums.data();
auto d_nodeAssigns = nodeAssigns.current();
auto d_colIds = colIds.data();
auto d_vals = vals.current();
auto d_nodeSplits = nodeSplits.data();
void split2node(int nNodes, NodeIdT nodeStart) {
auto d_nodes = nodes.Data();
auto d_gradScans = gradScans.Data();
auto d_gradSums = gradSums.Data();
auto d_nodeAssigns = nodeAssigns.Current();
auto d_colIds = colIds.Data();
auto d_vals = vals.Current();
auto d_nodeSplits = nodeSplits.Data();
int nUniqKeys = nNodes;
float min_split_loss = param.min_split_loss;
auto gpu_param = GPUTrainingParam(param);

dh::launch_n(param.gpu_id, nNodes, [=] __device__(int uid) {
dh::LaunchN(param.gpu_id, nNodes, [=] __device__(int uid) {
int absNodeId = uid + nodeStart;
ExactSplitCandidate s = d_nodeSplits[uid];
if (s.isSplittable(min_split_loss)) {
@ -573,26 +573,26 @@ class GPUMaker : public TreeUpdater {
abs2uniqKey(idx, d_nodeAssigns, d_colIds, nodeStart, nUniqKeys);
bool missingLeft = true;
const DeviceNodeStats& n = d_nodes[absNodeId];
bst_gpair gradScan = d_gradScans[idx];
bst_gpair gradSum = d_gradSums[nodeInstId];
GradientPair gradScan = d_gradScans[idx];
GradientPair gradSum = d_gradSums[nodeInstId];
float thresh = d_vals[idx];
int colId = d_colIds[idx];
// get the default direction for the current node
bst_gpair missing = n.sum_gradients - gradSum;
loss_chg_missing(gradScan, missing, n.sum_gradients, n.root_gain,
GradientPair missing = n.sum_gradients - gradSum;
LossChangeMissing(gradScan, missing, n.sum_gradients, n.root_gain,
gpu_param, missingLeft);
// get the score/weight/id/gradSum for left and right child nodes
bst_gpair lGradSum = missingLeft ? gradScan + missing : gradScan;
bst_gpair rGradSum = n.sum_gradients - lGradSum;
GradientPair lGradSum = missingLeft ? gradScan + missing : gradScan;
GradientPair rGradSum = n.sum_gradients - lGradSum;

// Create children
d_nodes[left_child_nidx(absNodeId)] =
DeviceNodeStats(lGradSum, left_child_nidx(absNodeId), gpu_param);
d_nodes[right_child_nidx(absNodeId)] =
DeviceNodeStats(rGradSum, right_child_nidx(absNodeId), gpu_param);
d_nodes[LeftChildNodeIdx(absNodeId)] =
DeviceNodeStats(lGradSum, LeftChildNodeIdx(absNodeId), gpu_param);
d_nodes[RightChildNodeIdx(absNodeId)] =
DeviceNodeStats(rGradSum, RightChildNodeIdx(absNodeId), gpu_param);
// Set split for parent
d_nodes[absNodeId].SetSplit(thresh, colId,
missingLeft ? LeftDir : RightDir, lGradSum,
missingLeft ? kLeftDir : kRightDir, lGradSum,
rGradSum);
} else {
// cannot be split further, so this node is a leaf!
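split2node drives the whole per-node update through a device lambda. A minimal sketch of what a LaunchN-style helper has to do (the real dh::LaunchN also selects the device; grid sizing here is illustrative, and nvcc needs --expt-extended-lambda for device lambdas):

template <typename L>
__global__ void LaunchNKernel(int n, L lambda) {
  int i = threadIdx.x + blockIdx.x * blockDim.x;
  if (i < n) lambda(i);  // run the captured lambda once per index
}

template <typename L>
void LaunchN(int n, L lambda) {
  const int kBlock = 256;
  LaunchNKernel<<<(n + kBlock - 1) / kBlock, kBlock>>>(n, lambda);
}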
@ -601,21 +601,21 @@ class GPUMaker : public TreeUpdater {
});
}

void findSplit(int level, node_id_t nodeStart, int nNodes) {
reduceScanByKey(gradSums.data(), gradScans.data(), gradsInst.data(),
instIds.current(), nodeAssigns.current(), nVals, nNodes,
nCols, tmpScanGradBuff.data(), tmpScanKeyBuff.data(),
colIds.data(), nodeStart);
argMaxByKey(nodeSplits.data(), gradScans.data(), gradSums.data(),
vals.current(), colIds.data(), nodeAssigns.current(),
nodes.data(), nNodes, nodeStart, nVals, param,
level <= MAX_ABK_LEVELS ? ABK_SMEM : ABK_GMEM);
void findSplit(int level, NodeIdT nodeStart, int nNodes) {
reduceScanByKey(gradSums.Data(), gradScans.Data(), gradsInst.Data(),
instIds.Current(), nodeAssigns.Current(), nVals, nNodes,
nCols, tmpScanGradBuff.Data(), tmpScanKeyBuff.Data(),
colIds.Data(), nodeStart);
argMaxByKey(nodeSplits.Data(), gradScans.Data(), gradSums.Data(),
vals.Current(), colIds.Data(), nodeAssigns.Current(),
nodes.Data(), nNodes, nodeStart, nVals, param,
level <= kMaxAbkLevels ? kAbkSmem : kAbkGmem);
split2node(nNodes, nodeStart);
}

void allocateAllData(int offsetSize) {
int tmpBuffSize = scanTempBufferSize(nVals);
ba.allocate(dh::get_device_idx(param.gpu_id), param.silent, &vals, nVals,
int tmpBuffSize = ScanTempBufferSize(nVals);
ba.Allocate(dh::GetDeviceIdx(param.gpu_id), param.silent, &vals, nVals,
&vals_cached, nVals, &instIds, nVals, &instIds_cached, nVals,
&colOffsets, offsetSize, &gradsInst, nRows, &nodeAssigns, nVals,
&nodeLocations, nVals, &nodes, maxNodes, &nodeAssignsPerInst,
@ -625,7 +625,7 @@ class GPUMaker : public TreeUpdater {
}

void setupOneTimeData(DMatrix* dmat) {
size_t free_memory = dh::available_memory(dh::get_device_idx(param.gpu_id));
size_t free_memory = dh::AvailableMemory(dh::GetDeviceIdx(param.gpu_id));
if (!dmat->SingleColBlock()) {
throw std::runtime_error("exact::GPUBuilder - must have 1 column block");
}
@ -640,11 +640,11 @@ class GPUMaker : public TreeUpdater {

void convertToCsc(DMatrix* dmat, std::vector<float>* fval,
std::vector<int>* fId, std::vector<size_t>* offset) {
MetaInfo info = dmat->info();
CHECK(info.num_col < std::numeric_limits<int>::max());
CHECK(info.num_row < std::numeric_limits<int>::max());
nRows = static_cast<int>(info.num_row);
nCols = static_cast<int>(info.num_col);
MetaInfo info = dmat->Info();
CHECK(info.num_col_ < std::numeric_limits<int>::max());
CHECK(info.num_row_ < std::numeric_limits<int>::max());
nRows = static_cast<int>(info.num_row_);
nCols = static_cast<int>(info.num_col_);
offset->reserve(nCols + 1);
offset->push_back(0);
fval->reserve(nCols * nRows);
@ -677,56 +677,56 @@ class GPUMaker : public TreeUpdater {
void transferAndSortData(const std::vector<float>& fval,
const std::vector<int>& fId,
const std::vector<size_t>& offset) {
vals.current_dvec() = fval;
instIds.current_dvec() = fId;
vals.CurrentDVec() = fval;
instIds.CurrentDVec() = fId;
colOffsets = offset;
dh::segmentedSort<float, int>(&tmp_mem, &vals, &instIds, nVals, nCols,
dh::SegmentedSort<float, int>(&tmp_mem, &vals, &instIds, nVals, nCols,
colOffsets);
vals_cached = vals.current_dvec();
instIds_cached = instIds.current_dvec();
assignColIds<<<nCols, 512>>>(colIds.data(), colOffsets.data());
vals_cached = vals.CurrentDVec();
instIds_cached = instIds.CurrentDVec();
assignColIds<<<nCols, 512>>>(colIds.Data(), colOffsets.Data());
}

void transferGrads(HostDeviceVector<bst_gpair>* gpair) {
void transferGrads(HostDeviceVector<GradientPair>* gpair) {
// HACK
dh::safe_cuda(cudaMemcpy(gradsInst.data(), gpair->ptr_d(param.gpu_id),
sizeof(bst_gpair) * nRows,
dh::safe_cuda(cudaMemcpy(gradsInst.Data(), gpair->DevicePointer(param.gpu_id),
sizeof(GradientPair) * nRows,
cudaMemcpyDefault));
// evaluate the full-grad reduction for the root node
dh::sumReduction<bst_gpair>(tmp_mem, gradsInst, gradSums, nRows);
dh::SumReduction<GradientPair>(tmp_mem, gradsInst, gradSums, nRows);
}

void initNodeData(int level, node_id_t nodeStart, int nNodes) {
void initNodeData(int level, NodeIdT nodeStart, int nNodes) {
// all instances belong to root node at the beginning!
if (level == 0) {
nodes.fill(DeviceNodeStats());
nodeAssigns.current_dvec().fill(0);
nodeAssignsPerInst.fill(0);
nodes.Fill(DeviceNodeStats());
nodeAssigns.CurrentDVec().Fill(0);
nodeAssignsPerInst.Fill(0);
// for root node, just update the gradient/score/weight/id info
// before splitting it! Currently all data is on GPU, hence this
// stupid little kernel
auto d_nodes = nodes.data();
auto d_sums = gradSums.data();
auto d_nodes = nodes.Data();
auto d_sums = gradSums.Data();
auto gpu_params = GPUTrainingParam(param);
dh::launch_n(param.gpu_id, 1, [=] __device__(int idx) {
dh::LaunchN(param.gpu_id, 1, [=] __device__(int idx) {
d_nodes[0] = DeviceNodeStats(d_sums[0], 0, gpu_params);
});
} else {
const int BlkDim = 256;
const int ItemsPerThread = 4;
// assign default node ids first
int nBlks = dh::div_round_up(nRows, BlkDim);
fillDefaultNodeIds<<<nBlks, BlkDim>>>(nodeAssignsPerInst.data(),
nodes.data(), nRows);
int nBlks = dh::DivRoundUp(nRows, BlkDim);
fillDefaultNodeIds<<<nBlks, BlkDim>>>(nodeAssignsPerInst.Data(),
nodes.Data(), nRows);
// evaluate the correct child indices of non-missing values next
nBlks = dh::div_round_up(nVals, BlkDim * ItemsPerThread);
nBlks = dh::DivRoundUp(nVals, BlkDim * ItemsPerThread);
assignNodeIds<<<nBlks, BlkDim>>>(
nodeAssignsPerInst.data(), nodeLocations.current(),
nodeAssigns.current(), instIds.current(), nodes.data(),
colOffsets.data(), vals.current(), nVals, nCols);
nodeAssignsPerInst.Data(), nodeLocations.Current(),
nodeAssigns.Current(), instIds.Current(), nodes.Data(),
colOffsets.Data(), vals.Current(), nVals, nCols);
// gather the node assignments across all other columns too
dh::gather(dh::get_device_idx(param.gpu_id), nodeAssigns.current(),
nodeAssignsPerInst.data(), instIds.current(), nVals);
dh::Gather(dh::GetDeviceIdx(param.gpu_id), nodeAssigns.Current(),
nodeAssignsPerInst.Data(), instIds.Current(), nVals);
sortKeys(level);
}
}
@ -734,19 +734,19 @@ class GPUMaker : public TreeUpdater {
void sortKeys(int level) {
// segmented-sort the arrays based on node-id's
// but we don't need more than level+1 bits for sorting!
segmentedSort(&tmp_mem, &nodeAssigns, &nodeLocations, nVals, nCols,
SegmentedSort(&tmp_mem, &nodeAssigns, &nodeLocations, nVals, nCols,
colOffsets, 0, level + 1);
dh::gather<float, int>(dh::get_device_idx(param.gpu_id), vals.other(),
vals.current(), instIds.other(), instIds.current(),
nodeLocations.current(), nVals);
dh::Gather<float, int>(dh::GetDeviceIdx(param.gpu_id), vals.other(),
vals.Current(), instIds.other(), instIds.Current(),
nodeLocations.Current(), nVals);
vals.buff().selector ^= 1;
instIds.buff().selector ^= 1;
}

void markLeaves() {
const int BlkDim = 128;
int nBlks = dh::div_round_up(maxNodes, BlkDim);
markLeavesKernel<<<nBlks, BlkDim>>>(nodes.data(), maxNodes);
int nBlks = dh::DivRoundUp(maxNodes, BlkDim);
markLeavesKernel<<<nBlks, BlkDim>>>(nodes.Data(), maxNodes);
}
};


@ -15,7 +15,7 @@
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600

#else
__device__ __forceinline__ double atomicAdd(double* address, double val) {
XGBOOST_DEVICE __forceinline__ double atomicAdd(double* address, double val) {
unsigned long long int* address_as_ull =
(unsigned long long int*)address; // NOLINT
unsigned long long int old = *address_as_ull, assumed; // NOLINT
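The hunk above only shows the opening of the pre-sm_60 fallback; for context, the canonical compare-and-swap loop it continues with looks like this (a sketch of the standard pattern from the CUDA programming guide, not a copy of the file):

__device__ __forceinline__ double AtomicAddDouble(double* address, double val) {
  unsigned long long int* address_as_ull =
      reinterpret_cast<unsigned long long int*>(address);
  unsigned long long int old = *address_as_ull, assumed;
  do {
    assumed = old;
    old = atomicCAS(address_as_ull, assumed,
                    __double_as_longlong(val + __longlong_as_double(assumed)));
  } while (assumed != old);  // retry if another thread updated the word
  return __longlong_as_double(old);
}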
@ -37,8 +37,8 @@ namespace xgboost {
namespace tree {

// Atomic add function for double precision gradients
__device__ __forceinline__ void AtomicAddGpair(bst_gpair_precise* dest,
const bst_gpair& gpair) {
__device__ __forceinline__ void AtomicAddGpair(GradientPairPrecise* dest,
const GradientPair& gpair) {
auto dst_ptr = reinterpret_cast<double*>(dest);

atomicAdd(dst_ptr, static_cast<double>(gpair.GetGrad()));
@ -46,11 +46,11 @@ __device__ __forceinline__ void AtomicAddGpair(bst_gpair_precise* dest,
}

// For integer gradients
__device__ __forceinline__ void AtomicAddGpair(bst_gpair_integer* dest,
const bst_gpair& gpair) {
__device__ __forceinline__ void AtomicAddGpair(GradientPairInteger* dest,
const GradientPair& gpair) {
auto dst_ptr = reinterpret_cast<unsigned long long int*>(dest); // NOLINT
bst_gpair_integer tmp(gpair.GetGrad(), gpair.GetHess());
auto src_ptr = reinterpret_cast<bst_gpair_integer::value_t*>(&tmp);
GradientPairInteger tmp(gpair.GetGrad(), gpair.GetHess());
auto src_ptr = reinterpret_cast<GradientPairInteger::ValueT*>(&tmp);

atomicAdd(dst_ptr,
static_cast<unsigned long long int>(*src_ptr)); // NOLINT
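The reinterpret_casts above are safe to add through because each packed integer field occupies its own 64-bit word, and two's-complement addition on the raw word is exactly integer addition. A stripped-down sketch of the same idea (hypothetical helper with plain long long fields, not the file's API):

// `dest` points at two adjacent 64-bit fields (grad, hess); each field is
// accumulated with its own hardware atomic, relying on unsigned wrap-around
// matching two's-complement signed addition.
__device__ void AtomicAddPair(long long* dest, long long grad, long long hess) {
  auto* p = reinterpret_cast<unsigned long long int*>(dest);
  atomicAdd(p, static_cast<unsigned long long int>(grad));      // field 0
  atomicAdd(p + 1, static_cast<unsigned long long int>(hess));  // field 1
}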
@ -59,13 +59,11 @@ __device__ __forceinline__ void AtomicAddGpair(bst_gpair_integer* dest,
}

/**
* \fn void CheckGradientMax(const dh::dvec<bst_gpair>& gpair)
*
* \brief Check maximum gradient value is below 2^16. This is to prevent
* overflow when using integer gradient summation.
*/

inline void CheckGradientMax(const std::vector<bst_gpair>& gpair) {
inline void CheckGradientMax(const std::vector<GradientPair>& gpair) {
auto* ptr = reinterpret_cast<const float*>(gpair.data());
float abs_max =
std::accumulate(ptr, ptr + (gpair.size() * 2), 0.f,
@ -87,19 +85,19 @@ struct GPUTrainingParam {
// default=0 means no constraint on weight delta
float max_delta_step;

__host__ __device__ GPUTrainingParam() {}
GPUTrainingParam() = default;

__host__ __device__ GPUTrainingParam(const TrainParam& param)
XGBOOST_DEVICE explicit GPUTrainingParam(const TrainParam& param)
: min_child_weight(param.min_child_weight),
reg_lambda(param.reg_lambda),
reg_alpha(param.reg_alpha),
max_delta_step(param.max_delta_step) {}
};

typedef int node_id_t;
using NodeIdT = int;

/** used to assign default id to a Node */
static const int UNUSED_NODE = -1;
static const int kUnusedNode = -1;

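Marking the single-argument constructor explicit (google-explicit-constructor) is the one behavioral tightening in this hunk: TrainParam no longer converts to GPUTrainingParam silently, which is why a caller above now spells out GPUTrainingParam(param). Minimal illustration with stand-in types:

struct Config { int depth; };

struct Params {
  explicit Params(const Config& c) : depth(c.depth) {}
  int depth;
};

void Use(const Params&) {}

void Caller() {
  Config c{6};
  // Use(c);        // no longer compiles: implicit conversion is blocked
  Use(Params(c));   // the conversion must now be written out
}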
/**
|
||||
* @enum DefaultDirection node.cuh
|
||||
@ -107,9 +105,9 @@ static const int UNUSED_NODE = -1;
|
||||
*/
|
||||
enum DefaultDirection {
|
||||
/** move to left child */
|
||||
LeftDir = 0,
|
||||
kLeftDir = 0,
|
||||
/** move to right child */
|
||||
RightDir
|
||||
kRightDir
|
||||
};
|
||||
|
||||
struct DeviceSplitCandidate {
|
||||
@ -117,15 +115,15 @@ struct DeviceSplitCandidate {
|
||||
DefaultDirection dir;
|
||||
float fvalue;
|
||||
int findex;
|
||||
bst_gpair_integer left_sum;
|
||||
bst_gpair_integer right_sum;
|
||||
GradientPair left_sum;
|
||||
GradientPair right_sum;
|
||||
|
||||
__host__ __device__ DeviceSplitCandidate()
|
||||
: loss_chg(-FLT_MAX), dir(LeftDir), fvalue(0), findex(-1) {}
|
||||
XGBOOST_DEVICE DeviceSplitCandidate()
|
||||
: loss_chg(-FLT_MAX), dir(kLeftDir), fvalue(0), findex(-1) {}
|
||||
|
||||
template <typename param_t>
|
||||
__host__ __device__ void Update(const DeviceSplitCandidate& other,
|
||||
const param_t& param) {
|
||||
template <typename ParamT>
|
||||
XGBOOST_DEVICE void Update(const DeviceSplitCandidate& other,
|
||||
const ParamT& param) {
|
||||
if (other.loss_chg > loss_chg &&
|
||||
other.left_sum.GetHess() >= param.min_child_weight &&
|
||||
other.right_sum.GetHess() >= param.min_child_weight) {
|
||||
@ -133,10 +131,10 @@ struct DeviceSplitCandidate {
|
||||
}
|
||||
}
|
||||
|
||||
__device__ void Update(float loss_chg_in, DefaultDirection dir_in,
|
||||
XGBOOST_DEVICE void Update(float loss_chg_in, DefaultDirection dir_in,
|
||||
float fvalue_in, int findex_in,
|
||||
bst_gpair_integer left_sum_in,
|
||||
bst_gpair_integer right_sum_in,
|
||||
GradientPair left_sum_in,
|
||||
GradientPair right_sum_in,
|
||||
const GPUTrainingParam& param) {
|
||||
if (loss_chg_in > loss_chg &&
|
||||
left_sum_in.GetHess() >= param.min_child_weight &&
|
||||
@ -149,11 +147,11 @@ struct DeviceSplitCandidate {
|
||||
findex = findex_in;
|
||||
}
|
||||
}
|
||||
__device__ bool IsValid() const { return loss_chg > 0.0f; }
|
||||
XGBOOST_DEVICE bool IsValid() const { return loss_chg > 0.0f; }
|
||||
};

struct DeviceNodeStats {
bst_gpair sum_gradients;
GradientPair sum_gradients;
float root_gain;
float weight;

@ -161,31 +159,31 @@ struct DeviceNodeStats {
DefaultDirection dir;
/** threshold value for comparison */
float fvalue;
bst_gpair left_sum;
bst_gpair right_sum;
GradientPair left_sum;
GradientPair right_sum;
/** \brief The feature index. */
int fidx;
/** node id (used as key for reduce/scan) */
node_id_t idx;
NodeIdT idx;

HOST_DEV_INLINE DeviceNodeStats()
: sum_gradients(),
root_gain(-FLT_MAX),
weight(-FLT_MAX),
dir(LeftDir),
dir(kLeftDir),
fvalue(0.f),
left_sum(),
right_sum(),
fidx(UNUSED_NODE),
idx(UNUSED_NODE) {}
fidx(kUnusedNode),
idx(kUnusedNode) {}

template <typename param_t>
HOST_DEV_INLINE DeviceNodeStats(bst_gpair sum_gradients, node_id_t nidx,
const param_t& param)
template <typename ParamT>
HOST_DEV_INLINE DeviceNodeStats(GradientPair sum_gradients, NodeIdT nidx,
const ParamT& param)
: sum_gradients(sum_gradients),
dir(LeftDir),
dir(kLeftDir),
fvalue(0.f),
fidx(UNUSED_NODE),
fidx(kUnusedNode),
idx(nidx) {
this->root_gain =
CalcGain(param, sum_gradients.GetGrad(), sum_gradients.GetHess());
@ -194,7 +192,7 @@ struct DeviceNodeStats {
}

HOST_DEV_INLINE void SetSplit(float fvalue, int fidx, DefaultDirection dir,
bst_gpair left_sum, bst_gpair right_sum) {
GradientPair left_sum, GradientPair right_sum) {
this->fvalue = fvalue;
this->fidx = fidx;
this->dir = dir;
@ -208,11 +206,11 @@ struct DeviceNodeStats {
}

/** Tells whether this node is part of the decision tree */
HOST_DEV_INLINE bool IsUnused() const { return (idx == UNUSED_NODE); }
HOST_DEV_INLINE bool IsUnused() const { return (idx == kUnusedNode); }

/** Tells whether this node is a leaf of the decision tree */
HOST_DEV_INLINE bool IsLeaf() const {
return (!IsUnused() && (fidx == UNUSED_NODE));
return (!IsUnused() && (fidx == kUnusedNode));
}
};

@ -221,37 +219,37 @@ struct SumCallbackOp {
// Running prefix
T running_total;
// Constructor
__device__ SumCallbackOp() : running_total(T()) {}
__device__ T operator()(T block_aggregate) {
XGBOOST_DEVICE SumCallbackOp() : running_total(T()) {}
XGBOOST_DEVICE T operator()(T block_aggregate) {
T old_prefix = running_total;
running_total += block_aggregate;
return old_prefix;
}
};
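SumCallbackOp is the stateful prefix functor that cub's BlockScan accepts when one logical scan is carved into tiles of BLOCK_THREADS items: each call receives the freshly computed tile aggregate and must return the running total of all earlier tiles. A sketch of the call-site pattern, mirroring its use in EvaluateFeature further down (types and names illustrative):

auto prefix_op = SumCallbackOp<float>();
for (int begin = range_begin; begin < range_end; begin += BLOCK_THREADS) {
  // prefix_op feeds the sum of all previous tiles into this tile's scan and
  // is updated with the new block aggregate afterwards.
  BlockScanT(temp_storage.scan).ExclusiveScan(in, out, cub::Sum(), prefix_op);
}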

template <typename gpair_t>
__device__ inline float device_calc_loss_chg(const GPUTrainingParam& param,
const gpair_t& left,
const gpair_t& parent_sum,
template <typename GradientPairT>
XGBOOST_DEVICE inline float DeviceCalcLossChange(const GPUTrainingParam& param,
const GradientPairT& left,
const GradientPairT& parent_sum,
const float& parent_gain) {
gpair_t right = parent_sum - left;
GradientPairT right = parent_sum - left;
float left_gain = CalcGain(param, left.GetGrad(), left.GetHess());
float right_gain = CalcGain(param, right.GetGrad(), right.GetHess());
return left_gain + right_gain - parent_gain;
}
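The returned quantity is the usual XGBoost split gain. A simplified reference implementation of the arithmetic, assuming only the L2 term lambda (CalcGain itself additionally handles reg_alpha and max_delta_step):

// gain(G, H) = G^2 / (H + lambda)
// loss_chg   = gain(G_left, H_left) + gain(G_right, H_right) - parent_gain
inline float SimpleLossChange(float gl, float hl, float gr, float hr,
                              float lambda, float parent_gain) {
  auto gain = [lambda](float g, float h) { return (g * g) / (h + lambda); };
  return gain(gl, hl) + gain(gr, hr) - parent_gain;
}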

// Without constraints
template <typename gpair_t>
__device__ float inline loss_chg_missing(const gpair_t& scan,
const gpair_t& missing,
const gpair_t& parent_sum,
template <typename GradientPairT>
XGBOOST_DEVICE float inline LossChangeMissing(const GradientPairT& scan,
const GradientPairT& missing,
const GradientPairT& parent_sum,
const float& parent_gain,
const GPUTrainingParam& param,
bool& missing_left_out) { // NOLINT
float missing_left_loss =
device_calc_loss_chg(param, scan + missing, parent_sum, parent_gain);
DeviceCalcLossChange(param, scan + missing, parent_sum, parent_gain);
float missing_right_loss =
device_calc_loss_chg(param, scan, parent_sum, parent_gain);
DeviceCalcLossChange(param, scan, parent_sum, parent_gain);

if (missing_left_loss >= missing_right_loss) {
missing_left_out = true;
@ -263,9 +261,9 @@ __device__ float inline loss_chg_missing(
}

// With constraints
template <typename gpair_t>
__device__ float inline loss_chg_missing(
const gpair_t& scan, const gpair_t& missing, const gpair_t& parent_sum,
template <typename GradientPairT>
XGBOOST_DEVICE float inline LossChangeMissing(
const GradientPairT& scan, const GradientPairT& missing, const GradientPairT& parent_sum,
const float& parent_gain, const GPUTrainingParam& param, int constraint,
const ValueConstraint& value_constraint,
bool& missing_left_out) { // NOLINT
@ -285,54 +283,54 @@ __device__ float inline loss_chg_missing(
}
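Both overloads resolve the default direction the same way: the split is scored twice, once with the missing mass folded into the left child (scan + missing) and once with it kept right (scan alone), and the larger loss change wins, reported through missing_left_out. This is also why the histogram only needs observed entries; the missing sum is recovered as parent_sum - feature_sum at the call site. The decision in miniature:

// loss_left  = DeviceCalcLossChange(param, scan + missing, parent_sum, parent_gain);
// loss_right = DeviceCalcLossChange(param, scan,           parent_sum, parent_gain);
// missing_left_out = (loss_left >= loss_right);
// return missing_left_out ? loss_left : loss_right;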

// Total number of nodes in tree, given depth
__host__ __device__ inline int n_nodes(int depth) {
XGBOOST_DEVICE inline int MaxNodesDepth(int depth) {
return (1 << (depth + 1)) - 1;
}

// Number of nodes at this level of the tree
__host__ __device__ inline int n_nodes_level(int depth) { return 1 << depth; }
XGBOOST_DEVICE inline int MaxNodesLevel(int depth) { return 1 << depth; }

// Whether a node is currently being processed at current depth
__host__ __device__ inline bool is_active(int nidx, int depth) {
return nidx >= n_nodes(depth - 1);
XGBOOST_DEVICE inline bool IsNodeActive(int nidx, int depth) {
return nidx >= MaxNodesDepth(depth - 1);
}

__host__ __device__ inline int parent_nidx(int nidx) { return (nidx - 1) / 2; }
XGBOOST_DEVICE inline int ParentNodeIdx(int nidx) { return (nidx - 1) / 2; }

__host__ __device__ inline int left_child_nidx(int nidx) {
XGBOOST_DEVICE inline int LeftChildNodeIdx(int nidx) {
return nidx * 2 + 1;
}

__host__ __device__ inline int right_child_nidx(int nidx) {
XGBOOST_DEVICE inline int RightChildNodeIdx(int nidx) {
return nidx * 2 + 2;
}

__host__ __device__ inline bool is_left_child(int nidx) {
XGBOOST_DEVICE inline bool IsLeftChild(int nidx) {
return nidx % 2 == 1;
}
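These helpers encode the usual array ("binary heap") layout of a complete binary tree rooted at index 0. A quick host-side check of the identities, assuming the renamed functions above:

#include <cassert>
int main() {
  assert(MaxNodesDepth(2) == 7);              // (1 << 3) - 1 nodes in a depth-2 tree
  assert(MaxNodesLevel(2) == 4);              // 1 << 2 nodes on level 2
  assert(LeftChildNodeIdx(0) == 1);
  assert(RightChildNodeIdx(0) == 2);
  assert(ParentNodeIdx(4) == 1);              // (4 - 1) / 2
  assert(IsLeftChild(3) && !IsLeftChild(4));  // odd indices are left children
  assert(IsNodeActive(3, 2) && !IsNodeActive(2, 2));
  return 0;
}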

// Copy gpu dense representation of tree to xgboost sparse representation
inline void dense2sparse_tree(RegTree* p_tree,
const dh::dvec<DeviceNodeStats>& nodes,
inline void Dense2SparseTree(RegTree* p_tree,
const dh::DVec<DeviceNodeStats>& nodes,
const TrainParam& param) {
RegTree& tree = *p_tree;
std::vector<DeviceNodeStats> h_nodes = nodes.as_vector();
std::vector<DeviceNodeStats> h_nodes = nodes.AsVector();

int nid = 0;
for (int gpu_nid = 0; gpu_nid < h_nodes.size(); gpu_nid++) {
const DeviceNodeStats& n = h_nodes[gpu_nid];
if (!n.IsUnused() && !n.IsLeaf()) {
tree.AddChilds(nid);
tree[nid].set_split(n.fidx, n.fvalue, n.dir == LeftDir);
tree.stat(nid).loss_chg = n.root_gain;
tree.stat(nid).base_weight = n.weight;
tree.stat(nid).sum_hess = n.sum_gradients.GetHess();
tree[tree[nid].cleft()].set_leaf(0);
tree[tree[nid].cright()].set_leaf(0);
tree[nid].SetSplit(n.fidx, n.fvalue, n.dir == kLeftDir);
tree.Stat(nid).loss_chg = n.root_gain;
tree.Stat(nid).base_weight = n.weight;
tree.Stat(nid).sum_hess = n.sum_gradients.GetHess();
tree[tree[nid].LeftChild()].SetLeaf(0);
tree[tree[nid].RightChild()].SetLeaf(0);
nid++;
} else if (n.IsLeaf()) {
tree[nid].set_leaf(n.weight * param.learning_rate);
tree.stat(nid).sum_hess = n.sum_gradients.GetHess();
tree[nid].SetLeaf(n.weight * param.learning_rate);
tree.Stat(nid).sum_hess = n.sum_gradients.GetHess();
nid++;
}
}
@ -346,11 +344,11 @@ struct BernoulliRng {
float p;
uint32_t seed;

__host__ __device__ BernoulliRng(float p, size_t seed_) : p(p) {
XGBOOST_DEVICE BernoulliRng(float p, size_t seed_) : p(p) {
seed = static_cast<uint32_t>(seed_);
}

__host__ __device__ bool operator()(const int i) const {
XGBOOST_DEVICE bool operator()(const int i) const {
thrust::default_random_engine rng(seed);
thrust::uniform_real_distribution<float> dist;
rng.discard(i);
@ -359,25 +357,25 @@ struct BernoulliRng {
};
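BernoulliRng is counter-based rather than stateful: the engine is re-seeded on every call and discard(i) jumps straight to the i-th draw, so the same (seed, i) pair always produces the same coin flip regardless of thread scheduling. Usage sketch (values illustrative):

BernoulliRng rng(0.5f, 42);  // keep with probability p = 0.5
bool keep_row_7 = rng(7);    // deterministic for a fixed seed
bool same_again = rng(7);    // identical result: no hidden state advances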

// Set gradient pair to 0 with p = 1 - subsample
inline void subsample_gpair(dh::dvec<bst_gpair>* p_gpair, float subsample,
inline void SubsampleGradientPair(dh::DVec<GradientPair>* p_gpair, float subsample,
int offset = 0) {
if (subsample == 1.0) {
return;
}

dh::dvec<bst_gpair>& gpair = *p_gpair;
dh::DVec<GradientPair>& gpair = *p_gpair;

auto d_gpair = gpair.data();
auto d_gpair = gpair.Data();
BernoulliRng rng(subsample, common::GlobalRandom()());

dh::launch_n(gpair.device_idx(), gpair.size(), [=] __device__(int i) {
dh::LaunchN(gpair.DeviceIdx(), gpair.Size(), [=] XGBOOST_DEVICE(int i) {
if (!rng(i + offset)) {
d_gpair[i] = bst_gpair();
d_gpair[i] = GradientPair();
}
});
}

inline std::vector<int> col_sample(std::vector<int> features, float colsample) {
inline std::vector<int> ColSample(std::vector<int> features, float colsample) {
CHECK_GT(features.size(), 0);
int n = std::max(1, static_cast<int>(colsample * features.size()));

@ -397,9 +395,9 @@ inline std::vector<int> col_sample(std::vector<int> features, float colsample) {
*/

class ColumnSampler {
std::vector<int> feature_set_tree;
std::map<int, std::vector<int>> feature_set_level;
TrainParam param;
std::vector<int> feature_set_tree_;
std::map<int, std::vector<int>> feature_set_level_;
TrainParam param_;

public:
/**
@ -413,10 +411,10 @@ class ColumnSampler {

void Init(int64_t num_col, const TrainParam& param) {
this->Reset();
this->param = param;
feature_set_tree.resize(num_col);
std::iota(feature_set_tree.begin(), feature_set_tree.end(), 0);
feature_set_tree = col_sample(feature_set_tree, param.colsample_bytree);
this->param_ = param;
feature_set_tree_.resize(num_col);
std::iota(feature_set_tree_.begin(), feature_set_tree_.end(), 0);
feature_set_tree_ = ColSample(feature_set_tree_, param.colsample_bytree);
}

/**
@ -426,8 +424,8 @@ class ColumnSampler {
*/

void Reset() {
feature_set_tree.clear();
feature_set_level.clear();
feature_set_tree_.clear();
feature_set_level_.clear();
}

/**
@ -442,13 +440,13 @@ class ColumnSampler {
*/

bool ColumnUsed(int column, int depth) {
if (feature_set_level.count(depth) == 0) {
feature_set_level[depth] =
col_sample(feature_set_tree, param.colsample_bylevel);
if (feature_set_level_.count(depth) == 0) {
feature_set_level_[depth] =
ColSample(feature_set_tree_, param_.colsample_bylevel);
}

return std::binary_search(feature_set_level[depth].begin(),
feature_set_level[depth].end(), column);
return std::binary_search(feature_set_level_[depth].begin(),
feature_set_level_[depth].end(), column);
}
};
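Usage sketch (hypothetical values): the tree-level feature set is drawn once in Init, while per-level sets are drawn lazily on the first query for a given depth; ColumnUsed then answers membership with std::binary_search, which presumes ColSample hands back a sorted subset.

ColumnSampler sampler;
TrainParam param;  // assume colsample_bytree / colsample_bylevel already set
sampler.Init(/*num_col=*/100, param);
bool used = sampler.ColumnUsed(/*column=*/3, /*depth=*/2);  // samples depth 2 on first call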


@ -23,22 +23,22 @@ namespace tree {

DMLC_REGISTRY_FILE_TAG(updater_gpu_hist);

typedef bst_gpair_precise gpair_sum_t;
using GradientPairSumT = GradientPairPrecise;

template <int BLOCK_THREADS, typename reduce_t, typename temp_storage_t>
__device__ gpair_sum_t ReduceFeature(const gpair_sum_t* begin,
const gpair_sum_t* end,
temp_storage_t* temp_storage) {
__shared__ cub::Uninitialized<gpair_sum_t> uninitialized_sum;
gpair_sum_t& shared_sum = uninitialized_sum.Alias();
template <int BLOCK_THREADS, typename ReduceT, typename TempStorageT>
__device__ GradientPairSumT ReduceFeature(const GradientPairSumT* begin,
const GradientPairSumT* end,
TempStorageT* temp_storage) {
__shared__ cub::Uninitialized<GradientPairSumT> uninitialized_sum;
GradientPairSumT& shared_sum = uninitialized_sum.Alias();

gpair_sum_t local_sum = gpair_sum_t();
GradientPairSumT local_sum = GradientPairSumT();
for (auto itr = begin; itr < end; itr += BLOCK_THREADS) {
bool thread_active = itr + threadIdx.x < end;
// Scan histogram
gpair_sum_t bin = thread_active ? *(itr + threadIdx.x) : gpair_sum_t();
GradientPairSumT bin = thread_active ? *(itr + threadIdx.x) : GradientPairSumT();

local_sum += reduce_t(temp_storage->sum_reduce).Reduce(bin, cub::Sum());
local_sum += ReduceT(temp_storage->sum_reduce).Reduce(bin, cub::Sum());
}

if (threadIdx.x == 0) {
@ -49,41 +49,41 @@ __device__ gpair_sum_t ReduceFeature(const gpair_sum_t* begin,
return shared_sum;
}
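The same strided block reduction in isolation, as a self-contained kernel with a plain float payload (names illustrative): every thread strides across the range, cub::BlockReduce folds the partials, and only thread 0 holds the defined result, which the code above then broadcasts through shared memory.

#include <cub/cub.cuh>

template <int BLOCK_THREADS>
__global__ void SumBinsSketch(const float* bins, int n, float* out) {
  typedef cub::BlockReduce<float, BLOCK_THREADS> BlockReduceT;
  __shared__ typename BlockReduceT::TempStorage temp_storage;
  float local = 0.f;
  for (int i = threadIdx.x; i < n; i += BLOCK_THREADS) {
    local += bins[i];                       // per-thread strided partial sum
  }
  float total = BlockReduceT(temp_storage).Sum(local);  // valid in thread 0 only
  if (threadIdx.x == 0) {
    *out = total;
  }
}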

template <int BLOCK_THREADS, typename reduce_t, typename scan_t,
typename max_reduce_t, typename temp_storage_t>
__device__ void EvaluateFeature(int fidx, const gpair_sum_t* hist,
template <int BLOCK_THREADS, typename ReduceT, typename scan_t,
typename max_ReduceT, typename TempStorageT>
__device__ void EvaluateFeature(int fidx, const GradientPairSumT* hist,
const int* feature_segments, float min_fvalue,
const float* gidx_fvalue_map,
DeviceSplitCandidate* best_split,
const DeviceNodeStats& node,
const GPUTrainingParam& param,
temp_storage_t* temp_storage, int constraint,
TempStorageT* temp_storage, int constraint,
const ValueConstraint& value_constraint) {
int gidx_begin = feature_segments[fidx];
int gidx_end = feature_segments[fidx + 1];

gpair_sum_t feature_sum = ReduceFeature<BLOCK_THREADS, reduce_t>(
GradientPairSumT feature_sum = ReduceFeature<BLOCK_THREADS, ReduceT>(
hist + gidx_begin, hist + gidx_end, temp_storage);

auto prefix_op = SumCallbackOp<gpair_sum_t>();
auto prefix_op = SumCallbackOp<GradientPairSumT>();
for (int scan_begin = gidx_begin; scan_begin < gidx_end;
scan_begin += BLOCK_THREADS) {
bool thread_active = scan_begin + threadIdx.x < gidx_end;

gpair_sum_t bin =
thread_active ? hist[scan_begin + threadIdx.x] : gpair_sum_t();
GradientPairSumT bin =
thread_active ? hist[scan_begin + threadIdx.x] : GradientPairSumT();
scan_t(temp_storage->scan).ExclusiveScan(bin, bin, cub::Sum(), prefix_op);

// Calculate gain
gpair_sum_t parent_sum = gpair_sum_t(node.sum_gradients);
GradientPairSumT parent_sum = GradientPairSumT(node.sum_gradients);

gpair_sum_t missing = parent_sum - feature_sum;
GradientPairSumT missing = parent_sum - feature_sum;

bool missing_left = true;
const float null_gain = -FLT_MAX;
float gain = null_gain;
if (thread_active) {
gain = loss_chg_missing(bin, missing, parent_sum, node.root_gain, param,
gain = LossChangeMissing(bin, missing, parent_sum, node.root_gain, param,
constraint, value_constraint, missing_left);
}

@ -92,7 +92,7 @@ __device__ void EvaluateFeature(int fidx, const gpair_sum_t* hist,
// Find thread with best gain
cub::KeyValuePair<int, float> tuple(threadIdx.x, gain);
cub::KeyValuePair<int, float> best =
max_reduce_t(temp_storage->max_reduce).Reduce(tuple, cub::ArgMax());
max_ReduceT(temp_storage->max_reduce).Reduce(tuple, cub::ArgMax());

__shared__ cub::KeyValuePair<int, float> block_max;
if (threadIdx.x == 0) {
@ -107,11 +107,11 @@ __device__ void EvaluateFeature(int fidx, const gpair_sum_t* hist,
float fvalue =
gidx == gidx_begin ? min_fvalue : gidx_fvalue_map[gidx - 1];

gpair_sum_t left = missing_left ? bin + missing : bin;
gpair_sum_t right = parent_sum - left;
GradientPairSumT left = missing_left ? bin + missing : bin;
GradientPairSumT right = parent_sum - left;

best_split->Update(gain, missing_left ? LeftDir : RightDir, fvalue, fidx,
left, right, param);
best_split->Update(gain, missing_left ? kLeftDir : kRightDir, fvalue, fidx,
GradientPair(left), GradientPair(right), param);
}
__syncthreads();
}
@ -119,17 +119,17 @@ __device__ void EvaluateFeature(int fidx, const gpair_sum_t* hist,

template <int BLOCK_THREADS>
__global__ void evaluate_split_kernel(
const gpair_sum_t* d_hist, int nidx, uint64_t n_features,
const GradientPairSumT* d_hist, int nidx, uint64_t n_features,
DeviceNodeStats nodes, const int* d_feature_segments,
const float* d_fidx_min_map, const float* d_gidx_fvalue_map,
GPUTrainingParam gpu_param, DeviceSplitCandidate* d_split,
ValueConstraint value_constraint, int* d_monotonic_constraints) {
typedef cub::KeyValuePair<int, float> ArgMaxT;
typedef cub::BlockScan<gpair_sum_t, BLOCK_THREADS, cub::BLOCK_SCAN_WARP_SCANS>
typedef cub::BlockScan<GradientPairSumT, BLOCK_THREADS, cub::BLOCK_SCAN_WARP_SCANS>
BlockScanT;
typedef cub::BlockReduce<ArgMaxT, BLOCK_THREADS> MaxReduceT;

typedef cub::BlockReduce<gpair_sum_t, BLOCK_THREADS> SumReduceT;
typedef cub::BlockReduce<GradientPairSumT, BLOCK_THREADS> SumReduceT;

union TempStorage {
typename BlockScanT::TempStorage scan;
@ -163,8 +163,8 @@ __global__ void evaluate_split_kernel(
}
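The union is the standard cub idiom for shared-memory reuse: the scan and the two reductions never run concurrently within a block, so their TempStorage objects may alias. Its likely full shape, inferred from the typedefs above (the diff truncates the remaining members):

union TempStorage {
  typename BlockScanT::TempStorage scan;
  typename MaxReduceT::TempStorage max_reduce;
  typename SumReduceT::TempStorage sum_reduce;
};
__shared__ TempStorage temp_storage;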

// Find a gidx value for a given feature otherwise return -1 if not found
template <typename gidx_iter_t>
__device__ int BinarySearchRow(bst_uint begin, bst_uint end, gidx_iter_t data,
template <typename GidxIterT>
__device__ int BinarySearchRow(bst_uint begin, bst_uint end, GidxIterT data,
int fidx_begin, int fidx_end) {
bst_uint previous_middle = UINT32_MAX;
while (end != begin) {
@ -189,19 +189,19 @@ __device__ int BinarySearchRow(bst_uint begin, bst_uint end, gidx_iter_t data,
}
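A host-side sketch of what the truncated loop computes, assuming each row's slice of the compressed buffer stores bin ids in ascending order: report the one entry whose bin id falls inside this feature's range [fidx_begin, fidx_end), or -1 when the feature is missing from the row.

#include <algorithm>
#include <vector>

inline int BinarySearchRowSketch(const std::vector<int>& data, int begin,
                                 int end, int fidx_begin, int fidx_end) {
  auto first = data.begin() + begin;
  auto last = data.begin() + end;
  auto it = std::lower_bound(first, last, fidx_begin);  // first bin id >= fidx_begin
  return (it != last && *it < fidx_end) ? *it : -1;     // inside this feature's bin range?
}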

struct DeviceHistogram {
dh::bulk_allocator<dh::memory_type::DEVICE> ba;
dh::dvec<gpair_sum_t> data;
dh::BulkAllocator<dh::MemoryType::kDevice> ba;
dh::DVec<GradientPairSumT> data;
int n_bins;
void Init(int device_idx, int max_nodes, int n_bins, bool silent) {
this->n_bins = n_bins;
ba.allocate(device_idx, silent, &data, size_t(max_nodes) * size_t(n_bins));
ba.Allocate(device_idx, silent, &data, size_t(max_nodes) * size_t(n_bins));
}

void Reset() { data.fill(gpair_sum_t()); }
gpair_sum_t* GetHistPtr(int nidx) { return data.data() + nidx * n_bins; }
void Reset() { data.Fill(GradientPairSumT()); }
GradientPairSumT* GetHistPtr(int nidx) { return data.Data() + nidx * n_bins; }

void PrintNidx(int nidx) const {
auto h_data = data.as_vector();
auto h_data = data.AsVector();
std::cout << "nidx " << nidx << ":\n";
for (int i = n_bins * nidx; i < n_bins * (nidx + 1); i++) {
std::cout << h_data[i] << " ";
@ -216,7 +216,7 @@ struct CalcWeightTrainParam {
float reg_lambda;
float max_delta_step;
float learning_rate;
__host__ __device__ CalcWeightTrainParam(const TrainParam& p)
XGBOOST_DEVICE explicit CalcWeightTrainParam(const TrainParam& p)
: min_child_weight(p.min_child_weight),
reg_alpha(p.reg_alpha),
reg_lambda(p.reg_lambda),
@ -240,19 +240,19 @@ struct DeviceShard {

int device_idx;
int normalised_device_idx; // Device index counting from param.gpu_id
dh::bulk_allocator<dh::memory_type::DEVICE> ba;
dh::dvec<common::compressed_byte_t> gidx_buffer;
dh::dvec<bst_gpair> gpair;
dh::dvec2<bst_uint> ridx; // Row index relative to this shard
dh::dvec2<int> position;
dh::BulkAllocator<dh::MemoryType::kDevice> ba;
dh::DVec<common::CompressedByteT> gidx_buffer;
dh::DVec<GradientPair> gpair;
dh::DVec2<bst_uint> ridx; // Row index relative to this shard
dh::DVec2<int> position;
std::vector<Segment> ridx_segments;
dh::dvec<int> feature_segments;
dh::dvec<float> gidx_fvalue_map;
dh::dvec<float> min_fvalue;
dh::dvec<int> monotone_constraints;
dh::dvec<bst_float> prediction_cache;
std::vector<bst_gpair> node_sum_gradients;
dh::dvec<bst_gpair> node_sum_gradients_d;
dh::DVec<int> feature_segments;
dh::DVec<float> gidx_fvalue_map;
dh::DVec<float> min_fvalue;
dh::DVec<int> monotone_constraints;
dh::DVec<bst_float> prediction_cache;
std::vector<GradientPair> node_sum_gradients;
dh::DVec<GradientPair> node_sum_gradients_d;
common::CompressedIterator<uint32_t> gidx;
int row_stride;
bst_uint row_begin_idx; // The row offset for this shard
@ -311,8 +311,8 @@ struct DeviceShard {
<< "Max leaves and max depth cannot both be unconstrained for "
"gpu_hist.";
int max_nodes =
param.max_leaves > 0 ? param.max_leaves * 2 : n_nodes(param.max_depth);
ba.allocate(device_idx, param.silent, &gidx_buffer, compressed_size_bytes,
param.max_leaves > 0 ? param.max_leaves * 2 : MaxNodesDepth(param.max_depth);
ba.Allocate(device_idx, param.silent, &gidx_buffer, compressed_size_bytes,
&gpair, n_rows, &ridx, n_rows, &position, n_rows,
&prediction_cache, n_rows, &node_sum_gradients_d, max_nodes,
&feature_segments, gmat.cut->row_ptr.size(), &gidx_fvalue_map,
@ -328,11 +328,11 @@ struct DeviceShard {

// Compress gidx
common::CompressedBufferWriter cbw(num_symbols);
std::vector<common::compressed_byte_t> host_buffer(gidx_buffer.size());
std::vector<common::CompressedByteT> host_buffer(gidx_buffer.Size());
cbw.Write(host_buffer.data(), ellpack_matrix.begin(), ellpack_matrix.end());
gidx_buffer = host_buffer;
gidx =
common::CompressedIterator<uint32_t>(gidx_buffer.data(), num_symbols);
common::CompressedIterator<uint32_t>(gidx_buffer.Data(), num_symbols);

common::CompressedIterator<uint32_t> ci_host(host_buffer.data(),
num_symbols);
@ -369,19 +369,19 @@ struct DeviceShard {
}

// Reset values for each update iteration
void Reset(HostDeviceVector<bst_gpair>* dh_gpair, int device) {
void Reset(HostDeviceVector<GradientPair>* dh_gpair, int device) {
auto begin = dh_gpair->tbegin(device);
dh::safe_cuda(cudaSetDevice(device_idx));
position.current_dvec().fill(0);
position.CurrentDVec().Fill(0);
std::fill(node_sum_gradients.begin(), node_sum_gradients.end(),
bst_gpair());
GradientPair());

thrust::sequence(ridx.current_dvec().tbegin(), ridx.current_dvec().tend());
thrust::sequence(ridx.CurrentDVec().tbegin(), ridx.CurrentDVec().tend());

std::fill(ridx_segments.begin(), ridx_segments.end(), Segment(0, 0));
ridx_segments.front() = Segment(0, ridx.size());
ridx_segments.front() = Segment(0, ridx.Size());
this->gpair.copy(begin + row_begin_idx, begin + row_end_idx);
subsample_gpair(&gpair, param.subsample, row_begin_idx);
SubsampleGradientPair(&gpair, param.subsample, row_begin_idx);
hist.Reset();
}

@ -389,13 +389,13 @@ struct DeviceShard {
auto segment = ridx_segments[nidx];
auto d_node_hist = hist.GetHistPtr(nidx);
auto d_gidx = gidx;
auto d_ridx = ridx.current();
auto d_gpair = gpair.data();
auto d_ridx = ridx.Current();
auto d_gpair = gpair.Data();
auto row_stride = this->row_stride;
auto null_gidx_value = this->null_gidx_value;
auto n_elements = segment.Size() * row_stride;

dh::launch_n(device_idx, n_elements, [=] __device__(size_t idx) {
dh::LaunchN(device_idx, n_elements, [=] __device__(size_t idx) {
int ridx = d_ridx[(idx / row_stride) + segment.begin];
int gidx = d_gidx[ridx * row_stride + idx % row_stride];

@ -410,7 +410,7 @@ struct DeviceShard {
auto d_node_hist_histogram = hist.GetHistPtr(nidx_histogram);
auto d_node_hist_subtraction = hist.GetHistPtr(nidx_subtraction);

dh::launch_n(device_idx, hist.n_bins, [=] __device__(size_t idx) {
dh::LaunchN(device_idx, hist.n_bins, [=] __device__(size_t idx) {
d_node_hist_subtraction[idx] =
d_node_hist_parent[idx] - d_node_hist_histogram[idx];
});
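This is the histogram subtraction trick: a parent's histogram equals the bin-wise sum of its children's, so only one child (the one with fewer rows, chosen in BuildHistLeftRight below) is built from raw data and its sibling is derived by subtraction, roughly halving histogram work per level. The invariant in host-side form:

// hist[parent][b] == hist[left][b] + hist[right][b] for every bin b, hence:
for (int b = 0; b < n_bins; ++b) {
  sibling_hist[b] = parent_hist[b] - built_hist[b];
}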
@ -432,11 +432,11 @@ struct DeviceShard {
auto d_left_count = temp_memory.Pointer<int64_t>();
dh::safe_cuda(cudaMemset(d_left_count, 0, sizeof(int64_t)));
auto segment = ridx_segments[nidx];
auto d_ridx = ridx.current();
auto d_position = position.current();
auto d_ridx = ridx.Current();
auto d_position = position.Current();
auto d_gidx = gidx;
auto row_stride = this->row_stride;
dh::launch_n<1, 512>(
dh::LaunchN<1, 512>(
device_idx, segment.Size(), [=] __device__(bst_uint idx) {
idx += segment.begin;
auto ridx = d_ridx[idx];
@ -482,22 +482,22 @@ struct DeviceShard {

size_t temp_storage_bytes = 0;
cub::DeviceRadixSort::SortPairs(
nullptr, temp_storage_bytes, position.current() + segment.begin,
position.other() + segment.begin, ridx.current() + segment.begin,
nullptr, temp_storage_bytes, position.Current() + segment.begin,
position.other() + segment.begin, ridx.Current() + segment.begin,
ridx.other() + segment.begin, segment.Size(), min_bits, max_bits);

temp_memory.LazyAllocate(temp_storage_bytes);

cub::DeviceRadixSort::SortPairs(
temp_memory.d_temp_storage, temp_memory.temp_storage_bytes,
position.current() + segment.begin, position.other() + segment.begin,
ridx.current() + segment.begin, ridx.other() + segment.begin,
position.Current() + segment.begin, position.other() + segment.begin,
ridx.Current() + segment.begin, ridx.other() + segment.begin,
segment.Size(), min_bits, max_bits);
dh::safe_cuda(cudaMemcpy(
position.current() + segment.begin, position.other() + segment.begin,
position.Current() + segment.begin, position.other() + segment.begin,
segment.Size() * sizeof(int), cudaMemcpyDeviceToDevice));
dh::safe_cuda(cudaMemcpy(
ridx.current() + segment.begin, ridx.other() + segment.begin,
ridx.Current() + segment.begin, ridx.other() + segment.begin,
segment.Size() * sizeof(bst_uint), cudaMemcpyDeviceToDevice));
}
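Note the two-phase convention for cub device-wide algorithms used here: the first call passes a null d_temp_storage and only writes the required scratch size, and the sort itself runs after allocation. In sketch form (buffer names illustrative):

size_t temp_bytes = 0;
cub::DeviceRadixSort::SortPairs(nullptr, temp_bytes, keys_in, keys_out,
                                values_in, values_out, num_items);  // size query only
// ... allocate at least temp_bytes of device memory at d_temp ...
cub::DeviceRadixSort::SortPairs(d_temp, temp_bytes, keys_in, keys_out,
                                values_in, values_out, num_items);  // actual sort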

@ -505,8 +505,8 @@ struct DeviceShard {
dh::safe_cuda(cudaSetDevice(device_idx));
if (!prediction_cache_initialised) {
dh::safe_cuda(cudaMemcpy(
prediction_cache.data(), &out_preds_d[row_begin_idx],
prediction_cache.size() * sizeof(bst_float), cudaMemcpyDefault));
prediction_cache.Data(), &out_preds_d[row_begin_idx],
prediction_cache.Size() * sizeof(bst_float), cudaMemcpyDefault));
}
prediction_cache_initialised = true;

@ -514,13 +514,13 @@ struct DeviceShard {

thrust::copy(node_sum_gradients.begin(), node_sum_gradients.end(),
node_sum_gradients_d.tbegin());
auto d_position = position.current();
auto d_ridx = ridx.current();
auto d_node_sum_gradients = node_sum_gradients_d.data();
auto d_prediction_cache = prediction_cache.data();
auto d_position = position.Current();
auto d_ridx = ridx.Current();
auto d_node_sum_gradients = node_sum_gradients_d.Data();
auto d_prediction_cache = prediction_cache.Data();

dh::launch_n(
device_idx, prediction_cache.size(), [=] __device__(int local_idx) {
dh::LaunchN(
device_idx, prediction_cache.Size(), [=] __device__(int local_idx) {
int pos = d_position[local_idx];
bst_float weight = CalcWeight(param_d, d_node_sum_gradients[pos]);
d_prediction_cache[d_ridx[local_idx]] +=
@ -528,8 +528,8 @@ struct DeviceShard {
});

dh::safe_cuda(cudaMemcpy(
&out_preds_d[row_begin_idx], prediction_cache.data(),
prediction_cache.size() * sizeof(bst_float), cudaMemcpyDefault));
&out_preds_d[row_begin_idx], prediction_cache.Data(),
prediction_cache.Size() * sizeof(bst_float), cudaMemcpyDefault));
}
};

@ -537,33 +537,32 @@ class GPUHistMaker : public TreeUpdater {
public:
struct ExpandEntry;

GPUHistMaker() : initialised(false), p_last_fmat_(nullptr) {}
~GPUHistMaker() {}
GPUHistMaker() : initialised_(false), p_last_fmat_(nullptr) {}
void Init(
const std::vector<std::pair<std::string, std::string>>& args) override {
param.InitAllowUnknown(args);
CHECK(param.n_gpus != 0) << "Must have at least one device";
n_devices = param.n_gpus;
param_.InitAllowUnknown(args);
CHECK(param_.n_gpus != 0) << "Must have at least one device";
n_devices_ = param_.n_gpus;

dh::check_compute_capability();
dh::CheckComputeCapability();

if (param.grow_policy == TrainParam::kLossGuide) {
qexpand_.reset(new ExpandQueue(loss_guide));
if (param_.grow_policy == TrainParam::kLossGuide) {
qexpand_.reset(new ExpandQueue(LossGuide));
} else {
qexpand_.reset(new ExpandQueue(depth_wise));
qexpand_.reset(new ExpandQueue(DepthWise));
}

monitor.Init("updater_gpu_hist", param.debug_verbose);
monitor_.Init("updater_gpu_hist", param_.debug_verbose);
}

void Update(HostDeviceVector<bst_gpair>* gpair, DMatrix* dmat,
void Update(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
const std::vector<RegTree*>& trees) override {
monitor.Start("Update", dList);
GradStats::CheckInfo(dmat->info());
monitor_.Start("Update", device_list_);
GradStats::CheckInfo(dmat->Info());
// rescale learning rate according to size of trees
float lr = param.learning_rate;
param.learning_rate = lr / trees.size();
ValueConstraint::Init(&param, dmat->info().num_col);
float lr = param_.learning_rate;
param_.learning_rate = lr / trees.size();
ValueConstraint::Init(&param_, dmat->Info().num_col_);
// build tree
try {
for (size_t i = 0; i < trees.size(); ++i) {
@ -572,97 +571,97 @@ class GPUHistMaker : public TreeUpdater {
} catch (const std::exception& e) {
LOG(FATAL) << "GPU plugin exception: " << e.what() << std::endl;
}
param.learning_rate = lr;
monitor.Stop("Update", dList);
param_.learning_rate = lr;
monitor_.Stop("Update", device_list_);
}

void InitDataOnce(DMatrix* dmat) {
info = &dmat->info();
monitor.Start("Quantiles", dList);
hmat_.Init(dmat, param.max_bin);
info_ = &dmat->Info();
monitor_.Start("Quantiles", device_list_);
hmat_.Init(dmat, param_.max_bin);
gmat_.cut = &hmat_;
gmat_.Init(dmat);
monitor.Stop("Quantiles", dList);
n_bins = hmat_.row_ptr.back();
monitor_.Stop("Quantiles", device_list_);
n_bins_ = hmat_.row_ptr.back();

int n_devices = dh::n_devices(param.n_gpus, info->num_row);
int n_devices = dh::NDevices(param_.n_gpus, info_->num_row_);

bst_uint row_begin = 0;
bst_uint shard_size =
std::ceil(static_cast<double>(info->num_row) / n_devices);
std::ceil(static_cast<double>(info_->num_row_) / n_devices);

dList.resize(n_devices);
device_list_.resize(n_devices);
for (int d_idx = 0; d_idx < n_devices; ++d_idx) {
int device_idx = (param.gpu_id + d_idx) % dh::n_visible_devices();
dList[d_idx] = device_idx;
int device_idx = (param_.gpu_id + d_idx) % dh::NVisibleDevices();
device_list_[d_idx] = device_idx;
}

reducer.Init(dList);
reducer_.Init(device_list_);

// Partition input matrix into row segments
std::vector<size_t> row_segments;
shards.resize(n_devices);
shards_.resize(n_devices);
row_segments.push_back(0);
for (int d_idx = 0; d_idx < n_devices; ++d_idx) {
bst_uint row_end =
std::min(static_cast<size_t>(row_begin + shard_size), info->num_row);
std::min(static_cast<size_t>(row_begin + shard_size), info_->num_row_);
row_segments.push_back(row_end);
row_begin = row_end;
}

// Create device shards
omp_set_num_threads(shards.size());
omp_set_num_threads(shards_.size());
#pragma omp parallel
{
auto cpu_thread_id = omp_get_thread_num();
shards[cpu_thread_id] = std::unique_ptr<DeviceShard>(
new DeviceShard(dList[cpu_thread_id], cpu_thread_id, gmat_,
shards_[cpu_thread_id] = std::unique_ptr<DeviceShard>(
new DeviceShard(device_list_[cpu_thread_id], cpu_thread_id, gmat_,
row_segments[cpu_thread_id],
row_segments[cpu_thread_id + 1], n_bins, param));
row_segments[cpu_thread_id + 1], n_bins_, param_));
}

p_last_fmat_ = dmat;
initialised = true;
initialised_ = true;
}

void InitData(HostDeviceVector<bst_gpair>* gpair, DMatrix* dmat,
void InitData(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
const RegTree& tree) {
monitor.Start("InitDataOnce", dList);
if (!initialised) {
monitor_.Start("InitDataOnce", device_list_);
if (!initialised_) {
this->InitDataOnce(dmat);
}
monitor.Stop("InitDataOnce", dList);
monitor_.Stop("InitDataOnce", device_list_);

column_sampler.Init(info->num_col, param);
column_sampler_.Init(info_->num_col_, param_);

// Copy gpair & reset memory
monitor.Start("InitDataReset", dList);
omp_set_num_threads(shards.size());
monitor_.Start("InitDataReset", device_list_);
omp_set_num_threads(shards_.size());

// TODO(canonizer): make it parallel again once HostDeviceVector is
// thread-safe
for (int shard = 0; shard < shards.size(); ++shard)
shards[shard]->Reset(gpair, param.gpu_id);
monitor.Stop("InitDataReset", dList);
for (int shard = 0; shard < shards_.size(); ++shard)
shards_[shard]->Reset(gpair, param_.gpu_id);
monitor_.Stop("InitDataReset", device_list_);
}

void AllReduceHist(int nidx) {
for (auto& shard : shards) {
for (auto& shard : shards_) {
auto d_node_hist = shard->hist.GetHistPtr(nidx);
reducer.AllReduceSum(
reducer_.AllReduceSum(
shard->normalised_device_idx,
reinterpret_cast<gpair_sum_t::value_t*>(d_node_hist),
reinterpret_cast<gpair_sum_t::value_t*>(d_node_hist),
n_bins * (sizeof(gpair_sum_t) / sizeof(gpair_sum_t::value_t)));
reinterpret_cast<GradientPairSumT::ValueT*>(d_node_hist),
reinterpret_cast<GradientPairSumT::ValueT*>(d_node_hist),
n_bins_ * (sizeof(GradientPairSumT) / sizeof(GradientPairSumT::ValueT)));
}

reducer.Synchronize();
reducer_.Synchronize();
}
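AllReduceSum treats each node's histogram as a flat array of GradientPairSumT::ValueT, so the element count must cover both components of every pair; the same device buffer serves as send and receive (an in-place all-reduce). The count spelled out:

size_t components = sizeof(GradientPairSumT) / sizeof(GradientPairSumT::ValueT);  // 2 for {grad, hess}
size_t count = n_bins_ * components;  // ValueT elements reduced per node histogram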

void BuildHistLeftRight(int nidx_parent, int nidx_left, int nidx_right) {
size_t left_node_max_elements = 0;
size_t right_node_max_elements = 0;
for (auto& shard : shards) {
for (auto& shard : shards_) {
left_node_max_elements = (std::max)(
left_node_max_elements, shard->ridx_segments[nidx_left].Size());
right_node_max_elements = (std::max)(
@ -677,13 +676,13 @@ class GPUHistMaker : public TreeUpdater {
subtraction_trick_nidx = nidx_left;
}

for (auto& shard : shards) {
for (auto& shard : shards_) {
shard->BuildHist(build_hist_nidx);
}

this->AllReduceHist(build_hist_nidx);

for (auto& shard : shards) {
for (auto& shard : shards_) {
shard->SubtractionTrick(nidx_parent, build_hist_nidx,
subtraction_trick_nidx);
}
@ -692,12 +691,12 @@ class GPUHistMaker : public TreeUpdater {
// Returns best loss
std::vector<DeviceSplitCandidate> EvaluateSplits(
const std::vector<int>& nidx_set, RegTree* p_tree) {
auto columns = info->num_col;
auto columns = info_->num_col_;
std::vector<DeviceSplitCandidate> best_splits(nidx_set.size());
std::vector<DeviceSplitCandidate> candidate_splits(nidx_set.size() *
columns);
// Use first device
auto& shard = shards.front();
auto& shard = shards_.front();
dh::safe_cuda(cudaSetDevice(shard->device_idx));
shard->temp_memory.LazyAllocate(sizeof(DeviceSplitCandidate) * columns *
nidx_set.size());
@ -708,16 +707,16 @@ class GPUHistMaker : public TreeUpdater {
// Use streams to process nodes concurrently
for (auto i = 0; i < nidx_set.size(); i++) {
auto nidx = nidx_set[i];
DeviceNodeStats node(shard->node_sum_gradients[nidx], nidx, param);
DeviceNodeStats node(shard->node_sum_gradients[nidx], nidx, param_);

const int BLOCK_THREADS = 256;
evaluate_split_kernel<BLOCK_THREADS>
<<<uint32_t(columns), BLOCK_THREADS, 0, streams[i]>>>(
shard->hist.GetHistPtr(nidx), nidx, info->num_col, node,
shard->feature_segments.data(), shard->min_fvalue.data(),
shard->gidx_fvalue_map.data(), GPUTrainingParam(param),
shard->hist.GetHistPtr(nidx), nidx, info_->num_col_, node,
shard->feature_segments.Data(), shard->min_fvalue.Data(),
shard->gidx_fvalue_map.Data(), GPUTrainingParam(param_),
d_split + i * columns, node_value_constraints_[nidx],
shard->monotone_constraints.data());
shard->monotone_constraints.Data());
}

dh::safe_cuda(
@ -730,9 +729,9 @@ class GPUHistMaker : public TreeUpdater {
DeviceSplitCandidate nidx_best;
for (auto fidx = 0; fidx < columns; fidx++) {
auto& candidate = candidate_splits[i * columns + fidx];
if (column_sampler.ColumnUsed(candidate.findex,
if (column_sampler_.ColumnUsed(candidate.findex,
p_tree->GetDepth(nidx))) {
nidx_best.Update(candidate_splits[i * columns + fidx], param);
nidx_best.Update(candidate_splits[i * columns + fidx], param_);
}
}
best_splits[i] = nidx_best;
@ -743,34 +742,34 @@ class GPUHistMaker : public TreeUpdater {
void InitRoot(RegTree* p_tree) {
auto root_nidx = 0;
// Sum gradients
std::vector<bst_gpair> tmp_sums(shards.size());
omp_set_num_threads(shards.size());
std::vector<GradientPair> tmp_sums(shards_.size());
omp_set_num_threads(shards_.size());
#pragma omp parallel
{
auto cpu_thread_id = omp_get_thread_num();
auto& shard = shards[cpu_thread_id];
auto& shard = shards_[cpu_thread_id];
dh::safe_cuda(cudaSetDevice(shard->device_idx));
tmp_sums[cpu_thread_id] = dh::sumReduction(
shard->temp_memory, shard->gpair.data(), shard->gpair.size());
tmp_sums[cpu_thread_id] = dh::SumReduction(
shard->temp_memory, shard->gpair.Data(), shard->gpair.Size());
}
auto sum_gradient =
std::accumulate(tmp_sums.begin(), tmp_sums.end(), bst_gpair_precise());
std::accumulate(tmp_sums.begin(), tmp_sums.end(), GradientPair());

// Generate root histogram
for (auto& shard : shards) {
for (auto& shard : shards_) {
shard->BuildHist(root_nidx);
}

this->AllReduceHist(root_nidx);

// Remember root stats
p_tree->stat(root_nidx).sum_hess = sum_gradient.GetHess();
auto weight = CalcWeight(param, sum_gradient);
p_tree->stat(root_nidx).base_weight = weight;
(*p_tree)[root_nidx].set_leaf(param.learning_rate * weight);
p_tree->Stat(root_nidx).sum_hess = sum_gradient.GetHess();
auto weight = CalcWeight(param_, sum_gradient);
p_tree->Stat(root_nidx).base_weight = weight;
(*p_tree)[root_nidx].SetLeaf(param_.learning_rate * weight);

// Store sum gradients
for (auto& shard : shards) {
for (auto& shard : shards_) {
shard->node_sum_gradients[root_nidx] = sum_gradient;
}

@ -785,14 +784,14 @@ class GPUHistMaker : public TreeUpdater {

void UpdatePosition(const ExpandEntry& candidate, RegTree* p_tree) {
auto nidx = candidate.nid;
auto left_nidx = (*p_tree)[nidx].cleft();
auto right_nidx = (*p_tree)[nidx].cright();
auto left_nidx = (*p_tree)[nidx].LeftChild();
auto right_nidx = (*p_tree)[nidx].RightChild();

// convert floating-point split_pt into corresponding bin_id
// split_cond = -1 indicates that split_pt is less than all known cut points
auto split_gidx = -1;
auto fidx = candidate.split.findex;
auto default_dir_left = candidate.split.dir == LeftDir;
auto default_dir_left = candidate.split.dir == kLeftDir;
auto fidx_begin = hmat_.row_ptr[fidx];
auto fidx_end = hmat_.row_ptr[fidx + 1];
for (auto i = fidx_begin; i < fidx_end; ++i) {
@ -801,13 +800,13 @@ class GPUHistMaker : public TreeUpdater {
}
}

auto is_dense = info->num_nonzero == info->num_row * info->num_col;
auto is_dense = info_->num_nonzero_ == info_->num_row_ * info_->num_col_;

omp_set_num_threads(shards.size());
omp_set_num_threads(shards_.size());
#pragma omp parallel
{
auto cpu_thread_id = omp_get_thread_num();
shards[cpu_thread_id]->UpdatePosition(nidx, left_nidx, right_nidx, fidx,
shards_[cpu_thread_id]->UpdatePosition(nidx, left_nidx, right_nidx, fidx,
split_gidx, default_dir_left,
is_dense, fidx_begin, fidx_end);
}
@ -818,55 +817,55 @@ class GPUHistMaker : public TreeUpdater {
RegTree& tree = *p_tree;
tree.AddChilds(candidate.nid);
auto& parent = tree[candidate.nid];
parent.set_split(candidate.split.findex, candidate.split.fvalue,
candidate.split.dir == LeftDir);
tree.stat(candidate.nid).loss_chg = candidate.split.loss_chg;
parent.SetSplit(candidate.split.findex, candidate.split.fvalue,
candidate.split.dir == kLeftDir);
tree.Stat(candidate.nid).loss_chg = candidate.split.loss_chg;

// Set up child constraints
node_value_constraints_.resize(tree.GetNodes().size());
GradStats left_stats(param);
GradStats left_stats(param_);
left_stats.Add(candidate.split.left_sum);
GradStats right_stats(param);
GradStats right_stats(param_);
right_stats.Add(candidate.split.right_sum);
node_value_constraints_[candidate.nid].SetChild(
param, parent.split_index(), left_stats, right_stats,
&node_value_constraints_[parent.cleft()],
&node_value_constraints_[parent.cright()]);
param_, parent.SplitIndex(), left_stats, right_stats,
&node_value_constraints_[parent.LeftChild()],
&node_value_constraints_[parent.RightChild()]);

// Configure left child
auto left_weight =
node_value_constraints_[parent.cleft()].CalcWeight(param, left_stats);
tree[parent.cleft()].set_leaf(left_weight * param.learning_rate, 0);
tree.stat(parent.cleft()).base_weight = left_weight;
tree.stat(parent.cleft()).sum_hess = candidate.split.left_sum.GetHess();
node_value_constraints_[parent.LeftChild()].CalcWeight(param_, left_stats);
tree[parent.LeftChild()].SetLeaf(left_weight * param_.learning_rate, 0);
tree.Stat(parent.LeftChild()).base_weight = left_weight;
tree.Stat(parent.LeftChild()).sum_hess = candidate.split.left_sum.GetHess();

// Configure right child
auto right_weight =
node_value_constraints_[parent.cright()].CalcWeight(param, right_stats);
tree[parent.cright()].set_leaf(right_weight * param.learning_rate, 0);
tree.stat(parent.cright()).base_weight = right_weight;
tree.stat(parent.cright()).sum_hess = candidate.split.right_sum.GetHess();
node_value_constraints_[parent.RightChild()].CalcWeight(param_, right_stats);
tree[parent.RightChild()].SetLeaf(right_weight * param_.learning_rate, 0);
tree.Stat(parent.RightChild()).base_weight = right_weight;
tree.Stat(parent.RightChild()).sum_hess = candidate.split.right_sum.GetHess();
// Store sum gradients
for (auto& shard : shards) {
shard->node_sum_gradients[parent.cleft()] = candidate.split.left_sum;
shard->node_sum_gradients[parent.cright()] = candidate.split.right_sum;
for (auto& shard : shards_) {
shard->node_sum_gradients[parent.LeftChild()] = candidate.split.left_sum;
shard->node_sum_gradients[parent.RightChild()] = candidate.split.right_sum;
}
this->UpdatePosition(candidate, p_tree);
}

void UpdateTree(HostDeviceVector<bst_gpair>* gpair, DMatrix* p_fmat,
void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat,
RegTree* p_tree) {
// Temporarily store number of threads so we can change it back later
int nthread = omp_get_max_threads();

auto& tree = *p_tree;

monitor.Start("InitData", dList);
monitor_.Start("InitData", device_list_);
this->InitData(gpair, p_fmat, *p_tree);
monitor.Stop("InitData", dList);
monitor.Start("InitRoot", dList);
monitor_.Stop("InitData", device_list_);
monitor_.Start("InitRoot", device_list_);
this->InitRoot(p_tree);
monitor.Stop("InitRoot", dList);
monitor_.Stop("InitRoot", device_list_);

auto timestamp = qexpand_->size();
auto num_leaves = 1;
@ -874,25 +873,25 @@ class GPUHistMaker : public TreeUpdater {
while (!qexpand_->empty()) {
auto candidate = qexpand_->top();
qexpand_->pop();
if (!candidate.IsValid(param, num_leaves)) continue;
if (!candidate.IsValid(param_, num_leaves)) continue;
// std::cout << candidate;
monitor.Start("ApplySplit", dList);
monitor_.Start("ApplySplit", device_list_);
this->ApplySplit(candidate, p_tree);
monitor.Stop("ApplySplit", dList);
monitor_.Stop("ApplySplit", device_list_);
num_leaves++;

auto left_child_nidx = tree[candidate.nid].cleft();
auto right_child_nidx = tree[candidate.nid].cright();
auto left_child_nidx = tree[candidate.nid].LeftChild();
auto right_child_nidx = tree[candidate.nid].RightChild();

// Only create child entries if needed
if (ExpandEntry::ChildIsValid(param, tree.GetDepth(left_child_nidx),
if (ExpandEntry::ChildIsValid(param_, tree.GetDepth(left_child_nidx),
num_leaves)) {
monitor.Start("BuildHist", dList);
monitor_.Start("BuildHist", device_list_);
this->BuildHistLeftRight(candidate.nid, left_child_nidx,
right_child_nidx);
monitor.Stop("BuildHist", dList);
monitor_.Stop("BuildHist", device_list_);

monitor.Start("EvaluateSplits", dList);
monitor_.Start("EvaluateSplits", device_list_);
auto splits =
this->EvaluateSplits({left_child_nidx, right_child_nidx}, p_tree);
qexpand_->push(ExpandEntry(left_child_nidx,
@ -901,7 +900,7 @@ class GPUHistMaker : public TreeUpdater {
qexpand_->push(ExpandEntry(right_child_nidx,
tree.GetDepth(right_child_nidx), splits[1],
timestamp++));
monitor.Stop("EvaluateSplits", dList);
monitor_.Stop("EvaluateSplits", device_list_);
}
}
// Reset omp num threads
@ -910,17 +909,17 @@ class GPUHistMaker : public TreeUpdater {

bool UpdatePredictionCache(
const DMatrix* data, HostDeviceVector<bst_float>* p_out_preds) override {
monitor.Start("UpdatePredictionCache", dList);
if (shards.empty() || p_last_fmat_ == nullptr || p_last_fmat_ != data)
monitor_.Start("UpdatePredictionCache", device_list_);
if (shards_.empty() || p_last_fmat_ == nullptr || p_last_fmat_ != data)
return false;

bst_float* out_preds_d = p_out_preds->ptr_d(param.gpu_id);
bst_float* out_preds_d = p_out_preds->DevicePointer(param_.gpu_id);

#pragma omp parallel for schedule(static, 1)
for (int shard = 0; shard < shards.size(); ++shard) {
shards[shard]->UpdatePredictionCache(out_preds_d);
for (int shard = 0; shard < shards_.size(); ++shard) {
shards_[shard]->UpdatePredictionCache(out_preds_d);
}
monitor.Stop("UpdatePredictionCache", dList);
monitor_.Stop("UpdatePredictionCache", device_list_);
return true;
}

@ -933,7 +932,7 @@ class GPUHistMaker : public TreeUpdater {
uint64_t timestamp)
: nid(nid), depth(depth), split(split), timestamp(timestamp) {}
bool IsValid(const TrainParam& param, int num_leaves) const {
if (split.loss_chg <= rt_eps) return false;
if (split.loss_chg <= kRtEps) return false;
if (split.left_sum.GetHess() == 0 || split.right_sum.GetHess() == 0)
return false;
if (param.max_depth > 0 && depth == param.max_depth) return false;
@ -959,38 +958,38 @@ class GPUHistMaker : public TreeUpdater {
}
};

inline static bool depth_wise(ExpandEntry lhs, ExpandEntry rhs) {
inline static bool DepthWise(ExpandEntry lhs, ExpandEntry rhs) {
if (lhs.depth == rhs.depth) {
return lhs.timestamp > rhs.timestamp; // favor small timestamp
} else {
return lhs.depth > rhs.depth; // favor small depth
}
}
inline static bool loss_guide(ExpandEntry lhs, ExpandEntry rhs) {
inline static bool LossGuide(ExpandEntry lhs, ExpandEntry rhs) {
if (lhs.split.loss_chg == rhs.split.loss_chg) {
return lhs.timestamp > rhs.timestamp; // favor small timestamp
} else {
return lhs.split.loss_chg < rhs.split.loss_chg; // favor large loss_chg
}
}
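A subtlety worth spelling out: std::priority_queue pops the element the comparator ranks highest, and returning true means "lhs ranks below rhs". So DepthWise pops the shallowest node first (level-order growth) and LossGuide pops the largest loss reduction first (leaf-wise growth, grow_policy=lossguide), with timestamps breaking ties in favour of older entries. A self-contained check of the LossGuide ordering (simplified Entry type):

#include <functional>
#include <queue>
#include <vector>

struct Entry { int depth; double loss_chg; unsigned long timestamp; };
static bool LossGuideSketch(const Entry& l, const Entry& r) {
  if (l.loss_chg == r.loss_chg) return l.timestamp > r.timestamp;  // older first
  return l.loss_chg < r.loss_chg;                                  // larger gain first
}
int main() {
  std::priority_queue<Entry, std::vector<Entry>,
                      std::function<bool(Entry, Entry)>> q(LossGuideSketch);
  q.push({1, 0.1, 2});
  q.push({1, 0.7, 5});
  // q.top().loss_chg == 0.7: the comparator returning true demotes an entry,
  // so the largest loss reduction is expanded first.
  return 0;
}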
TrainParam param;
TrainParam param_;
common::HistCutMatrix hmat_;
common::GHistIndexMatrix gmat_;
MetaInfo* info;
bool initialised;
int n_devices;
int n_bins;
MetaInfo* info_;
bool initialised_;
int n_devices_;
int n_bins_;

std::vector<std::unique_ptr<DeviceShard>> shards;
ColumnSampler column_sampler;
std::vector<std::unique_ptr<DeviceShard>> shards_;
ColumnSampler column_sampler_;
typedef std::priority_queue<ExpandEntry, std::vector<ExpandEntry>,
std::function<bool(ExpandEntry, ExpandEntry)>>
ExpandQueue;
std::unique_ptr<ExpandQueue> qexpand_;
common::Monitor monitor;
dh::AllReducer reducer;
common::Monitor monitor_;
dh::AllReducer reducer_;
std::vector<ValueConstraint> node_value_constraints_;
std::vector<int> dList;
std::vector<int> device_list_;

DMatrix* p_last_fmat_;
};

@ -21,18 +21,18 @@ DMLC_REGISTRY_FILE_TAG(updater_histmaker);
template<typename TStats>
class HistMaker: public BaseMaker {
public:
void Update(HostDeviceVector<bst_gpair> *gpair,
void Update(HostDeviceVector<GradientPair> *gpair,
DMatrix *p_fmat,
const std::vector<RegTree*> &trees) override {
TStats::CheckInfo(p_fmat->info());
TStats::CheckInfo(p_fmat->Info());
// rescale learning rate according to size of trees
float lr = param.learning_rate;
param.learning_rate = lr / trees.size();
float lr = param_.learning_rate;
param_.learning_rate = lr / trees.size();
// build tree
for (size_t i = 0; i < trees.size(); ++i) {
this->Update(gpair->data_h(), p_fmat, trees[i]);
for (auto tree : trees) {
this->Update(gpair->HostVector(), p_fmat, tree);
}
param.learning_rate = lr;
param_.learning_rate = lr;
}

protected:
@ -45,13 +45,13 @@ class HistMaker: public BaseMaker {
/*! \brief size of histogram */
unsigned size;
// default constructor
HistUnit() {}
HistUnit() = default;
// constructor
HistUnit(const bst_float *cut, TStats *data, unsigned size)
: cut(cut), data(data), size(size) {}
/*! \brief add a histogram to data */
inline void Add(bst_float fv,
const std::vector<bst_gpair> &gpair,
const std::vector<GradientPair> &gpair,
const MetaInfo &info,
const bst_uint ridx) {
unsigned i = std::upper_bound(cut, cut + size, fv) - cut;
@ -116,44 +116,44 @@ class HistMaker: public BaseMaker {
}
};
// workspace of thread
ThreadWSpace wspace;
ThreadWSpace wspace_;
// reducer for histogram
rabit::Reducer<TStats, TStats::Reduce> histred;
rabit::Reducer<TStats, TStats::Reduce> histred_;
// set of working features
std::vector<bst_uint> fwork_set;
std::vector<bst_uint> fwork_set_;
// update function implementation
virtual void Update(const std::vector<bst_gpair> &gpair,
virtual void Update(const std::vector<GradientPair> &gpair,
DMatrix *p_fmat,
RegTree *p_tree) {
this->InitData(gpair, *p_fmat, *p_tree);
this->InitWorkSet(p_fmat, *p_tree, &fwork_set);
this->InitWorkSet(p_fmat, *p_tree, &fwork_set_);
// mark root node as fresh.
for (int i = 0; i < p_tree->param.num_roots; ++i) {
(*p_tree)[i].set_leaf(0.0f, 0);
(*p_tree)[i].SetLeaf(0.0f, 0);
}

for (int depth = 0; depth < param.max_depth; ++depth) {
|
||||
for (int depth = 0; depth < param_.max_depth; ++depth) {
|
||||
// reset and propose candidate split
|
||||
this->ResetPosAndPropose(gpair, p_fmat, fwork_set, *p_tree);
|
||||
this->ResetPosAndPropose(gpair, p_fmat, fwork_set_, *p_tree);
|
||||
// create histogram
|
||||
this->CreateHist(gpair, p_fmat, fwork_set, *p_tree);
|
||||
this->CreateHist(gpair, p_fmat, fwork_set_, *p_tree);
|
||||
// find split based on histogram statistics
|
||||
this->FindSplit(depth, gpair, p_fmat, fwork_set, p_tree);
|
||||
this->FindSplit(depth, gpair, p_fmat, fwork_set_, p_tree);
|
||||
// reset position after split
|
||||
this->ResetPositionAfterSplit(p_fmat, *p_tree);
|
||||
this->UpdateQueueExpand(*p_tree);
|
||||
// if nothing left to be expand, break
|
||||
      if (qexpand.size() == 0) break;
      if (qexpand_.size() == 0) break;
    }
    for (size_t i = 0; i < qexpand.size(); ++i) {
      const int nid = qexpand[i];
      (*p_tree)[nid].set_leaf(p_tree->stat(nid).base_weight * param.learning_rate);
    for (size_t i = 0; i < qexpand_.size(); ++i) {
      const int nid = qexpand_[i];
      (*p_tree)[nid].SetLeaf(p_tree->Stat(nid).base_weight * param_.learning_rate);
    }
  }
  // this function does two jobs:
  // (1) reset each entry of the position array to the latest leaf id
  // (2) propose a set of candidate cuts, and set wspace.rptr and wspace.cut correctly
  virtual void ResetPosAndPropose(const std::vector<bst_gpair> &gpair,
  virtual void ResetPosAndPropose(const std::vector<GradientPair> &gpair,
                                  DMatrix *p_fmat,
                                  const std::vector<bst_uint> &fset,
                                  const RegTree &tree) = 0;
@ -170,7 +170,7 @@ class HistMaker: public BaseMaker {
  virtual void ResetPositionAfterSplit(DMatrix *p_fmat,
                                       const RegTree &tree) {
  }
  virtual void CreateHist(const std::vector<bst_gpair> &gpair,
  virtual void CreateHist(const std::vector<GradientPair> &gpair,
                          DMatrix *p_fmat,
                          const std::vector<bst_uint> &fset,
                          const RegTree &tree) = 0;
@ -183,14 +183,14 @@ class HistMaker: public BaseMaker {
                              TStats *left_sum) {
    if (hist.size == 0) return;

    double root_gain = node_sum.CalcGain(param);
    TStats s(param), c(param);
    double root_gain = node_sum.CalcGain(param_);
    TStats s(param_), c(param_);
    for (bst_uint i = 0; i < hist.size; ++i) {
      s.Add(hist.data[i]);
      if (s.sum_hess >= param.min_child_weight) {
      if (s.sum_hess >= param_.min_child_weight) {
        c.SetSubstract(node_sum, s);
        if (c.sum_hess >= param.min_child_weight) {
          double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
        if (c.sum_hess >= param_.min_child_weight) {
          double loss_chg = s.CalcGain(param_) + c.CalcGain(param_) - root_gain;
          if (best->Update(static_cast<bst_float>(loss_chg), fid, hist.cut[i], false)) {
            *left_sum = s;
          }
@ -200,10 +200,10 @@ class HistMaker: public BaseMaker {
    s.Clear();
    for (bst_uint i = hist.size - 1; i != 0; --i) {
      s.Add(hist.data[i]);
      if (s.sum_hess >= param.min_child_weight) {
      if (s.sum_hess >= param_.min_child_weight) {
        c.SetSubstract(node_sum, s);
        if (c.sum_hess >= param.min_child_weight) {
          double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
        if (c.sum_hess >= param_.min_child_weight) {
          double loss_chg = s.CalcGain(param_) + c.CalcGain(param_) - root_gain;
          if (best->Update(static_cast<bst_float>(loss_chg), fid, hist.cut[i-1], true)) {
            *left_sum = c;
          }
@ -212,65 +212,64 @@ class HistMaker: public BaseMaker {
      }
    }
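For context on the loss_chg computed by EnumerateSplit above: assuming TStats::CalcGain is the usual second-order gain G^2 / (H + lambda) on the summed gradients G and hessians H (the standard xgboost objective; the implementation is not shown in this diff), the candidate split score is

    loss_chg = G_L^2 / (H_L + lambda) + G_R^2 / (H_R + lambda) - G^2 / (H + lambda)

where (G_L, H_L) come from the running sum s, (G_R, H_R) from its complement c = node_sum - s, and the last term is root_gain. The forward and backward scans differ only in which side of the cut receives instances with missing values.
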
  inline void FindSplit(int depth,
                        const std::vector<bst_gpair> &gpair,
                        const std::vector<GradientPair> &gpair,
                        DMatrix *p_fmat,
                        const std::vector<bst_uint> &fset,
                        RegTree *p_tree) {
    const size_t num_feature = fset.size();
    // get the best split condition for each node
    std::vector<SplitEntry> sol(qexpand.size());
    std::vector<TStats> left_sum(qexpand.size());
    bst_omp_uint nexpand = static_cast<bst_omp_uint>(qexpand.size());
    std::vector<SplitEntry> sol(qexpand_.size());
    std::vector<TStats> left_sum(qexpand_.size());
    auto nexpand = static_cast<bst_omp_uint>(qexpand_.size());
    #pragma omp parallel for schedule(dynamic, 1)
    for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
      const int nid = qexpand[wid];
      CHECK_EQ(node2workindex[nid], static_cast<int>(wid));
      const int nid = qexpand_[wid];
      CHECK_EQ(node2workindex_[nid], static_cast<int>(wid));
      SplitEntry &best = sol[wid];
      TStats &node_sum = wspace.hset[0][num_feature + wid * (num_feature + 1)].data[0];
      TStats &node_sum = wspace_.hset[0][num_feature + wid * (num_feature + 1)].data[0];
      for (size_t i = 0; i < fset.size(); ++i) {
        EnumerateSplit(this->wspace.hset[0][i + wid * (num_feature+1)],
        EnumerateSplit(this->wspace_.hset[0][i + wid * (num_feature+1)],
                       node_sum, fset[i], &best, &left_sum[wid]);
      }
    }
    // get the best result, we can synchronize the solution
    for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
      const int nid = qexpand[wid];
      const int nid = qexpand_[wid];
      const SplitEntry &best = sol[wid];
      const TStats &node_sum = wspace.hset[0][num_feature + wid * (num_feature + 1)].data[0];
      const TStats &node_sum = wspace_.hset[0][num_feature + wid * (num_feature + 1)].data[0];
      this->SetStats(p_tree, nid, node_sum);
      // set up the values
      p_tree->stat(nid).loss_chg = best.loss_chg;
      p_tree->Stat(nid).loss_chg = best.loss_chg;
      // now we know the solution in snode[nid], set split
      if (best.loss_chg > rt_eps) {
      if (best.loss_chg > kRtEps) {
        p_tree->AddChilds(nid);
        (*p_tree)[nid].set_split(best.split_index(),
                                 best.split_value, best.default_left());
        (*p_tree)[nid].SetSplit(best.SplitIndex(),
                                best.split_value, best.DefaultLeft());
        // mark right child as 0, to indicate fresh leaf
        (*p_tree)[(*p_tree)[nid].cleft()].set_leaf(0.0f, 0);
        (*p_tree)[(*p_tree)[nid].cright()].set_leaf(0.0f, 0);
        (*p_tree)[(*p_tree)[nid].LeftChild()].SetLeaf(0.0f, 0);
        (*p_tree)[(*p_tree)[nid].RightChild()].SetLeaf(0.0f, 0);
        // right side sum
        TStats right_sum;
        right_sum.SetSubstract(node_sum, left_sum[wid]);
        this->SetStats(p_tree, (*p_tree)[nid].cleft(), left_sum[wid]);
        this->SetStats(p_tree, (*p_tree)[nid].cright(), right_sum);
        this->SetStats(p_tree, (*p_tree)[nid].LeftChild(), left_sum[wid]);
        this->SetStats(p_tree, (*p_tree)[nid].RightChild(), right_sum);
      } else {
        (*p_tree)[nid].set_leaf(p_tree->stat(nid).base_weight * param.learning_rate);
        (*p_tree)[nid].SetLeaf(p_tree->Stat(nid).base_weight * param_.learning_rate);
      }
    }
  }

  inline void SetStats(RegTree *p_tree, int nid, const TStats &node_sum) {
    p_tree->stat(nid).base_weight = static_cast<bst_float>(node_sum.CalcWeight(param));
    p_tree->stat(nid).sum_hess = static_cast<bst_float>(node_sum.sum_hess);
    node_sum.SetLeafVec(param, p_tree->leafvec(nid));
    p_tree->Stat(nid).base_weight = static_cast<bst_float>(node_sum.CalcWeight(param_));
    p_tree->Stat(nid).sum_hess = static_cast<bst_float>(node_sum.sum_hess);
    node_sum.SetLeafVec(param_, p_tree->Leafvec(nid));
  }
};

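Most of the churn in this class is readability-identifier-naming appending the configured suffix to non-public data members: wspace becomes wspace_, histred becomes histred_, and so on. The convention in isolation (hypothetical class):

class Counter {
 public:
  int value() const { return value_; }
  void Increment() { ++value_; }

 private:
  int value_{0};  // trailing '_' marks a non-public member, per Google style
};

Inside long methods such as FindSplit above, the suffix makes member accesses easy to tell apart from locals and parameters.
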
template<typename TStats>
class CQHistMaker: public HistMaker<TStats> {
 public:
  CQHistMaker() : cache_dmatrix_(nullptr) {
  }
  CQHistMaker() = default;

 protected:
  struct HistEntry {
@ -281,7 +280,7 @@ class CQHistMaker: public HistMaker<TStats> {
     * do linear scan, start from istart
     */
    inline void Add(bst_float fv,
                    const std::vector<bst_gpair> &gpair,
                    const std::vector<GradientPair> &gpair,
                    const MetaInfo &info,
                    const bst_uint ridx) {
      while (istart < hist.size && !(fv < hist.cut[istart])) ++istart;
@ -293,7 +292,7 @@ class CQHistMaker: public HistMaker<TStats> {
     * do linear scan, start from istart
     */
    inline void Add(bst_float fv,
                    bst_gpair gstats) {
                    GradientPair gstats) {
      if (fv < hist.cut[istart]) {
        hist.data[istart].Add(gstats);
      } else {
@ -311,190 +310,190 @@ class CQHistMaker: public HistMaker<TStats> {
    }
  };
  // sketch type used for this
  typedef common::WXQuantileSketch<bst_float, bst_float> WXQSketch;
  using WXQSketch = common::WXQuantileSketch<bst_float, bst_float>;
  // initialize the work set of tree
  void InitWorkSet(DMatrix *p_fmat,
                   const RegTree &tree,
                   std::vector<bst_uint> *p_fset) override {
    if (p_fmat != cache_dmatrix_) {
      feat_helper.InitByCol(p_fmat, tree);
      feat_helper_.InitByCol(p_fmat, tree);
      cache_dmatrix_ = p_fmat;
    }
    feat_helper.SyncInfo();
    feat_helper.SampleCol(this->param.colsample_bytree, p_fset);
    feat_helper_.SyncInfo();
    feat_helper_.SampleCol(this->param_.colsample_bytree, p_fset);
  }
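The WXQSketch alias change is modernize-use-using; both spellings declare the same alias, but the using form reads left-to-right and extends to alias templates. Sketch:

#include <utility>

typedef std::pair<float, float> CutPointOld;   // C-style alias
using CutPoint = std::pair<float, float>;      // same type, preferred form

template <typename T>
using PairOf = std::pair<T, T>;                // only expressible with "using"
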
  // code to create histogram
  void CreateHist(const std::vector<bst_gpair> &gpair,
  void CreateHist(const std::vector<GradientPair> &gpair,
                  DMatrix *p_fmat,
                  const std::vector<bst_uint> &fset,
                  const RegTree &tree) override {
    const MetaInfo &info = p_fmat->info();
    const MetaInfo &info = p_fmat->Info();
    // fill in reverse map
    feat2workindex.resize(tree.param.num_feature);
    std::fill(feat2workindex.begin(), feat2workindex.end(), -1);
    feat2workindex_.resize(tree.param.num_feature);
    std::fill(feat2workindex_.begin(), feat2workindex_.end(), -1);
    for (size_t i = 0; i < fset.size(); ++i) {
      feat2workindex[fset[i]] = static_cast<int>(i);
      feat2workindex_[fset[i]] = static_cast<int>(i);
    }
    // start to work
    this->wspace.Init(this->param, 1);
    this->wspace_.Init(this->param_, 1);
    // if it is C++11, use lazy evaluation for Allreduce,
    // to gain speedup in recovery
#if __cplusplus >= 201103L
    auto lazy_get_hist = [&]()
#endif
    {
      thread_hist.resize(omp_get_max_threads());
      thread_hist_.resize(omp_get_max_threads());
      // start accumulating statistics
      dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(fset);
      iter->BeforeFirst();
      while (iter->Next()) {
        const ColBatch &batch = iter->Value();
        // start enumeration
        const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
        const auto nsize = static_cast<bst_omp_uint>(batch.size);
        #pragma omp parallel for schedule(dynamic, 1)
        for (bst_omp_uint i = 0; i < nsize; ++i) {
          int offset = feat2workindex[batch.col_index[i]];
          int offset = feat2workindex_[batch.col_index[i]];
          if (offset >= 0) {
            this->UpdateHistCol(gpair, batch[i], info, tree,
                                fset, offset,
                                &thread_hist[omp_get_thread_num()]);
                                &thread_hist_[omp_get_thread_num()]);
          }
        }
      }
      // update node statistics.
      this->GetNodeStats(gpair, *p_fmat, tree,
                         &thread_stats, &node_stats);
      for (size_t i = 0; i < this->qexpand.size(); ++i) {
        const int nid = this->qexpand[i];
        const int wid = this->node2workindex[nid];
        this->wspace.hset[0][fset.size() + wid * (fset.size()+1)]
            .data[0] = node_stats[nid];
                         &thread_stats_, &node_stats_);
      for (size_t i = 0; i < this->qexpand_.size(); ++i) {
        const int nid = this->qexpand_[i];
        const int wid = this->node2workindex_[nid];
        this->wspace_.hset[0][fset.size() + wid * (fset.size()+1)]
            .data[0] = node_stats_[nid];
      }
    };
    // sync the histogram
    // if it is C++11, use lazy evaluation for Allreduce
#if __cplusplus >= 201103L
    this->histred.Allreduce(dmlc::BeginPtr(this->wspace.hset[0].data),
                            this->wspace.hset[0].data.size(), lazy_get_hist);
    this->histred_.Allreduce(dmlc::BeginPtr(this->wspace_.hset[0].data),
                             this->wspace_.hset[0].data.size(), lazy_get_hist);
#else
    this->histred.Allreduce(dmlc::BeginPtr(this->wspace.hset[0].data),
                            this->wspace.hset[0].data.size());
    this->histred_.Allreduce(dmlc::BeginPtr(this->wspace_.hset[0].data),
                             this->wspace_.hset[0].data.size());
#endif
  }
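The lazy_get_hist lambda above defers the whole histogram build until the Allreduce actually needs fresh data, so a worker recovering from a checkpoint can skip it. A simplified stand-in for the idea (illustrative only, not rabit's real interface):

#include <cstddef>
#include <functional>

void AllreduceLazy(double *buf, std::size_t n,
                   const std::function<void()> &prepare) {
  const bool restored_from_checkpoint = false;  // decided by the framework
  if (!restored_from_checkpoint) {
    prepare();  // pay for the expensive fill only when it is really needed
  }
  // ... reduce buf[0..n) across workers ...
  (void)buf;
  (void)n;
}
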
  void ResetPositionAfterSplit(DMatrix *p_fmat,
                               const RegTree &tree) override {
    this->GetSplitSet(this->qexpand, tree, &fsplit_set);
    this->GetSplitSet(this->qexpand_, tree, &fsplit_set_);
  }
  void ResetPosAndPropose(const std::vector<bst_gpair> &gpair,
  void ResetPosAndPropose(const std::vector<GradientPair> &gpair,
                          DMatrix *p_fmat,
                          const std::vector<bst_uint> &fset,
                          const RegTree &tree) override {
    const MetaInfo &info = p_fmat->info();
    const MetaInfo &info = p_fmat->Info();
    // fill in reverse map
    feat2workindex.resize(tree.param.num_feature);
    std::fill(feat2workindex.begin(), feat2workindex.end(), -1);
    work_set.clear();
    for (size_t i = 0; i < fset.size(); ++i) {
      if (feat_helper.Type(fset[i]) == 2) {
        feat2workindex[fset[i]] = static_cast<int>(work_set.size());
        work_set.push_back(fset[i]);
    feat2workindex_.resize(tree.param.num_feature);
    std::fill(feat2workindex_.begin(), feat2workindex_.end(), -1);
    work_set_.clear();
    for (auto fidx : fset) {
      if (feat_helper_.Type(fidx) == 2) {
        feat2workindex_[fidx] = static_cast<int>(work_set_.size());
        work_set_.push_back(fidx);
      } else {
        feat2workindex[fset[i]] = -2;
        feat2workindex_[fidx] = -2;
      }
    }
    const size_t work_set_size = work_set.size();
    const size_t work_set_size = work_set_.size();

    sketchs.resize(this->qexpand.size() * work_set_size);
    for (size_t i = 0; i < sketchs.size(); ++i) {
      sketchs[i].Init(info.num_row, this->param.sketch_eps);
    sketchs_.resize(this->qexpand_.size() * work_set_size);
    for (size_t i = 0; i < sketchs_.size(); ++i) {
      sketchs_[i].Init(info.num_row_, this->param_.sketch_eps);
    }
    // initialize the summary array
    summary_array.resize(sketchs.size());
    summary_array_.resize(sketchs_.size());
    // setup maximum size
    unsigned max_size = this->param.max_sketch_size();
    for (size_t i = 0; i < sketchs.size(); ++i) {
      summary_array[i].Reserve(max_size);
    unsigned max_size = this->param_.MaxSketchSize();
    for (size_t i = 0; i < sketchs_.size(); ++i) {
      summary_array_[i].Reserve(max_size);
    }
    {
      // get summary
      thread_sketch.resize(omp_get_max_threads());
      thread_sketch_.resize(omp_get_max_threads());

      // TWOPASS: use the real set + split set in the column iteration.
      this->SetDefaultPostion(p_fmat, tree);
      work_set.insert(work_set.end(), fsplit_set.begin(), fsplit_set.end());
      std::sort(work_set.begin(), work_set.end());
      work_set.resize(std::unique(work_set.begin(), work_set.end()) - work_set.begin());
      work_set_.insert(work_set_.end(), fsplit_set_.begin(), fsplit_set_.end());
      std::sort(work_set_.begin(), work_set_.end());
      work_set_.resize(std::unique(work_set_.begin(), work_set_.end()) - work_set_.begin());

      // start accumulating statistics
      dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(work_set);
      dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(work_set_);
      iter->BeforeFirst();
      while (iter->Next()) {
        const ColBatch &batch = iter->Value();
        // TWOPASS: use the real set + split set in the column iteration.
        this->CorrectNonDefaultPositionByBatch(batch, fsplit_set, tree);
        this->CorrectNonDefaultPositionByBatch(batch, fsplit_set_, tree);

        // start enumeration
        const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
        const auto nsize = static_cast<bst_omp_uint>(batch.size);
        #pragma omp parallel for schedule(dynamic, 1)
        for (bst_omp_uint i = 0; i < nsize; ++i) {
          int offset = feat2workindex[batch.col_index[i]];
          int offset = feat2workindex_[batch.col_index[i]];
          if (offset >= 0) {
            this->UpdateSketchCol(gpair, batch[i], tree,
                                  work_set_size, offset,
                                  &thread_sketch[omp_get_thread_num()]);
                                  &thread_sketch_[omp_get_thread_num()]);
          }
        }
      }
      for (size_t i = 0; i < sketchs.size(); ++i) {
      for (size_t i = 0; i < sketchs_.size(); ++i) {
        common::WXQuantileSketch<bst_float, bst_float>::SummaryContainer out;
        sketchs[i].GetSummary(&out);
        summary_array[i].SetPrune(out, max_size);
        sketchs_[i].GetSummary(&out);
        summary_array_[i].SetPrune(out, max_size);
      }
      CHECK_EQ(summary_array.size(), sketchs.size());
      CHECK_EQ(summary_array_.size(), sketchs_.size());
    }
    if (summary_array.size() != 0) {
    if (summary_array_.size() != 0) {
      size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size);
      sreducer.Allreduce(dmlc::BeginPtr(summary_array), nbytes, summary_array.size());
      sreducer_.Allreduce(dmlc::BeginPtr(summary_array_), nbytes, summary_array_.size());
    }
    // now we get the final result of sketch, setup the cut
    this->wspace.cut.clear();
    this->wspace.rptr.clear();
    this->wspace.rptr.push_back(0);
    for (size_t wid = 0; wid < this->qexpand.size(); ++wid) {
      for (size_t i = 0; i < fset.size(); ++i) {
        int offset = feat2workindex[fset[i]];
    this->wspace_.cut.clear();
    this->wspace_.rptr.clear();
    this->wspace_.rptr.push_back(0);
    for (size_t wid = 0; wid < this->qexpand_.size(); ++wid) {
      for (unsigned int i : fset) {
        int offset = feat2workindex_[i];
        if (offset >= 0) {
          const WXQSketch::Summary &a = summary_array[wid * work_set_size + offset];
          const WXQSketch::Summary &a = summary_array_[wid * work_set_size + offset];
          for (size_t i = 1; i < a.size; ++i) {
            bst_float cpt = a.data[i].value - rt_eps;
            if (i == 1 || cpt > this->wspace.cut.back()) {
              this->wspace.cut.push_back(cpt);
            bst_float cpt = a.data[i].value - kRtEps;
            if (i == 1 || cpt > this->wspace_.cut.back()) {
              this->wspace_.cut.push_back(cpt);
            }
          }
          // push a value that is greater than anything
          if (a.size != 0) {
            bst_float cpt = a.data[a.size - 1].value;
            // this must be bigger than the last value in the scale
            bst_float last = cpt + fabs(cpt) + rt_eps;
            this->wspace.cut.push_back(last);
            bst_float last = cpt + fabs(cpt) + kRtEps;
            this->wspace_.cut.push_back(last);
          }
          this->wspace.rptr.push_back(static_cast<unsigned>(this->wspace.cut.size()));
          this->wspace_.rptr.push_back(static_cast<unsigned>(this->wspace_.cut.size()));
        } else {
          CHECK_EQ(offset, -2);
          bst_float cpt = feat_helper.MaxValue(fset[i]);
          this->wspace.cut.push_back(cpt + fabs(cpt) + rt_eps);
          this->wspace.rptr.push_back(static_cast<unsigned>(this->wspace.cut.size()));
          bst_float cpt = feat_helper_.MaxValue(i);
          this->wspace_.cut.push_back(cpt + fabs(cpt) + kRtEps);
          this->wspace_.rptr.push_back(static_cast<unsigned>(this->wspace_.cut.size()));
        }
      }
      // reserve last value for global statistics
      this->wspace.cut.push_back(0.0f);
      this->wspace.rptr.push_back(static_cast<unsigned>(this->wspace.cut.size()));
      this->wspace_.cut.push_back(0.0f);
      this->wspace_.rptr.push_back(static_cast<unsigned>(this->wspace_.cut.size()));
    }
    CHECK_EQ(this->wspace.rptr.size(),
             (fset.size() + 1) * this->qexpand.size() + 1);
    CHECK_EQ(this->wspace_.rptr.size(),
             (fset.size() + 1) * this->qexpand_.size() + 1);
  }

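The cut construction above terminates each feature's list with a value strictly greater than every observed one, so the std::upper_bound in HistUnit::Add always lands in a valid bin. The rule as a one-liner (kRtEps is the epsilon constant renamed in this commit; the helper name is ours):

#include <cmath>

// v + |v| + eps > v for every finite v, including negative v,
// where simply doubling the value would move in the wrong direction.
inline float UpperSentinel(float v, float eps) { return v + std::fabs(v) + eps; }
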
  inline void UpdateHistCol(const std::vector<bst_gpair> &gpair,
  inline void UpdateHistCol(const std::vector<GradientPair> &gpair,
                            const ColBatch::Inst &c,
                            const MetaInfo &info,
                            const RegTree &tree,
@ -505,21 +504,21 @@ class CQHistMaker: public HistMaker<TStats> {
    // initialize sbuilder for use
    std::vector<HistEntry> &hbuilder = *p_temp;
    hbuilder.resize(tree.param.num_nodes);
    for (size_t i = 0; i < this->qexpand.size(); ++i) {
      const unsigned nid = this->qexpand[i];
      const unsigned wid = this->node2workindex[nid];
    for (size_t i = 0; i < this->qexpand_.size(); ++i) {
      const unsigned nid = this->qexpand_[i];
      const unsigned wid = this->node2workindex_[nid];
      hbuilder[nid].istart = 0;
      hbuilder[nid].hist = this->wspace.hset[0][fid_offset + wid * (fset.size()+1)];
      hbuilder[nid].hist = this->wspace_.hset[0][fid_offset + wid * (fset.size()+1)];
    }
    if (TStats::kSimpleStats != 0 && this->param.cache_opt != 0) {
      const bst_uint kBuffer = 32;
    if (TStats::kSimpleStats != 0 && this->param_.cache_opt != 0) {
      constexpr bst_uint kBuffer = 32;
      bst_uint align_length = c.length / kBuffer * kBuffer;
      int buf_position[kBuffer];
      bst_gpair buf_gpair[kBuffer];
      GradientPair buf_gpair[kBuffer];
      for (bst_uint j = 0; j < align_length; j += kBuffer) {
        for (bst_uint i = 0; i < kBuffer; ++i) {
          bst_uint ridx = c[j + i].index;
          buf_position[i] = this->position[ridx];
          buf_position[i] = this->position_[ridx];
          buf_gpair[i] = gpair[ridx];
        }
        for (bst_uint i = 0; i < kBuffer; ++i) {
@ -531,7 +530,7 @@ class CQHistMaker: public HistMaker<TStats> {
      }
      for (bst_uint j = align_length; j < c.length; ++j) {
        const bst_uint ridx = c[j].index;
        const int nid = this->position[ridx];
        const int nid = this->position_[ridx];
        if (nid >= 0) {
          hbuilder[nid].Add(c[j].fvalue, gpair[ridx]);
        }
@ -539,14 +538,14 @@ class CQHistMaker: public HistMaker<TStats> {
    } else {
      for (bst_uint j = 0; j < c.length; ++j) {
        const bst_uint ridx = c[j].index;
        const int nid = this->position[ridx];
        const int nid = this->position_[ridx];
        if (nid >= 0) {
          hbuilder[nid].Add(c[j].fvalue, gpair, info, ridx);
        }
      }
    }
  }
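In the cache-friendly path above, `const bst_uint kBuffer = 32` became constexpr: a const integral initialized with a literal already works as an array bound, but constexpr states the compile-time requirement explicitly, which the modernize checks prefer. Minimal sketch:

constexpr unsigned kBuffer = 32;   // guaranteed constant expression

void Demo() {
  int positions[kBuffer];          // OK as an array extent
  static_assert(kBuffer % 8 == 0, "keep the buffer a multiple of 8");
  (void)positions;
}
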
  inline void UpdateSketchCol(const std::vector<bst_gpair> &gpair,
  inline void UpdateSketchCol(const std::vector<GradientPair> &gpair,
                              const ColBatch::Inst &c,
                              const RegTree &tree,
                              size_t work_set_size,
@ -556,45 +555,45 @@ class CQHistMaker: public HistMaker<TStats> {
    // initialize sbuilder for use
    std::vector<BaseMaker::SketchEntry> &sbuilder = *p_temp;
    sbuilder.resize(tree.param.num_nodes);
    for (size_t i = 0; i < this->qexpand.size(); ++i) {
      const unsigned nid = this->qexpand[i];
      const unsigned wid = this->node2workindex[nid];
    for (size_t i = 0; i < this->qexpand_.size(); ++i) {
      const unsigned nid = this->qexpand_[i];
      const unsigned wid = this->node2workindex_[nid];
      sbuilder[nid].sum_total = 0.0f;
      sbuilder[nid].sketch = &sketchs[wid * work_set_size + offset];
      sbuilder[nid].sketch = &sketchs_[wid * work_set_size + offset];
    }

    // first pass: get the sum of weights (TODO: optimization to skip the first pass)
    for (bst_uint j = 0; j < c.length; ++j) {
      const bst_uint ridx = c[j].index;
      const int nid = this->position[ridx];
      const int nid = this->position_[ridx];
      if (nid >= 0) {
        sbuilder[nid].sum_total += gpair[ridx].GetHess();
      }
    }
    // if only one value, no need to do second pass
    if (c[0].fvalue == c[c.length-1].fvalue) {
      for (size_t i = 0; i < this->qexpand.size(); ++i) {
        const int nid = this->qexpand[i];
      for (size_t i = 0; i < this->qexpand_.size(); ++i) {
        const int nid = this->qexpand_[i];
        sbuilder[nid].sketch->Push(c[0].fvalue, static_cast<bst_float>(sbuilder[nid].sum_total));
      }
      return;
    }
    // two pass scan
    unsigned max_size = this->param.max_sketch_size();
    for (size_t i = 0; i < this->qexpand.size(); ++i) {
      const int nid = this->qexpand[i];
    unsigned max_size = this->param_.MaxSketchSize();
    for (size_t i = 0; i < this->qexpand_.size(); ++i) {
      const int nid = this->qexpand_[i];
      sbuilder[nid].Init(max_size);
    }
    // second pass, build the sketch
    if (TStats::kSimpleStats != 0 && this->param.cache_opt != 0) {
      const bst_uint kBuffer = 32;
    if (TStats::kSimpleStats != 0 && this->param_.cache_opt != 0) {
      constexpr bst_uint kBuffer = 32;
      bst_uint align_length = c.length / kBuffer * kBuffer;
      int buf_position[kBuffer];
      bst_float buf_hess[kBuffer];
      for (bst_uint j = 0; j < align_length; j += kBuffer) {
        for (bst_uint i = 0; i < kBuffer; ++i) {
          bst_uint ridx = c[j + i].index;
          buf_position[i] = this->position[ridx];
          buf_position[i] = this->position_[ridx];
          buf_hess[i] = gpair[ridx].GetHess();
        }
        for (bst_uint i = 0; i < kBuffer; ++i) {
@ -606,7 +605,7 @@ class CQHistMaker: public HistMaker<TStats> {
      }
      for (bst_uint j = align_length; j < c.length; ++j) {
        const bst_uint ridx = c[j].index;
        const int nid = this->position[ridx];
        const int nid = this->position_[ridx];
        if (nid >= 0) {
          sbuilder[nid].Push(c[j].fvalue, gpair[ridx].GetHess(), max_size);
        }
@ -614,136 +613,137 @@ class CQHistMaker: public HistMaker<TStats> {
    } else {
      for (bst_uint j = 0; j < c.length; ++j) {
        const bst_uint ridx = c[j].index;
        const int nid = this->position[ridx];
        const int nid = this->position_[ridx];
        if (nid >= 0) {
          sbuilder[nid].Push(c[j].fvalue, gpair[ridx].GetHess(), max_size);
        }
      }
    }
    for (size_t i = 0; i < this->qexpand.size(); ++i) {
      const int nid = this->qexpand[i];
    for (size_t i = 0; i < this->qexpand_.size(); ++i) {
      const int nid = this->qexpand_[i];
      sbuilder[nid].Finalize(max_size);
    }
  }
  // cached dmatrix where we initialized the feature on.
  const DMatrix* cache_dmatrix_;
  const DMatrix* cache_dmatrix_{nullptr};
  // feature helper
  BaseMaker::FMetaHelper feat_helper;
  BaseMaker::FMetaHelper feat_helper_;
  // temp space to map feature id to working index
  std::vector<int> feat2workindex;
  std::vector<int> feat2workindex_;
  // set of indices from fset that form the current work set
  std::vector<bst_uint> work_set;
  std::vector<bst_uint> work_set_;
  // set of indices from fset that are split candidates.
  std::vector<bst_uint> fsplit_set;
  std::vector<bst_uint> fsplit_set_;
  // thread temp data
  std::vector<std::vector<BaseMaker::SketchEntry> > thread_sketch;
  std::vector<std::vector<BaseMaker::SketchEntry> > thread_sketch_;
  // used to hold statistics
  std::vector<std::vector<TStats> > thread_stats;
  std::vector<std::vector<TStats> > thread_stats_;
  // used to hold start pointer
  std::vector<std::vector<HistEntry> > thread_hist;
  std::vector<std::vector<HistEntry> > thread_hist_;
  // node statistics
  std::vector<TStats> node_stats;
  std::vector<TStats> node_stats_;
  // summary array
  std::vector<WXQSketch::SummaryContainer> summary_array;
  std::vector<WXQSketch::SummaryContainer> summary_array_;
  // reducer for summary
  rabit::SerializeReducer<WXQSketch::SummaryContainer> sreducer;
  rabit::SerializeReducer<WXQSketch::SummaryContainer> sreducer_;
  // per node, per feature sketch
  std::vector<common::WXQuantileSketch<bst_float, bst_float> > sketchs;
  std::vector<common::WXQuantileSketch<bst_float, bst_float> > sketchs_;
};

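`const DMatrix* cache_dmatrix_{nullptr};` moves the initialization out of the constructor, which is what let CQHistMaker's constructor collapse to `= default` earlier in this class (modernize-use-default-member-init). The pattern in isolation (hypothetical class):

class Cache {
 public:
  Cache() = default;  // nothing left to do; the member initializer runs first

 private:
  const int *last_{nullptr};  // brace initializer replaces the ctor init list
};
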
// global proposal
template<typename TStats>
class GlobalProposalHistMaker: public CQHistMaker<TStats> {
 protected:
  void ResetPosAndPropose(const std::vector<bst_gpair> &gpair,
  void ResetPosAndPropose(const std::vector<GradientPair> &gpair,
                          DMatrix *p_fmat,
                          const std::vector<bst_uint> &fset,
                          const RegTree &tree) override {
    if (this->qexpand.size() == 1) {
    if (this->qexpand_.size() == 1) {
      cached_rptr_.clear();
      cached_cut_.clear();
    }
    if (cached_rptr_.size() == 0) {
      CHECK_EQ(this->qexpand.size(), 1U);
      CHECK_EQ(this->qexpand_.size(), 1U);
      CQHistMaker<TStats>::ResetPosAndPropose(gpair, p_fmat, fset, tree);
      cached_rptr_ = this->wspace.rptr;
      cached_cut_ = this->wspace.cut;
      cached_rptr_ = this->wspace_.rptr;
      cached_cut_ = this->wspace_.cut;
    } else {
      this->wspace.cut.clear();
      this->wspace.rptr.clear();
      this->wspace.rptr.push_back(0);
      for (size_t i = 0; i < this->qexpand.size(); ++i) {
      this->wspace_.cut.clear();
      this->wspace_.rptr.clear();
      this->wspace_.rptr.push_back(0);
      for (size_t i = 0; i < this->qexpand_.size(); ++i) {
        for (size_t j = 0; j < cached_rptr_.size() - 1; ++j) {
          this->wspace.rptr.push_back(
              this->wspace.rptr.back() + cached_rptr_[j + 1] - cached_rptr_[j]);
          this->wspace_.rptr.push_back(
              this->wspace_.rptr.back() + cached_rptr_[j + 1] - cached_rptr_[j]);
        }
        this->wspace.cut.insert(this->wspace.cut.end(), cached_cut_.begin(), cached_cut_.end());
        this->wspace_.cut.insert(this->wspace_.cut.end(), cached_cut_.begin(), cached_cut_.end());
      }
      CHECK_EQ(this->wspace.rptr.size(),
               (fset.size() + 1) * this->qexpand.size() + 1);
      CHECK_EQ(this->wspace.rptr.back(), this->wspace.cut.size());
      CHECK_EQ(this->wspace_.rptr.size(),
               (fset.size() + 1) * this->qexpand_.size() + 1);
      CHECK_EQ(this->wspace_.rptr.back(), this->wspace_.cut.size());
    }
  }

  // code to create histogram
  void CreateHist(const std::vector<bst_gpair> &gpair,
  void CreateHist(const std::vector<GradientPair> &gpair,
                  DMatrix *p_fmat,
                  const std::vector<bst_uint> &fset,
                  const RegTree &tree) override {
    const MetaInfo &info = p_fmat->info();
    const MetaInfo &info = p_fmat->Info();
    // fill in reverse map
    this->feat2workindex.resize(tree.param.num_feature);
    this->work_set = fset;
    std::fill(this->feat2workindex.begin(), this->feat2workindex.end(), -1);
    this->feat2workindex_.resize(tree.param.num_feature);
    this->work_set_ = fset;
    std::fill(this->feat2workindex_.begin(), this->feat2workindex_.end(), -1);
    for (size_t i = 0; i < fset.size(); ++i) {
      this->feat2workindex[fset[i]] = static_cast<int>(i);
      this->feat2workindex_[fset[i]] = static_cast<int>(i);
    }
    // start to work
    this->wspace.Init(this->param, 1);
    this->wspace_.Init(this->param_, 1);
    // to gain speedup in recovery
    {
      this->thread_hist.resize(omp_get_max_threads());
      this->thread_hist_.resize(omp_get_max_threads());

      // TWOPASS: use the real set + split set in the column iteration.
      this->SetDefaultPostion(p_fmat, tree);
      this->work_set.insert(this->work_set.end(), this->fsplit_set.begin(), this->fsplit_set.end());
      std::sort(this->work_set.begin(), this->work_set.end());
      this->work_set.resize(
          std::unique(this->work_set.begin(), this->work_set.end()) - this->work_set.begin());
      this->work_set_.insert(this->work_set_.end(), this->fsplit_set_.begin(),
                             this->fsplit_set_.end());
      std::sort(this->work_set_.begin(), this->work_set_.end());
      this->work_set_.resize(
          std::unique(this->work_set_.begin(), this->work_set_.end()) - this->work_set_.begin());

      // start accumulating statistics
      dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(this->work_set);
      dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(this->work_set_);
      iter->BeforeFirst();
      while (iter->Next()) {
        const ColBatch &batch = iter->Value();
        // TWOPASS: use the real set + split set in the column iteration.
        this->CorrectNonDefaultPositionByBatch(batch, this->fsplit_set, tree);
        this->CorrectNonDefaultPositionByBatch(batch, this->fsplit_set_, tree);

        // start enumeration
        const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
        const auto nsize = static_cast<bst_omp_uint>(batch.size);
        #pragma omp parallel for schedule(dynamic, 1)
        for (bst_omp_uint i = 0; i < nsize; ++i) {
          int offset = this->feat2workindex[batch.col_index[i]];
          int offset = this->feat2workindex_[batch.col_index[i]];
          if (offset >= 0) {
            this->UpdateHistCol(gpair, batch[i], info, tree,
                                fset, offset,
                                &this->thread_hist[omp_get_thread_num()]);
                                &this->thread_hist_[omp_get_thread_num()]);
          }
        }
      }

      // update node statistics.
      this->GetNodeStats(gpair, *p_fmat, tree,
                         &(this->thread_stats), &(this->node_stats));
      for (size_t i = 0; i < this->qexpand.size(); ++i) {
        const int nid = this->qexpand[i];
        const int wid = this->node2workindex[nid];
        this->wspace.hset[0][fset.size() + wid * (fset.size()+1)]
            .data[0] = this->node_stats[nid];
                         &(this->thread_stats_), &(this->node_stats_));
      for (size_t i = 0; i < this->qexpand_.size(); ++i) {
        const int nid = this->qexpand_[i];
        const int wid = this->node2workindex_[nid];
        this->wspace_.hset[0][fset.size() + wid * (fset.size()+1)]
            .data[0] = this->node_stats_[nid];
      }
    }
    this->histred.Allreduce(dmlc::BeginPtr(this->wspace.hset[0].data),
                            this->wspace.hset[0].data.size());
    this->histred_.Allreduce(dmlc::BeginPtr(this->wspace_.hset[0].data),
                             this->wspace_.hset[0].data.size());
  }

  // cached unit pointer
@ -756,17 +756,17 @@ class GlobalProposalHistMaker: public CQHistMaker<TStats> {
template<typename TStats>
class QuantileHistMaker: public HistMaker<TStats> {
 protected:
  typedef common::WXQuantileSketch<bst_float, bst_float> WXQSketch;
  using WXQSketch = common::WXQuantileSketch<bst_float, bst_float>;
  void ResetPosAndPropose(const std::vector<bst_gpair> &gpair,
  void ResetPosAndPropose(const std::vector<GradientPair> &gpair,
                          DMatrix *p_fmat,
                          const std::vector<bst_uint> &fset,
                          const RegTree &tree) override {
    const MetaInfo &info = p_fmat->info();
    const MetaInfo &info = p_fmat->Info();
    // initialize the data structure
    const int nthread = omp_get_max_threads();
    sketchs.resize(this->qexpand.size() * tree.param.num_feature);
    for (size_t i = 0; i < sketchs.size(); ++i) {
      sketchs[i].Init(info.num_row, this->param.sketch_eps);
    sketchs_.resize(this->qexpand_.size() * tree.param.num_feature);
    for (size_t i = 0; i < sketchs_.size(); ++i) {
      sketchs_[i].Init(info.num_row_, this->param_.sketch_eps);
    }
    // start accumulating statistics
    dmlc::DataIter<RowBatch> *iter = p_fmat->RowIterator();
@ -775,7 +775,7 @@ class QuantileHistMaker: public HistMaker<TStats> {
      const RowBatch &batch = iter->Value();
      // parallel convert to column major format
      common::ParallelGroupBuilder<SparseBatch::Entry>
          builder(&col_ptr, &col_data, &thread_col_ptr);
          builder(&col_ptr_, &col_data_, &thread_col_ptr_);
      builder.InitBudget(tree.param.num_feature, nthread);

      const bst_omp_uint nbatch = static_cast<bst_omp_uint>(batch.size);
@ -783,13 +783,13 @@ class QuantileHistMaker: public HistMaker<TStats> {
      for (bst_omp_uint i = 0; i < nbatch; ++i) {
        RowBatch::Inst inst = batch[i];
        const bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
        int nid = this->position[ridx];
        int nid = this->position_[ridx];
        if (nid >= 0) {
          if (!tree[nid].is_leaf()) {
            this->position[ridx] = nid = HistMaker<TStats>::NextLevel(inst, tree, nid);
          if (!tree[nid].IsLeaf()) {
            this->position_[ridx] = nid = HistMaker<TStats>::NextLevel(inst, tree, nid);
          }
          if (this->node2workindex[nid] < 0) {
            this->position[ridx] = ~nid;
          if (this->node2workindex_[nid] < 0) {
            this->position_[ridx] = ~nid;
          } else {
            for (bst_uint j = 0; j < inst.length; ++j) {
              builder.AddBudget(inst[j].index, omp_get_thread_num());
@ -802,7 +802,7 @@ class QuantileHistMaker: public HistMaker<TStats> {
      for (bst_omp_uint i = 0; i < nbatch; ++i) {
        RowBatch::Inst inst = batch[i];
        const bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
        const int nid = this->position[ridx];
        const int nid = this->position_[ridx];
        if (nid >= 0) {
          for (bst_uint j = 0; j < inst.length; ++j) {
            builder.Push(inst[j].index,
@ -812,71 +812,71 @@ class QuantileHistMaker: public HistMaker<TStats> {
        }
      }
      // start putting things into sketch
      const bst_omp_uint nfeat = col_ptr.size() - 1;
      const bst_omp_uint nfeat = col_ptr_.size() - 1;
      #pragma omp parallel for schedule(dynamic, 1)
      for (bst_omp_uint k = 0; k < nfeat; ++k) {
        for (size_t i = col_ptr[k]; i < col_ptr[k+1]; ++i) {
          const SparseBatch::Entry &e = col_data[i];
          const int wid = this->node2workindex[e.index];
          sketchs[wid * tree.param.num_feature + k].Push(e.fvalue, gpair[e.index].GetHess());
        for (size_t i = col_ptr_[k]; i < col_ptr_[k+1]; ++i) {
          const SparseBatch::Entry &e = col_data_[i];
          const int wid = this->node2workindex_[e.index];
          sketchs_[wid * tree.param.num_feature + k].Push(e.fvalue, gpair[e.index].GetHess());
        }
      }
    }
    // setup maximum size
    unsigned max_size = this->param.max_sketch_size();
    unsigned max_size = this->param_.MaxSketchSize();
    // synchronize sketch
    summary_array.resize(sketchs.size());
    for (size_t i = 0; i < sketchs.size(); ++i) {
    summary_array_.resize(sketchs_.size());
    for (size_t i = 0; i < sketchs_.size(); ++i) {
      common::WQuantileSketch<bst_float, bst_float>::SummaryContainer out;
      sketchs[i].GetSummary(&out);
      summary_array[i].Reserve(max_size);
      summary_array[i].SetPrune(out, max_size);
      sketchs_[i].GetSummary(&out);
      summary_array_[i].Reserve(max_size);
      summary_array_[i].SetPrune(out, max_size);
    }

    size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size);
    sreducer.Allreduce(dmlc::BeginPtr(summary_array), nbytes, summary_array.size());
    sreducer_.Allreduce(dmlc::BeginPtr(summary_array_), nbytes, summary_array_.size());
    // now we get the final result of sketch, setup the cut
    this->wspace.cut.clear();
    this->wspace.rptr.clear();
    this->wspace.rptr.push_back(0);
    for (size_t wid = 0; wid < this->qexpand.size(); ++wid) {
    this->wspace_.cut.clear();
    this->wspace_.rptr.clear();
    this->wspace_.rptr.push_back(0);
    for (size_t wid = 0; wid < this->qexpand_.size(); ++wid) {
      for (int fid = 0; fid < tree.param.num_feature; ++fid) {
        const WXQSketch::Summary &a = summary_array[wid * tree.param.num_feature + fid];
        const WXQSketch::Summary &a = summary_array_[wid * tree.param.num_feature + fid];
        for (size_t i = 1; i < a.size; ++i) {
          bst_float cpt = a.data[i].value - rt_eps;
          if (i == 1 || cpt > this->wspace.cut.back()) {
            this->wspace.cut.push_back(cpt);
          bst_float cpt = a.data[i].value - kRtEps;
          if (i == 1 || cpt > this->wspace_.cut.back()) {
            this->wspace_.cut.push_back(cpt);
          }
        }
        // push a value that is greater than anything
        if (a.size != 0) {
          bst_float cpt = a.data[a.size - 1].value;
          // this must be bigger than the last value in the scale
          bst_float last = cpt + fabs(cpt) + rt_eps;
          this->wspace.cut.push_back(last);
          bst_float last = cpt + fabs(cpt) + kRtEps;
          this->wspace_.cut.push_back(last);
        }
        this->wspace.rptr.push_back(this->wspace.cut.size());
        this->wspace_.rptr.push_back(this->wspace_.cut.size());
      }
      // reserve last value for global statistics
      this->wspace.cut.push_back(0.0f);
      this->wspace.rptr.push_back(this->wspace.cut.size());
      this->wspace_.cut.push_back(0.0f);
      this->wspace_.rptr.push_back(this->wspace_.cut.size());
    }
    CHECK_EQ(this->wspace.rptr.size(),
             (tree.param.num_feature + 1) * this->qexpand.size() + 1);
    CHECK_EQ(this->wspace_.rptr.size(),
             (tree.param.num_feature + 1) * this->qexpand_.size() + 1);
  }

 private:
  // summary array
  std::vector<WXQSketch::SummaryContainer> summary_array;
  std::vector<WXQSketch::SummaryContainer> summary_array_;
  // reducer for summary
  rabit::SerializeReducer<WXQSketch::SummaryContainer> sreducer;
  rabit::SerializeReducer<WXQSketch::SummaryContainer> sreducer_;
  // local temp column data structure
  std::vector<size_t> col_ptr;
  std::vector<size_t> col_ptr_;
  // local storage of column data
  std::vector<SparseBatch::Entry> col_data;
  std::vector<std::vector<size_t> > thread_col_ptr;
  std::vector<SparseBatch::Entry> col_data_;
  std::vector<std::vector<size_t> > thread_col_ptr_;
  // per node, per feature sketch
  std::vector<common::WQuantileSketch<bst_float, bst_float> > sketchs;
  std::vector<common::WQuantileSketch<bst_float, bst_float> > sketchs_;
};

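Several hunks in this file replace a spelled-out type with auto when the initializer already names it, e.g. `const auto nsize = static_cast<bst_omp_uint>(batch.size)`; this is modernize-use-auto. Sketch:

#include <vector>

void Demo(const std::vector<float> &batch) {
  // The cast already states the type, so repeating it on the left is noise;
  // auto deduces exactly unsigned here.
  const auto nsize = static_cast<unsigned>(batch.size());
  (void)nsize;
}
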
XGBOOST_REGISTER_TREE_UPDATER(LocalHistMaker, "grow_local_histmaker")

@ -21,37 +21,37 @@ DMLC_REGISTRY_FILE_TAG(updater_prune);
class TreePruner: public TreeUpdater {
 public:
  TreePruner() {
    syncher.reset(TreeUpdater::Create("sync"));
    syncher_.reset(TreeUpdater::Create("sync"));
  }
  // set training parameter
  void Init(const std::vector<std::pair<std::string, std::string> >& args) override {
    param.InitAllowUnknown(args);
    syncher->Init(args);
    param_.InitAllowUnknown(args);
    syncher_->Init(args);
  }
  // update the tree, do pruning
  void Update(HostDeviceVector<bst_gpair> *gpair,
  void Update(HostDeviceVector<GradientPair> *gpair,
              DMatrix *p_fmat,
              const std::vector<RegTree*> &trees) override {
    // rescale learning rate according to size of trees
    float lr = param.learning_rate;
    param.learning_rate = lr / trees.size();
    for (size_t i = 0; i < trees.size(); ++i) {
      this->DoPrune(*trees[i]);
    float lr = param_.learning_rate;
    param_.learning_rate = lr / trees.size();
    for (auto tree : trees) {
      this->DoPrune(*tree);
    }
    param.learning_rate = lr;
    syncher->Update(gpair, p_fmat, trees);
    param_.learning_rate = lr;
    syncher_->Update(gpair, p_fmat, trees);
  }

 private:
  // try to prune off current leaf
  inline int TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) { // NOLINT(*)
    if (tree[nid].is_root()) return npruned;
    int pid = tree[nid].parent();
    RegTree::NodeStat &s = tree.stat(pid);
    if (tree[nid].IsRoot()) return npruned;
    int pid = tree[nid].Parent();
    RegTree::NodeStat &s = tree.Stat(pid);
    ++s.leaf_child_cnt;
    if (s.leaf_child_cnt >= 2 && param.need_prune(s.loss_chg, depth - 1)) {
    if (s.leaf_child_cnt >= 2 && param_.NeedPrune(s.loss_chg, depth - 1)) {
      // need to be pruned
      tree.ChangeToLeaf(pid, param.learning_rate * s.base_weight);
      tree.ChangeToLeaf(pid, param_.learning_rate * s.base_weight);
      // tail recursion
      return this->TryPruneLeaf(tree, pid, depth - 1, npruned + 2);
    } else {
@ -63,25 +63,25 @@ class TreePruner: public TreeUpdater {
    int npruned = 0;
    // initialize auxiliary statistics
    for (int nid = 0; nid < tree.param.num_nodes; ++nid) {
      tree.stat(nid).leaf_child_cnt = 0;
      tree.Stat(nid).leaf_child_cnt = 0;
    }
    for (int nid = 0; nid < tree.param.num_nodes; ++nid) {
      if (tree[nid].is_leaf()) {
      if (tree[nid].IsLeaf()) {
        npruned = this->TryPruneLeaf(tree, nid, tree.GetDepth(nid), npruned);
      }
    }
    if (!param.silent) {
    if (!param_.silent) {
      LOG(INFO) << "tree pruning end, " << tree.param.num_roots << " roots, "
                << tree.num_extra_nodes() << " extra nodes, " << npruned
                << tree.NumExtraNodes() << " extra nodes, " << npruned
                << " pruned nodes, max_depth=" << tree.MaxDepth();
    }
  }

 private:
  // synchronizer
  std::unique_ptr<TreeUpdater> syncher;
  std::unique_ptr<TreeUpdater> syncher_;
  // training parameter
  TrainParam param;
  TrainParam param_;
};

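TryPruneLeaf keeps its `// NOLINT(*)` marker: the mutable `RegTree &tree` parameter would trip the google-runtime-references check enabled for this repository, and the comment silences clang-tidy for that one line rather than changing a widely used signature. Usage sketch (hypothetical type and function):

struct Tree { int num_nodes{1}; };

// The reference is mutated on purpose; suppress the style check locally.
int Shrink(Tree &tree) {  // NOLINT(*)
  return --tree.num_nodes;
}
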
XGBOOST_REGISTER_TREE_UPDATER(TreePruner, "prune")

@ -22,14 +22,14 @@ template<typename TStats>
class TreeRefresher: public TreeUpdater {
 public:
  void Init(const std::vector<std::pair<std::string, std::string> >& args) override {
    param.InitAllowUnknown(args);
    param_.InitAllowUnknown(args);
  }
  // update the tree by refreshing its statistics
  void Update(HostDeviceVector<bst_gpair> *gpair,
  void Update(HostDeviceVector<GradientPair> *gpair,
              DMatrix *p_fmat,
              const std::vector<RegTree*> &trees) override {
    if (trees.size() == 0) return;
    std::vector<bst_gpair> &gpair_h = gpair->data_h();
    std::vector<GradientPair> &gpair_h = gpair->HostVector();
    // number of threads
    // thread temporal space
    std::vector<std::vector<TStats> > stemp;
@ -42,11 +42,11 @@ class TreeRefresher: public TreeUpdater {
    {
      int tid = omp_get_thread_num();
      int num_nodes = 0;
      for (size_t i = 0; i < trees.size(); ++i) {
        num_nodes += trees[i]->param.num_nodes;
      for (auto tree : trees) {
        num_nodes += tree->param.num_nodes;
      }
      stemp[tid].resize(num_nodes, TStats(param));
      std::fill(stemp[tid].begin(), stemp[tid].end(), TStats(param));
      stemp[tid].resize(num_nodes, TStats(param_));
      std::fill(stemp[tid].begin(), stemp[tid].end(), TStats(param_));
      fvec_temp[tid].Init(trees[0]->param.num_feature);
    }
    // if it is C++11, use lazy evaluation for Allreduce,
@ -55,32 +55,32 @@ class TreeRefresher: public TreeUpdater {
    auto lazy_get_stats = [&]()
#endif
    {
      const MetaInfo &info = p_fmat->info();
      const MetaInfo &info = p_fmat->Info();
      // start accumulating statistics
      dmlc::DataIter<RowBatch> *iter = p_fmat->RowIterator();
      iter->BeforeFirst();
      while (iter->Next()) {
        const RowBatch &batch = iter->Value();
        CHECK_LT(batch.size, std::numeric_limits<unsigned>::max());
        const bst_omp_uint nbatch = static_cast<bst_omp_uint>(batch.size);
        const auto nbatch = static_cast<bst_omp_uint>(batch.size);
        #pragma omp parallel for schedule(static)
        for (bst_omp_uint i = 0; i < nbatch; ++i) {
          RowBatch::Inst inst = batch[i];
          const int tid = omp_get_thread_num();
          const bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
          const auto ridx = static_cast<bst_uint>(batch.base_rowid + i);
          RegTree::FVec &feats = fvec_temp[tid];
          feats.Fill(inst);
          int offset = 0;
          for (size_t j = 0; j < trees.size(); ++j) {
            AddStats(*trees[j], feats, gpair_h, info, ridx,
          for (auto tree : trees) {
            AddStats(*tree, feats, gpair_h, info, ridx,
                     dmlc::BeginPtr(stemp[tid]) + offset);
            offset += trees[j]->param.num_nodes;
            offset += tree->param.num_nodes;
          }
          feats.Drop(inst);
        }
      }
      // aggregate the statistics
      int num_nodes = static_cast<int>(stemp[0].size());
      auto num_nodes = static_cast<int>(stemp[0].size());
      #pragma omp parallel for schedule(static)
      for (int nid = 0; nid < num_nodes; ++nid) {
        for (int tid = 1; tid < nthread; ++tid) {
@ -89,64 +89,64 @@ class TreeRefresher: public TreeUpdater {
        }
      };
#if __cplusplus >= 201103L
    reducer.Allreduce(dmlc::BeginPtr(stemp[0]), stemp[0].size(), lazy_get_stats);
    reducer_.Allreduce(dmlc::BeginPtr(stemp[0]), stemp[0].size(), lazy_get_stats);
#else
    reducer.Allreduce(dmlc::BeginPtr(stemp[0]), stemp[0].size());
    reducer_.Allreduce(dmlc::BeginPtr(stemp[0]), stemp[0].size());
#endif
    // rescale learning rate according to size of trees
    float lr = param.learning_rate;
    param.learning_rate = lr / trees.size();
    float lr = param_.learning_rate;
    param_.learning_rate = lr / trees.size();
    int offset = 0;
    for (size_t i = 0; i < trees.size(); ++i) {
      for (int rid = 0; rid < trees[i]->param.num_roots; ++rid) {
        this->Refresh(dmlc::BeginPtr(stemp[0]) + offset, rid, trees[i]);
    for (auto tree : trees) {
      for (int rid = 0; rid < tree->param.num_roots; ++rid) {
        this->Refresh(dmlc::BeginPtr(stemp[0]) + offset, rid, tree);
      }
      offset += trees[i]->param.num_nodes;
      offset += tree->param.num_nodes;
    }
    // set learning rate back
    param.learning_rate = lr;
    param_.learning_rate = lr;
  }

 private:
  inline static void AddStats(const RegTree &tree,
                              const RegTree::FVec &feat,
                              const std::vector<bst_gpair> &gpair,
                              const std::vector<GradientPair> &gpair,
                              const MetaInfo &info,
                              const bst_uint ridx,
                              TStats *gstats) {
    // start from the root group that this row belongs to
    int pid = static_cast<int>(info.GetRoot(ridx));
    auto pid = static_cast<int>(info.GetRoot(ridx));
    gstats[pid].Add(gpair, info, ridx);
    // traverse the tree
    while (!tree[pid].is_leaf()) {
      unsigned split_index = tree[pid].split_index();
      pid = tree.GetNext(pid, feat.fvalue(split_index), feat.is_missing(split_index));
    while (!tree[pid].IsLeaf()) {
      unsigned split_index = tree[pid].SplitIndex();
      pid = tree.GetNext(pid, feat.Fvalue(split_index), feat.IsMissing(split_index));
      gstats[pid].Add(gpair, info, ridx);
    }
  }
  inline void Refresh(const TStats *gstats,
                      int nid, RegTree *p_tree) {
    RegTree &tree = *p_tree;
    tree.stat(nid).base_weight = static_cast<bst_float>(gstats[nid].CalcWeight(param));
    tree.stat(nid).sum_hess = static_cast<bst_float>(gstats[nid].sum_hess);
    gstats[nid].SetLeafVec(param, tree.leafvec(nid));
    if (tree[nid].is_leaf()) {
      if (param.refresh_leaf) {
        tree[nid].set_leaf(tree.stat(nid).base_weight * param.learning_rate);
    tree.Stat(nid).base_weight = static_cast<bst_float>(gstats[nid].CalcWeight(param_));
    tree.Stat(nid).sum_hess = static_cast<bst_float>(gstats[nid].sum_hess);
    gstats[nid].SetLeafVec(param_, tree.Leafvec(nid));
    if (tree[nid].IsLeaf()) {
      if (param_.refresh_leaf) {
        tree[nid].SetLeaf(tree.Stat(nid).base_weight * param_.learning_rate);
      }
    } else {
      tree.stat(nid).loss_chg = static_cast<bst_float>(
          gstats[tree[nid].cleft()].CalcGain(param) +
          gstats[tree[nid].cright()].CalcGain(param) -
          gstats[nid].CalcGain(param));
      this->Refresh(gstats, tree[nid].cleft(), p_tree);
      this->Refresh(gstats, tree[nid].cright(), p_tree);
      tree.Stat(nid).loss_chg = static_cast<bst_float>(
          gstats[tree[nid].LeftChild()].CalcGain(param_) +
          gstats[tree[nid].RightChild()].CalcGain(param_) -
          gstats[nid].CalcGain(param_));
      this->Refresh(gstats, tree[nid].LeftChild(), p_tree);
      this->Refresh(gstats, tree[nid].RightChild(), p_tree);
    }
  }
  // training parameter
  TrainParam param;
  TrainParam param_;
  // reducer
  rabit::Reducer<TStats, TStats::Reduce> reducer;
  rabit::Reducer<TStats, TStats::Reduce> reducer_;
};

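Every updater's Update repeats the same learning-rate dance: save it, divide by trees.size(), restore it afterwards. The effect is that a group of trees built in one call behaves like a single boosting step. A worked example with assumed numbers (helper name is ours):

#include <cstddef>

float PerTreeLearningRate(float lr, std::size_t n_trees) {
  // lr = 0.3 and n_trees = 3 gives 0.1 per tree; the three shrunken trees
  // together contribute one 0.3-sized step before learning_rate is restored.
  return lr / static_cast<float>(n_trees);
}
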
XGBOOST_REGISTER_TREE_UPDATER(TreeRefresher, "refresh")

@ -22,58 +22,57 @@ DMLC_REGISTRY_FILE_TAG(updater_skmaker);

class SketchMaker: public BaseMaker {
 public:
  void Update(HostDeviceVector<bst_gpair> *gpair,
  void Update(HostDeviceVector<GradientPair> *gpair,
              DMatrix *p_fmat,
              const std::vector<RegTree*> &trees) override {
    // rescale learning rate according to size of trees
    float lr = param.learning_rate;
    param.learning_rate = lr / trees.size();
    float lr = param_.learning_rate;
    param_.learning_rate = lr / trees.size();
    // build tree
    for (size_t i = 0; i < trees.size(); ++i) {
      this->Update(gpair->data_h(), p_fmat, trees[i]);
    for (auto tree : trees) {
      this->Update(gpair->HostVector(), p_fmat, tree);
    }
    param.learning_rate = lr;
    param_.learning_rate = lr;
  }

 protected:
  inline void Update(const std::vector<bst_gpair> &gpair,
  inline void Update(const std::vector<GradientPair> &gpair,
                     DMatrix *p_fmat,
                     RegTree *p_tree) {
    this->InitData(gpair, *p_fmat, *p_tree);
    for (int depth = 0; depth < param.max_depth; ++depth) {
    for (int depth = 0; depth < param_.max_depth; ++depth) {
      this->GetNodeStats(gpair, *p_fmat, *p_tree,
                         &thread_stats, &node_stats);
                         &thread_stats_, &node_stats_);
      this->BuildSketch(gpair, p_fmat, *p_tree);
      this->SyncNodeStats();
      this->FindSplit(depth, gpair, p_fmat, p_tree);
      this->ResetPositionCol(qexpand, p_fmat, *p_tree);
      this->ResetPositionCol(qexpand_, p_fmat, *p_tree);
      this->UpdateQueueExpand(*p_tree);
      // if nothing is left to be expanded, break
      if (qexpand.size() == 0) break;
      if (qexpand_.size() == 0) break;
    }
    if (qexpand.size() != 0) {
    if (qexpand_.size() != 0) {
      this->GetNodeStats(gpair, *p_fmat, *p_tree,
                         &thread_stats, &node_stats);
                         &thread_stats_, &node_stats_);
      this->SyncNodeStats();
    }
    // set all statistics correctly
    for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) {
      this->SetStats(nid, node_stats[nid], p_tree);
      if (!(*p_tree)[nid].is_leaf()) {
        p_tree->stat(nid).loss_chg = static_cast<bst_float>(
            node_stats[(*p_tree)[nid].cleft()].CalcGain(param) +
            node_stats[(*p_tree)[nid].cright()].CalcGain(param) -
            node_stats[nid].CalcGain(param));
      this->SetStats(nid, node_stats_[nid], p_tree);
      if (!(*p_tree)[nid].IsLeaf()) {
        p_tree->Stat(nid).loss_chg = static_cast<bst_float>(
            node_stats_[(*p_tree)[nid].LeftChild()].CalcGain(param_) +
            node_stats_[(*p_tree)[nid].RightChild()].CalcGain(param_) -
            node_stats_[nid].CalcGain(param_));
      }
    }
    // set the leaves remaining in the expand queue
    for (size_t i = 0; i < qexpand.size(); ++i) {
      const int nid = qexpand[i];
      (*p_tree)[nid].set_leaf(p_tree->stat(nid).base_weight * param.learning_rate);
    for (int nid : qexpand_) {
      (*p_tree)[nid].SetLeaf(p_tree->Stat(nid).base_weight * param_.learning_rate);
    }
  }
  // define the sketch we want to use
  typedef common::WXQuantileSketch<bst_float, bst_float> WXQSketch;
  using WXQSketch = common::WXQuantileSketch<bst_float, bst_float>;

 private:
  // statistics needed in the gradient calculation
@ -84,20 +83,20 @@ class SketchMaker: public BaseMaker {
    double neg_grad;
    /*! \brief sum of hessian statistics */
    double sum_hess;
    SKStats(void) {}
    SKStats() = default;
    // constructor
    explicit SKStats(const TrainParam &param) {
      this->Clear();
    }
    /*! \brief clear the statistics */
    inline void Clear(void) {
    inline void Clear() {
      neg_grad = pos_grad = sum_hess = 0.0f;
    }
    // accumulate statistics
    inline void Add(const std::vector<bst_gpair> &gpair,
    inline void Add(const std::vector<GradientPair> &gpair,
                    const MetaInfo &info,
                    bst_uint ridx) {
      const bst_gpair &b = gpair[ridx];
      const GradientPair &b = gpair[ridx];
      if (b.GetGrad() >= 0.0f) {
        pos_grad += b.GetGrad();
      } else {
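`SKStats(void)` and `Clear(void)` dropping their `(void)` is modernize-redundant-void-arg: in C++ an empty parameter list already means "takes no arguments", so the C-style spelling adds nothing. Sketch:

struct Stats {
  double sum{0.0};
  void Clear() { sum = 0.0; }  // identical to "void Clear(void)" in C++
};
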
@ -133,48 +132,48 @@ class SketchMaker: public BaseMaker {
    inline void SetLeafVec(const TrainParam &param, bst_float *vec) const {
    }
  };
  inline void BuildSketch(const std::vector<bst_gpair> &gpair,
  inline void BuildSketch(const std::vector<GradientPair> &gpair,
                          DMatrix *p_fmat,
                          const RegTree &tree) {
    const MetaInfo& info = p_fmat->info();
    sketchs.resize(this->qexpand.size() * tree.param.num_feature * 3);
    for (size_t i = 0; i < sketchs.size(); ++i) {
      sketchs[i].Init(info.num_row, this->param.sketch_eps);
    const MetaInfo& info = p_fmat->Info();
    sketchs_.resize(this->qexpand_.size() * tree.param.num_feature * 3);
    for (auto & sketch : sketchs_) {
      sketch.Init(info.num_row_, this->param_.sketch_eps);
    }
    thread_sketch.resize(omp_get_max_threads());
    thread_sketch_.resize(omp_get_max_threads());
// number of rows in
|
||||
const size_t nrows = p_fmat->buffered_rowset().size();
|
||||
const size_t nrows = p_fmat->BufferedRowset().Size();
|
||||
// start accumulating statistics
|
||||
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator();
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
const ColBatch &batch = iter->Value();
|
||||
// start enumeration
|
||||
const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
|
||||
const auto nsize = static_cast<bst_omp_uint>(batch.size);
|
||||
#pragma omp parallel for schedule(dynamic, 1)
|
||||
for (bst_omp_uint i = 0; i < nsize; ++i) {
|
||||
this->UpdateSketchCol(gpair, batch[i], tree,
|
||||
node_stats,
|
||||
node_stats_,
|
||||
batch.col_index[i],
|
||||
batch[i].length == nrows,
|
||||
&thread_sketch[omp_get_thread_num()]);
|
||||
&thread_sketch_[omp_get_thread_num()]);
|
||||
}
|
||||
}
|
||||
// setup maximum size
|
||||
unsigned max_size = param.max_sketch_size();
|
||||
unsigned max_size = param_.MaxSketchSize();
|
||||
// synchronize sketch
|
||||
summary_array.resize(sketchs.size());
|
||||
for (size_t i = 0; i < sketchs.size(); ++i) {
|
||||
summary_array_.resize(sketchs_.size());
|
||||
for (size_t i = 0; i < sketchs_.size(); ++i) {
|
||||
common::WXQuantileSketch<bst_float, bst_float>::SummaryContainer out;
|
||||
sketchs[i].GetSummary(&out);
|
||||
summary_array[i].Reserve(max_size);
|
||||
summary_array[i].SetPrune(out, max_size);
|
||||
sketchs_[i].GetSummary(&out);
|
||||
summary_array_[i].Reserve(max_size);
|
||||
summary_array_[i].SetPrune(out, max_size);
|
||||
}
|
||||
size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size);
|
||||
sketch_reducer.Allreduce(dmlc::BeginPtr(summary_array), nbytes, summary_array.size());
|
||||
sketch_reducer_.Allreduce(dmlc::BeginPtr(summary_array_), nbytes, summary_array_.size());
|
||||
}
|
||||
// update sketch information in column fid
|
||||
inline void UpdateSketchCol(const std::vector<bst_gpair> &gpair,
|
||||
inline void UpdateSketchCol(const std::vector<GradientPair> &gpair,
|
||||
const ColBatch::Inst &c,
|
||||
const RegTree &tree,
|
||||
const std::vector<SKStats> &nstats,
|
||||
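Two modernize checks appear in this hunk: the index loop over sketchs_ becomes a range-based for, and the redundant bst_omp_uint type on nsize becomes auto because the static_cast already names the type. A compilable sketch of the loop conversion, with an invented container name:

#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  std::vector<double> sketches(4, 0.0);

  // before: index-based loop, flagged by modernize-loop-convert
  for (std::size_t i = 0; i < sketches.size(); ++i) {
    sketches[i] += 1.0;
  }

  // after: range-based for; auto & is needed to mutate the elements in place
  for (auto &sketch : sketches) {
    sketch += 1.0;
  }

  std::printf("%.1f\n", sketches[0]);  // prints 2.0
  return 0;
}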
@ -185,20 +184,19 @@ class SketchMaker: public BaseMaker {
    // initialize sbuilder for use
    std::vector<SketchEntry> &sbuilder = *p_temp;
    sbuilder.resize(tree.param.num_nodes * 3);
    for (size_t i = 0; i < this->qexpand.size(); ++i) {
      const unsigned nid = this->qexpand[i];
      const unsigned wid = this->node2workindex[nid];
    for (unsigned int nid : this->qexpand_) {
      const unsigned wid = this->node2workindex_[nid];
      for (int k = 0; k < 3; ++k) {
        sbuilder[3 * nid + k].sum_total = 0.0f;
        sbuilder[3 * nid + k].sketch = &sketchs[(wid * tree.param.num_feature + fid) * 3 + k];
        sbuilder[3 * nid + k].sketch = &sketchs_[(wid * tree.param.num_feature + fid) * 3 + k];
      }
    }
    if (!col_full) {
      for (bst_uint j = 0; j < c.length; ++j) {
        const bst_uint ridx = c[j].index;
        const int nid = this->position[ridx];
        const int nid = this->position_[ridx];
        if (nid >= 0) {
          const bst_gpair &e = gpair[ridx];
          const GradientPair &e = gpair[ridx];
          if (e.GetGrad() >= 0.0f) {
            sbuilder[3 * nid + 0].sum_total += e.GetGrad();
          } else {
@ -208,8 +206,7 @@ class SketchMaker: public BaseMaker {
        }
      }
    } else {
      for (size_t i = 0; i < this->qexpand.size(); ++i) {
        const unsigned nid = this->qexpand[i];
      for (unsigned int nid : this->qexpand_) {
        sbuilder[3 * nid + 0].sum_total = static_cast<bst_float>(nstats[nid].pos_grad);
        sbuilder[3 * nid + 1].sum_total = static_cast<bst_float>(nstats[nid].neg_grad);
        sbuilder[3 * nid + 2].sum_total = static_cast<bst_float>(nstats[nid].sum_hess);
@ -217,8 +214,7 @@ class SketchMaker: public BaseMaker {
    }
    // if only one value, no need to do second pass
    if (c[0].fvalue == c[c.length-1].fvalue) {
      for (size_t i = 0; i < this->qexpand.size(); ++i) {
        const int nid = this->qexpand[i];
      for (int nid : this->qexpand_) {
        for (int k = 0; k < 3; ++k) {
          sbuilder[3 * nid + k].sketch->Push(c[0].fvalue,
                                             static_cast<bst_float>(
@ -228,9 +224,8 @@ class SketchMaker: public BaseMaker {
      return;
    }
    // two pass scan
    unsigned max_size = param.max_sketch_size();
    for (size_t i = 0; i < this->qexpand.size(); ++i) {
      const int nid = this->qexpand[i];
    unsigned max_size = param_.MaxSketchSize();
    for (int nid : this->qexpand_) {
      for (int k = 0; k < 3; ++k) {
        sbuilder[3 * nid + k].Init(max_size);
      }
@ -238,9 +233,9 @@ class SketchMaker: public BaseMaker {
    // second pass, build the sketch
    for (bst_uint j = 0; j < c.length; ++j) {
      const bst_uint ridx = c[j].index;
      const int nid = this->position[ridx];
      const int nid = this->position_[ridx];
      if (nid >= 0) {
        const bst_gpair &e = gpair[ridx];
        const GradientPair &e = gpair[ridx];
        if (e.GetGrad() >= 0.0f) {
          sbuilder[3 * nid + 0].Push(c[j].fvalue, e.GetGrad(), max_size);
        } else {
@ -249,70 +244,69 @@ class SketchMaker: public BaseMaker {
        sbuilder[3 * nid + 2].Push(c[j].fvalue, e.GetHess(), max_size);
      }
    }
    for (size_t i = 0; i < this->qexpand.size(); ++i) {
      const int nid = this->qexpand[i];
    for (int nid : this->qexpand_) {
      for (int k = 0; k < 3; ++k) {
        sbuilder[3 * nid + k].Finalize(max_size);
      }
    }
  }
  inline void SyncNodeStats(void) {
    CHECK_NE(qexpand.size(), 0U);
    std::vector<SKStats> tmp(qexpand.size());
    for (size_t i = 0; i < qexpand.size(); ++i) {
      tmp[i] = node_stats[qexpand[i]];
  inline void SyncNodeStats() {
    CHECK_NE(qexpand_.size(), 0U);
    std::vector<SKStats> tmp(qexpand_.size());
    for (size_t i = 0; i < qexpand_.size(); ++i) {
      tmp[i] = node_stats_[qexpand_[i]];
    }
    stats_reducer.Allreduce(dmlc::BeginPtr(tmp), tmp.size());
    for (size_t i = 0; i < qexpand.size(); ++i) {
      node_stats[qexpand[i]] = tmp[i];
    stats_reducer_.Allreduce(dmlc::BeginPtr(tmp), tmp.size());
    for (size_t i = 0; i < qexpand_.size(); ++i) {
      node_stats_[qexpand_[i]] = tmp[i];
    }
  }
  inline void FindSplit(int depth,
                        const std::vector<bst_gpair> &gpair,
                        const std::vector<GradientPair> &gpair,
                        DMatrix *p_fmat,
                        RegTree *p_tree) {
    const bst_uint num_feature = p_tree->param.num_feature;
    // get the best split condition for each node
    std::vector<SplitEntry> sol(qexpand.size());
    bst_omp_uint nexpand = static_cast<bst_omp_uint>(qexpand.size());
    std::vector<SplitEntry> sol(qexpand_.size());
    auto nexpand = static_cast<bst_omp_uint>(qexpand_.size());
    #pragma omp parallel for schedule(dynamic, 1)
    for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
      const int nid = qexpand[wid];
      CHECK_EQ(node2workindex[nid], static_cast<int>(wid));
      const int nid = qexpand_[wid];
      CHECK_EQ(node2workindex_[nid], static_cast<int>(wid));
      SplitEntry &best = sol[wid];
      for (bst_uint fid = 0; fid < num_feature; ++fid) {
        unsigned base = (wid * p_tree->param.num_feature + fid) * 3;
        EnumerateSplit(summary_array[base + 0],
                       summary_array[base + 1],
                       summary_array[base + 2],
                       node_stats[nid], fid, &best);
        EnumerateSplit(summary_array_[base + 0],
                       summary_array_[base + 1],
                       summary_array_[base + 2],
                       node_stats_[nid], fid, &best);
      }
    }
    // get the best result, we can synchronize the solution
    for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
      const int nid = qexpand[wid];
      const int nid = qexpand_[wid];
      const SplitEntry &best = sol[wid];
      // set up the values
      p_tree->stat(nid).loss_chg = best.loss_chg;
      this->SetStats(nid, node_stats[nid], p_tree);
      p_tree->Stat(nid).loss_chg = best.loss_chg;
      this->SetStats(nid, node_stats_[nid], p_tree);
      // now we know the solution in snode[nid], set split
      if (best.loss_chg > rt_eps) {
      if (best.loss_chg > kRtEps) {
        p_tree->AddChilds(nid);
        (*p_tree)[nid].set_split(best.split_index(),
                                 best.split_value, best.default_left());
        (*p_tree)[nid].SetSplit(best.SplitIndex(),
                                best.split_value, best.DefaultLeft());
        // mark right child as 0, to indicate fresh leaf
        (*p_tree)[(*p_tree)[nid].cleft()].set_leaf(0.0f, 0);
        (*p_tree)[(*p_tree)[nid].cright()].set_leaf(0.0f, 0);
        (*p_tree)[(*p_tree)[nid].LeftChild()].SetLeaf(0.0f, 0);
        (*p_tree)[(*p_tree)[nid].RightChild()].SetLeaf(0.0f, 0);
      } else {
        (*p_tree)[nid].set_leaf(p_tree->stat(nid).base_weight * param.learning_rate);
        (*p_tree)[nid].SetLeaf(p_tree->Stat(nid).base_weight * param_.learning_rate);
      }
    }
  }
  // set statistics on ptree
  inline void SetStats(int nid, const SKStats &node_sum, RegTree *p_tree) {
    p_tree->stat(nid).base_weight = static_cast<bst_float>(node_sum.CalcWeight(param));
    p_tree->stat(nid).sum_hess = static_cast<bst_float>(node_sum.sum_hess);
    node_sum.SetLeafVec(param, p_tree->leafvec(nid));
    p_tree->Stat(nid).base_weight = static_cast<bst_float>(node_sum.CalcWeight(param_));
    p_tree->Stat(nid).sum_hess = static_cast<bst_float>(node_sum.sum_hess);
    node_sum.SetLeafVec(param_, p_tree->Leafvec(nid));
  }
  inline void EnumerateSplit(const WXQSketch::Summary &pos_grad,
                             const WXQSketch::Summary &neg_grad,
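SyncNodeStats above is a compact gather/reduce/scatter: only the stats of the nodes queued in qexpand_ are packed into a dense buffer, the buffer is all-reduced across workers, and the reduced values are scattered back. A single-process sketch of the same pattern; AllreduceSum is a hypothetical stand-in for the rabit reducer, not its real API:

#include <cstddef>
#include <vector>

struct SKStats { double pos_grad = 0, neg_grad = 0, sum_hess = 0; };

// hypothetical element-wise distributed sum; with a single worker it is the
// identity, which keeps the sketch runnable on one machine
void AllreduceSum(SKStats * /*buf*/, std::size_t /*count*/) {}

void SyncNodeStats(const std::vector<int> &qexpand,
                   std::vector<SKStats> *node_stats) {
  // gather: pack only the expanded nodes into a contiguous buffer
  std::vector<SKStats> tmp(qexpand.size());
  for (std::size_t i = 0; i < qexpand.size(); ++i) {
    tmp[i] = (*node_stats)[qexpand[i]];
  }
  // reduce: element-wise sum across workers (stubbed here)
  AllreduceSum(tmp.data(), tmp.size());
  // scatter: write the globally reduced stats back
  for (std::size_t i = 0; i < qexpand.size(); ++i) {
    (*node_stats)[qexpand[i]] = tmp[i];
  }
}

int main() {
  std::vector<SKStats> stats(7);
  std::vector<int> qexpand = {1, 3, 5};
  SyncNodeStats(qexpand, &stats);
  return 0;
}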
@ -321,7 +315,7 @@ class SketchMaker: public BaseMaker {
                             bst_uint fid,
                             SplitEntry *best) {
    if (sum_hess.size == 0) return;
    double root_gain = node_sum.CalcGain(param);
    double root_gain = node_sum.CalcGain(param_);
    std::vector<bst_float> fsplits;
    for (size_t i = 0; i < pos_grad.size; ++i) {
      fsplits.push_back(pos_grad.data[i].value);
@ -350,17 +344,17 @@ class SketchMaker: public BaseMaker {
      s.sum_hess = 0.5f * (hess.rmin + hess.rmax - hess.wmin);
      c.SetSubstract(node_sum, s);
      // forward
      if (s.sum_hess >= param.min_child_weight &&
          c.sum_hess >= param.min_child_weight) {
        double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
      if (s.sum_hess >= param_.min_child_weight &&
          c.sum_hess >= param_.min_child_weight) {
        double loss_chg = s.CalcGain(param_) + c.CalcGain(param_) - root_gain;
        best->Update(static_cast<bst_float>(loss_chg), fid, fsplits[i], false);
      }
      // backward
      c.SetSubstract(feat_sum, s);
      s.SetSubstract(node_sum, c);
      if (s.sum_hess >= param.min_child_weight &&
          c.sum_hess >= param.min_child_weight) {
        double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
      if (s.sum_hess >= param_.min_child_weight &&
          c.sum_hess >= param_.min_child_weight) {
        double loss_chg = s.CalcGain(param_) + c.CalcGain(param_) - root_gain;
        best->Update(static_cast<bst_float>(loss_chg), fid, fsplits[i], true);
      }
    }
@ -368,10 +362,10 @@ class SketchMaker: public BaseMaker {
    // all including
    SKStats s = feat_sum, c;
    c.SetSubstract(node_sum, s);
    if (s.sum_hess >= param.min_child_weight &&
        c.sum_hess >= param.min_child_weight) {
    if (s.sum_hess >= param_.min_child_weight &&
        c.sum_hess >= param_.min_child_weight) {
      bst_float cpt = fsplits.back();
      double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
      double loss_chg = s.CalcGain(param_) + c.CalcGain(param_) - root_gain;
      best->Update(static_cast<bst_float>(loss_chg),
                   fid, cpt + std::abs(cpt) + 1.0f, false);
    }
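The forward and backward passes above both score a candidate partition s against its complement c via loss_chg = CalcGain(s) + CalcGain(c) - root_gain. A worked example, assuming the usual second-order gain G^2 / (H + lambda) for CalcGain; the real TrainParam version applies additional regularisation, so treat this as illustrative only:

#include <cstdio>

// assumed shape of CalcGain: G^2 / (H + lambda)
double CalcGain(double grad_sum, double hess_sum, double lambda) {
  return grad_sum * grad_sum / (hess_sum + lambda);
}

int main() {
  const double lambda = 1.0;
  // node totals and one candidate left partition s; c is the complement,
  // mirroring c.SetSubstract(node_sum, s) in the code above
  const double g_node = -10.0, h_node = 8.0;
  const double g_s = -9.0, h_s = 4.0;
  const double g_c = g_node - g_s, h_c = h_node - h_s;

  const double root_gain = CalcGain(g_node, h_node, lambda);
  const double loss_chg =
      CalcGain(g_s, h_s, lambda) + CalcGain(g_c, h_c, lambda) - root_gain;
  std::printf("loss_chg = %.3f\n", loss_chg);  // ~5.289 > 0: the split helps
  return 0;
}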
@ -380,19 +374,19 @@ class SketchMaker: public BaseMaker {

  // thread temp data
  // used to hold temporal sketch
  std::vector<std::vector<SketchEntry> > thread_sketch;
  std::vector<std::vector<SketchEntry> > thread_sketch_;
  // used to hold statistics
  std::vector<std::vector<SKStats> > thread_stats;
  std::vector<std::vector<SKStats> > thread_stats_;
  // node statistics
  std::vector<SKStats> node_stats;
  std::vector<SKStats> node_stats_;
  // summary array
  std::vector<WXQSketch::SummaryContainer> summary_array;
  std::vector<WXQSketch::SummaryContainer> summary_array_;
  // reducer for summary
  rabit::Reducer<SKStats, SKStats::Reduce> stats_reducer;
  rabit::Reducer<SKStats, SKStats::Reduce> stats_reducer_;
  // reducer for summary
  rabit::SerializeReducer<WXQSketch::SummaryContainer> sketch_reducer;
  rabit::SerializeReducer<WXQSketch::SummaryContainer> sketch_reducer_;
  // per node, per feature sketch
  std::vector<common::WXQuantileSketch<bst_float, bst_float> > sketchs;
  std::vector<common::WXQuantileSketch<bst_float, bst_float> > sketchs_;
};

XGBOOST_REGISTER_TREE_UPDATER(SketchMaker, "grow_skmaker")
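The member declarations above show the three renaming rules of this commit side by side: private data members gain a trailing underscore, functions such as MaxSketchSize() become CamelCase, and constants such as kRtEps take a k prefix. A self-contained toy class (all names invented) following the same conventions:

#include <cstddef>
#include <vector>

class SketchBuffer {
 public:
  static constexpr int kStatsPerNode = 3;  // constant: k + CamelCase
  void Resize(std::size_t n) { summary_array_.resize(n * kStatsPerNode); }
  std::size_t Size() const { return summary_array_.size(); }  // function: CamelCase

 private:
  std::vector<double> summary_array_;  // private member: trailing underscore
};

int main() {
  SketchBuffer buf;
  buf.Resize(2);
  return buf.Size() == 6 ? 0 : 1;
}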
@ -23,7 +23,7 @@ class TreeSyncher: public TreeUpdater {
 public:
  void Init(const std::vector<std::pair<std::string, std::string> >& args) override {}

  void Update(HostDeviceVector<bst_gpair> *gpair,
  void Update(HostDeviceVector<GradientPair> *gpair,
              DMatrix* dmat,
              const std::vector<RegTree*> &trees) override {
    if (rabit::GetWorldSize() == 1) return;
@ -31,14 +31,14 @@ class TreeSyncher: public TreeUpdater {
    common::MemoryBufferStream fs(&s_model);
    int rank = rabit::GetRank();
    if (rank == 0) {
      for (size_t i = 0; i < trees.size(); ++i) {
        trees[i]->Save(&fs);
      for (auto tree : trees) {
        tree->Save(&fs);
      }
    }
    fs.Seek(0);
    rabit::Broadcast(&s_model, 0);
    for (size_t i = 0; i < trees.size(); ++i) {
      trees[i]->Load(&fs);
    for (auto tree : trees) {
      tree->Load(&fs);
    }
  }
};
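TreeSyncher's Update is a serialize/broadcast/deserialize round trip: rank 0 writes every tree into one in-memory buffer, the buffer is broadcast, and every rank (including rank 0, after the Seek(0)) reads the trees back in order. A single-process sketch of the pattern; Broadcast here is a hypothetical no-op stand-in for rabit::Broadcast, and Model is an invented placeholder for RegTree:

#include <string>
#include <vector>

// on one worker the broadcast is the identity, so this stub keeps the
// sketch runnable locally
void Broadcast(std::string * /*payload*/, int /*root*/) {}

struct Model {
  std::string blob;
  void Save(std::string *out) const { *out += blob + '\n'; }
  void Load(std::string * /*in*/) { /* parsing elided for brevity */ }
};

void Sync(std::vector<Model*> *models, int rank) {
  std::string s_model;
  if (rank == 0) {
    for (auto *model : *models) model->Save(&s_model);  // serialize all
  }
  Broadcast(&s_model, 0);                               // share rank 0's buffer
  for (auto *model : *models) model->Load(&s_model);    // deserialize in order
}

int main() {
  Model a{"tree0"}, b{"tree1"};
  std::vector<Model*> models = {&a, &b};
  Sync(&models, 0);
  return 0;
}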
@ -20,10 +20,10 @@ TEST(c_api, XGDMatrixCreateFromMat_omp) {

  std::shared_ptr<xgboost::DMatrix> dmat =
      *static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);
  xgboost::MetaInfo &info = dmat->info();
  ASSERT_EQ(info.num_col, num_cols);
  ASSERT_EQ(info.num_row, row);
  ASSERT_EQ(info.num_nonzero, num_cols * row - num_missing);
  xgboost::MetaInfo &info = dmat->Info();
  ASSERT_EQ(info.num_col_, num_cols);
  ASSERT_EQ(info.num_row_, row);
  ASSERT_EQ(info.num_nonzero_, num_cols * row - num_missing);

  auto iter = dmat->RowIterator();
  iter->BeforeFirst();

@ -38,7 +38,7 @@ void SpeedTest() {

  xgboost::common::Timer t;
  dh::TransformLbs(
      0, &temp_memory, h_rows.size(), dh::raw(row_ptr), row_ptr.size() - 1,
      0, &temp_memory, h_rows.size(), dh::Raw(row_ptr), row_ptr.size() - 1,
      false,
      [=] __device__(size_t idx, size_t ridx) { d_output_row[idx] = ridx; });

@ -66,7 +66,7 @@ void TestLbs() {
  thrust::device_vector<int> output_row(h_rows.size());
  auto d_output_row = output_row.data();

  dh::TransformLbs(0, &temp_memory, h_rows.size(), dh::raw(row_ptr),
  dh::TransformLbs(0, &temp_memory, h_rows.size(), dh::Raw(row_ptr),
                   row_ptr.size() - 1, false,
                   [=] __device__(size_t idx, size_t ridx) {
                     d_output_row[idx] = ridx;
@ -83,6 +83,6 @@ TEST(cub_lbs, Test) { TestLbs(); }
TEST(sumReduce, Test) {
  thrust::device_vector<float> data(100, 1.0f);
  dh::CubMemory temp;
  auto sum = dh::sumReduction(temp, dh::raw(data), data.size());
  auto sum = dh::SumReduction(temp, dh::Raw(data), data.size());
  ASSERT_NEAR(sum, 100.0f, 1e-5);
}
@ -12,9 +12,9 @@ TEST(MetaInfo, GetSet) {
  info.SetInfo("root_index", double2, xgboost::kDouble, 2);
  EXPECT_EQ(info.GetRoot(1), 2.0f);

  EXPECT_EQ(info.labels.size(), 0);
  EXPECT_EQ(info.labels_.size(), 0);
  info.SetInfo("label", double2, xgboost::kFloat32, 2);
  EXPECT_EQ(info.labels.size(), 2);
  EXPECT_EQ(info.labels_.size(), 2);

  float float2[2] = {1.0f, 2.0f};
  EXPECT_EQ(info.GetWeight(1), 1.0f)
@ -23,26 +23,26 @@ TEST(MetaInfo, GetSet) {
  EXPECT_EQ(info.GetWeight(1), 2.0f);

  uint32_t uint32_t2[2] = {1U, 2U};
  EXPECT_EQ(info.base_margin.size(), 0);
  EXPECT_EQ(info.base_margin_.size(), 0);
  info.SetInfo("base_margin", uint32_t2, xgboost::kUInt32, 2);
  EXPECT_EQ(info.base_margin.size(), 2);
  EXPECT_EQ(info.base_margin_.size(), 2);

  uint64_t uint64_t2[2] = {1U, 2U};
  EXPECT_EQ(info.group_ptr.size(), 0);
  EXPECT_EQ(info.group_ptr_.size(), 0);
  info.SetInfo("group", uint64_t2, xgboost::kUInt64, 2);
  ASSERT_EQ(info.group_ptr.size(), 3);
  EXPECT_EQ(info.group_ptr[2], 3);
  ASSERT_EQ(info.group_ptr_.size(), 3);
  EXPECT_EQ(info.group_ptr_[2], 3);

  info.Clear();
  ASSERT_EQ(info.group_ptr.size(), 0);
  ASSERT_EQ(info.group_ptr_.size(), 0);
}

TEST(MetaInfo, SaveLoadBinary) {
  xgboost::MetaInfo info;
  double vals[2] = {1.0, 2.0};
  info.SetInfo("label", vals, xgboost::kDouble, 2);
  info.num_row = 2;
  info.num_col = 1;
  info.num_row_ = 2;
  info.num_col_ = 1;

  std::string tmp_file = TempFileName();
  dmlc::Stream * fs = dmlc::Stream::Create(tmp_file.c_str(), "w");
@ -55,9 +55,9 @@ TEST(MetaInfo, SaveLoadBinary) {
  fs = dmlc::Stream::Create(tmp_file.c_str(), "r");
  xgboost::MetaInfo inforead;
  inforead.LoadBinary(fs);
  EXPECT_EQ(inforead.labels, info.labels);
  EXPECT_EQ(inforead.num_col, info.num_col);
  EXPECT_EQ(inforead.num_row, info.num_row);
  EXPECT_EQ(inforead.labels_, info.labels_);
  EXPECT_EQ(inforead.num_col_, info.num_col_);
  EXPECT_EQ(inforead.num_row_, info.num_row_);

  std::remove(tmp_file.c_str());
}

@ -14,9 +14,9 @@ TEST(SimpleCSRSource, SaveLoadBinary) {
  xgboost::DMatrix * dmat_read = xgboost::DMatrix::Load(tmp_binfile, true, false);
  std::remove(tmp_binfile.c_str());

  EXPECT_EQ(dmat->info().num_col, dmat_read->info().num_col);
  EXPECT_EQ(dmat->info().num_row, dmat_read->info().num_row);
  EXPECT_EQ(dmat->info().num_row, dmat_read->info().num_row);
  EXPECT_EQ(dmat->Info().num_col_, dmat_read->Info().num_col_);
  EXPECT_EQ(dmat->Info().num_row_, dmat_read->Info().num_row_);
  EXPECT_EQ(dmat->Info().num_row_, dmat_read->Info().num_row_);

  dmlc::DataIter<xgboost::RowBatch> * row_iter = dmat->RowIterator();
  dmlc::DataIter<xgboost::RowBatch> * row_iter_read = dmat_read->RowIterator();
@ -10,10 +10,10 @@ TEST(SimpleDMatrix, MetaInfo) {
  std::remove(tmp_file.c_str());

  // Test the metadata that was parsed
  EXPECT_EQ(dmat->info().num_row, 2);
  EXPECT_EQ(dmat->info().num_col, 5);
  EXPECT_EQ(dmat->info().num_nonzero, 6);
  EXPECT_EQ(dmat->info().labels.size(), dmat->info().num_row);
  EXPECT_EQ(dmat->Info().num_row_, 2);
  EXPECT_EQ(dmat->Info().num_col_, 5);
  EXPECT_EQ(dmat->Info().num_nonzero_, 6);
  EXPECT_EQ(dmat->Info().labels_.size(), dmat->Info().num_row_);
}

TEST(SimpleDMatrix, RowAccess) {
@ -26,7 +26,7 @@ TEST(SimpleDMatrix, RowAccess) {
  long row_count = 0;
  row_iter->BeforeFirst();
  while (row_iter->Next()) row_count += row_iter->Value().size;
  EXPECT_EQ(row_count, dmat->info().num_row);
  EXPECT_EQ(row_count, dmat->Info().num_row_);
  // Test the data read into the first row
  row_iter->BeforeFirst();
  row_iter->Next();
@ -43,15 +43,15 @@ TEST(SimpleDMatrix, ColAccessWithoutBatches) {
  std::remove(tmp_file.c_str());

  // Unsorted column access
  const std::vector<bool> enable(dmat->info().num_col, true);
  const std::vector<bool> enable(dmat->Info().num_col_, true);
  EXPECT_EQ(dmat->HaveColAccess(false), false);
  dmat->InitColAccess(enable, 1, dmat->info().num_row, false);
  dmat->InitColAccess(enable, 1, dmat->Info().num_row_, false);
  dmat->InitColAccess(enable, 0, 0, false); // Calling it again should not change it
  ASSERT_EQ(dmat->HaveColAccess(false), true);

  // Sorted column access
  EXPECT_EQ(dmat->HaveColAccess(true), false);
  dmat->InitColAccess(enable, 1, dmat->info().num_row, true);
  dmat->InitColAccess(enable, 1, dmat->Info().num_row_, true);
  dmat->InitColAccess(enable, 0, 0, true); // Calling it again should not change it
  ASSERT_EQ(dmat->HaveColAccess(true), true);

@ -67,7 +67,7 @@ TEST(SimpleDMatrix, ColAccessWithoutBatches) {
  col_iter->BeforeFirst();
  while (col_iter->Next()) {
    num_col_batch += 1;
    EXPECT_EQ(col_iter->Value().size, dmat->info().num_col)
    EXPECT_EQ(col_iter->Value().size, dmat->Info().num_col_)
        << "Expected batch size = number of cells as #batches is 1.";
    for (int i = 0; i < static_cast<int>(col_iter->Value().size); ++i) {
      EXPECT_EQ(col_iter->Value()[i].length, dmat->GetColSize(i))
@ -94,7 +94,7 @@ TEST(SimpleDMatrix, ColAccessWithBatches) {
  std::remove(tmp_file.c_str());

  // Unsorted column access
  const std::vector<bool> enable(dmat->info().num_col, true);
  const std::vector<bool> enable(dmat->Info().num_col_, true);
  EXPECT_EQ(dmat->HaveColAccess(false), false);
  dmat->InitColAccess(enable, 1, 1, false);
  dmat->InitColAccess(enable, 0, 0, false); // Calling it again should not change it
@ -118,20 +118,20 @@ TEST(SimpleDMatrix, ColAccessWithBatches) {
  col_iter->BeforeFirst();
  while (col_iter->Next()) {
    num_col_batch += 1;
    EXPECT_EQ(col_iter->Value().size, dmat->info().num_col)
    EXPECT_EQ(col_iter->Value().size, dmat->Info().num_col_)
        << "Expected batch size = num_cols as max_row_perbatch is 1.";
    for (int i = 0; i < static_cast<int>(col_iter->Value().size); ++i) {
      EXPECT_LE(col_iter->Value()[i].length, 1)
          << "Expected length of each colbatch <=1 as max_row_perbatch is 1.";
    }
  }
  EXPECT_EQ(num_col_batch, dmat->info().num_row)
  EXPECT_EQ(num_col_batch, dmat->Info().num_row_)
      << "Expected num batches = num_rows as max_row_perbatch is 1";
  col_iter = nullptr;

  // The iterator feats should ignore any numbers larger than the num_col
  std::vector<xgboost::bst_uint> sub_feats = {
      4, 3, static_cast<unsigned int>(dmat->info().num_col + 1)};
      4, 3, static_cast<unsigned int>(dmat->Info().num_col_ + 1)};
  dmlc::DataIter<xgboost::ColBatch> * sub_col_iter = dmat->ColIterator(sub_feats);
  // Loop over the batches and assert the data is as expected
  sub_col_iter->BeforeFirst();

@ -12,10 +12,10 @@ TEST(SparsePageDMatrix, MetaInfo) {
  EXPECT_TRUE(FileExists(tmp_file + ".cache"));

  // Test the metadata that was parsed
  EXPECT_EQ(dmat->info().num_row, 2);
  EXPECT_EQ(dmat->info().num_col, 5);
  EXPECT_EQ(dmat->info().num_nonzero, 6);
  EXPECT_EQ(dmat->info().labels.size(), dmat->info().num_row);
  EXPECT_EQ(dmat->Info().num_row_, 2);
  EXPECT_EQ(dmat->Info().num_col_, 5);
  EXPECT_EQ(dmat->Info().num_nonzero_, 6);
  EXPECT_EQ(dmat->Info().labels_.size(), dmat->Info().num_row_);

  // Clean up of external memory files
  std::remove((tmp_file + ".cache").c_str());
@ -34,7 +34,7 @@ TEST(SparsePageDMatrix, RowAccess) {
  long row_count = 0;
  row_iter->BeforeFirst();
  while (row_iter->Next()) row_count += row_iter->Value().size;
  EXPECT_EQ(row_count, dmat->info().num_row);
  EXPECT_EQ(row_count, dmat->Info().num_row_);
  // Test the data read into the first row
  row_iter->BeforeFirst();
  row_iter->Next();
@ -57,7 +57,7 @@ TEST(SparsePageDMatrix, ColAcess) {
  EXPECT_FALSE(FileExists(tmp_file + ".cache.col.page"));

  EXPECT_EQ(dmat->HaveColAccess(true), false);
  const std::vector<bool> enable(dmat->info().num_col, true);
  const std::vector<bool> enable(dmat->Info().num_col_, true);
  dmat->InitColAccess(enable, 1, 1, true); // Max 1 row per patch
  ASSERT_EQ(dmat->HaveColAccess(true), true);
  EXPECT_TRUE(FileExists(tmp_file + ".cache.col.page"));
@ -73,10 +73,10 @@ TEST(SparsePageDMatrix, ColAcess) {
  col_iter->BeforeFirst();
  while (col_iter->Next()) {
    num_col_batch += 1;
    EXPECT_EQ(col_iter->Value().size, dmat->info().num_col)
    EXPECT_EQ(col_iter->Value().size, dmat->Info().num_col_)
        << "Expected batch size to be same as num_cols as max_row_perbatch is 1.";
  }
  EXPECT_EQ(num_col_batch, dmat->info().num_row)
  EXPECT_EQ(num_col_batch, dmat->Info().num_row_)
      << "Expected num batches to be same as num_rows as max_row_perbatch is 1";
  col_iter = nullptr;
@ -34,17 +34,17 @@ void CheckObjFunction(xgboost::ObjFunction * obj,
                      std::vector<xgboost::bst_float> out_grad,
                      std::vector<xgboost::bst_float> out_hess) {
  xgboost::MetaInfo info;
  info.num_row = labels.size();
  info.labels = labels;
  info.weights = weights;
  info.num_row_ = labels.size();
  info.labels_ = labels;
  info.weights_ = weights;

  xgboost::HostDeviceVector<xgboost::bst_float> in_preds(preds);

  xgboost::HostDeviceVector<xgboost::bst_gpair> out_gpair;
  xgboost::HostDeviceVector<xgboost::GradientPair> out_gpair;
  obj->GetGradient(&in_preds, info, 1, &out_gpair);
  std::vector<xgboost::bst_gpair>& gpair = out_gpair.data_h();
  std::vector<xgboost::GradientPair>& gpair = out_gpair.HostVector();

  ASSERT_EQ(gpair.size(), in_preds.size());
  ASSERT_EQ(gpair.size(), in_preds.Size());
  for (int i = 0; i < static_cast<int>(gpair.size()); ++i) {
    EXPECT_NEAR(gpair[i].GetGrad(), out_grad[i], 0.01)
        << "Unexpected grad for pred=" << preds[i] << " label=" << labels[i]
@ -60,9 +60,9 @@ xgboost::bst_float GetMetricEval(xgboost::Metric * metric,
                                 std::vector<xgboost::bst_float> labels,
                                 std::vector<xgboost::bst_float> weights) {
  xgboost::MetaInfo info;
  info.num_row = labels.size();
  info.labels = labels;
  info.weights = weights;
  info.num_row_ = labels.size();
  info.labels_ = labels;
  info.weights_ = weights;
  return metric->Eval(preds, info, false);
}
@ -8,15 +8,15 @@ typedef std::pair<std::string, std::string> arg;
TEST(Linear, shotgun) {
  typedef std::pair<std::string, std::string> arg;
  auto mat = CreateDMatrix(10, 10, 0);
  std::vector<bool> enabled(mat->info().num_col, true);
  std::vector<bool> enabled(mat->Info().num_col_, true);
  mat->InitColAccess(enabled, 1.0f, 1 << 16, false);
  auto updater = std::unique_ptr<xgboost::LinearUpdater>(
      xgboost::LinearUpdater::Create("shotgun"));
  updater->Init({{"eta", "1."}});
  std::vector<xgboost::bst_gpair> gpair(mat->info().num_row,
                                        xgboost::bst_gpair(-5, 1.0));
  std::vector<xgboost::GradientPair> gpair(mat->Info().num_row_,
                                           xgboost::GradientPair(-5, 1.0));
  xgboost::gbm::GBLinearModel model;
  model.param.num_feature = mat->info().num_col;
  model.param.num_feature = mat->Info().num_col_;
  model.param.num_output_group = 1;
  model.LazyInitModel();
  updater->Update(&gpair, mat.get(), &model, gpair.size());
@ -27,15 +27,15 @@ TEST(Linear, shotgun) {
TEST(Linear, coordinate) {
  typedef std::pair<std::string, std::string> arg;
  auto mat = CreateDMatrix(10, 10, 0);
  std::vector<bool> enabled(mat->info().num_col, true);
  std::vector<bool> enabled(mat->Info().num_col_, true);
  mat->InitColAccess(enabled, 1.0f, 1 << 16, false);
  auto updater = std::unique_ptr<xgboost::LinearUpdater>(
      xgboost::LinearUpdater::Create("coord_descent"));
  updater->Init({});
  std::vector<xgboost::bst_gpair> gpair(mat->info().num_row,
                                        xgboost::bst_gpair(-5, 1.0));
  std::vector<xgboost::GradientPair> gpair(mat->Info().num_row_,
                                           xgboost::GradientPair(-5, 1.0));
  xgboost::gbm::GBLinearModel model;
  model.param.num_feature = mat->info().num_col;
  model.param.num_feature = mat->Info().num_col_;
  model.param.num_output_group = 1;
  model.LazyInitModel();
  updater->Update(&gpair, mat.get(), &model, gpair.size());
@ -49,8 +49,8 @@ TEST(Objective, LogisticRegressionBasic) {
  xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
  std::vector<xgboost::bst_float> out_preds = {0.5f, 0.524f, 0.622f, 0.710f, 0.731f};
  obj->PredTransform(&io_preds);
  auto& preds = io_preds.data_h();
  for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
  auto& preds = io_preds.HostVector();
  for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
    EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
  }
}
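data_h() → HostVector() and size() → Size() is the same CamelCase rename applied to HostDeviceVector. Incidentally, the expected values in this test are just the sigmoid of the inputs, since PredTransform for logistic regression applies 1/(1+exp(-x)); a quick check that matches out_preds to the test's 0.01 tolerance:

#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  // prints 0.500, 0.525, 0.622, 0.711, 0.731 for the margins tested above
  std::vector<double> margins = {0.0, 0.1, 0.5, 0.9, 1.0};
  for (double m : margins) {
    std::printf("%.3f -> %.3f\n", m, 1.0 / (1.0 + std::exp(-m)));
  }
  return 0;
}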
@ -98,8 +98,8 @@ TEST(Objective, PoissonRegressionBasic) {
  xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
  std::vector<xgboost::bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
  obj->PredTransform(&io_preds);
  auto& preds = io_preds.data_h();
  for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
  auto& preds = io_preds.HostVector();
  for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
    EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
  }
}
@ -134,8 +134,8 @@ TEST(Objective, GammaRegressionBasic) {
  xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
  std::vector<xgboost::bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
  obj->PredTransform(&io_preds);
  auto& preds = io_preds.data_h();
  for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
  auto& preds = io_preds.HostVector();
  for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
    EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
  }
}
@ -171,8 +171,8 @@ TEST(Objective, TweedieRegressionBasic) {
  xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
  std::vector<xgboost::bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
  obj->PredTransform(&io_preds);
  auto& preds = io_preds.data_h();
  for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
  auto& preds = io_preds.HostVector();
  for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
    EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
  }
}
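The Poisson, gamma, and tweedie tests all assert the same expected vector because each of these objectives uses an exponential PredTransform; exp(0), exp(0.1), ... match 1, 1.10, 1.64, 2.45, 2.71 within the 0.01 tolerance. A one-liner to confirm:

#include <cmath>
#include <cstdio>

int main() {
  const double margins[] = {0.0, 0.1, 0.5, 0.9, 1.0};
  for (double m : margins) std::printf("%.4f ", std::exp(m));
  std::printf("\n");  // 1.0000 1.1052 1.6487 2.4596 2.7183
  return 0;
}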
@ -51,8 +51,8 @@ TEST(Objective, GPULogisticRegressionBasic) {
  xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
  std::vector<xgboost::bst_float> out_preds = {0.5f, 0.524f, 0.622f, 0.710f, 0.731f};
  obj->PredTransform(&io_preds);
  auto& preds = io_preds.data_h();
  for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
  auto& preds = io_preds.HostVector();
  for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
    EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
  }
}

@ -11,8 +11,8 @@ TEST(cpu_predictor, Test) {
  std::vector<std::unique_ptr<RegTree>> trees;
  trees.push_back(std::unique_ptr<RegTree>(new RegTree));
  trees.back()->InitModel();
  (*trees.back())[0].set_leaf(1.5f);
  (*trees.back()).stat(0).sum_hess = 1.0f;
  (*trees.back())[0].SetLeaf(1.5f);
  (*trees.back()).Stat(0).sum_hess = 1.0f;
  gbm::GBTreeModel model(0.5);
  model.CommitModel(std::move(trees), 0);
  model.param.num_output_group = 1;
@ -26,8 +26,8 @@ TEST(cpu_predictor, Test) {
  // Test predict batch
  HostDeviceVector<float> out_predictions;
  cpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
  std::vector<float>& out_predictions_h = out_predictions.data_h();
  for (int i = 0; i < out_predictions.size(); i++) {
  std::vector<float>& out_predictions_h = out_predictions.HostVector();
  for (int i = 0; i < out_predictions.Size(); i++) {
    ASSERT_EQ(out_predictions_h[i], 1.5);
  }

@ -21,8 +21,8 @@ TEST(gpu_predictor, Test) {
  std::vector<std::unique_ptr<RegTree>> trees;
  trees.push_back(std::unique_ptr<RegTree>(new RegTree()));
  trees.back()->InitModel();
  (*trees.back())[0].set_leaf(1.5f);
  (*trees.back()).stat(0).sum_hess = 1.0f;
  (*trees.back())[0].SetLeaf(1.5f);
  (*trees.back()).Stat(0).sum_hess = 1.0f;
  gbm::GBTreeModel model(0.5);
  model.CommitModel(std::move(trees), 0);
  model.param.num_output_group = 1;
@ -37,10 +37,10 @@ TEST(gpu_predictor, Test) {
  HostDeviceVector<float> cpu_out_predictions;
  gpu_predictor->PredictBatch(dmat.get(), &gpu_out_predictions, model, 0);
  cpu_predictor->PredictBatch(dmat.get(), &cpu_out_predictions, model, 0);
  std::vector<float>& gpu_out_predictions_h = gpu_out_predictions.data_h();
  std::vector<float>& cpu_out_predictions_h = cpu_out_predictions.data_h();
  std::vector<float>& gpu_out_predictions_h = gpu_out_predictions.HostVector();
  std::vector<float>& cpu_out_predictions_h = cpu_out_predictions.HostVector();
  float abs_tolerance = 0.001;
  for (int i = 0; i < gpu_out_predictions.size(); i++) {
  for (int i = 0; i < gpu_out_predictions.Size(); i++) {
    ASSERT_LT(std::abs(gpu_out_predictions_h[i] - cpu_out_predictions_h[i]),
              abs_tolerance);
  }

@ -29,7 +29,7 @@ TEST(gpu_hist_experimental, TestSparseShard) {

  ASSERT_LT(shard.row_stride, columns);

  auto host_gidx_buffer = shard.gidx_buffer.as_vector();
  auto host_gidx_buffer = shard.gidx_buffer.AsVector();

  common::CompressedIterator<uint32_t> gidx(host_gidx_buffer.data(),
                                            hmat.row_ptr.back() + 1);
@ -64,7 +64,7 @@ TEST(gpu_hist_experimental, TestDenseShard) {

  ASSERT_EQ(shard.row_stride, columns);

  auto host_gidx_buffer = shard.gidx_buffer.as_vector();
  auto host_gidx_buffer = shard.gidx_buffer.AsVector();

  common::CompressedIterator<uint32_t> gidx(host_gidx_buffer.data(),
                                            hmat.row_ptr.back() + 1);

@ -89,8 +89,8 @@ TEST(Param, SplitEntry) {
  xgboost::tree::SplitEntry se3;
  se3.Update(2, 101, 0, false);
  xgboost::tree::SplitEntry::Reduce(se2, se3);
  EXPECT_EQ(se2.split_index(), 101);
  EXPECT_FALSE(se2.default_left());
  EXPECT_EQ(se2.SplitIndex(), 101);
  EXPECT_FALSE(se2.DefaultLeft());

  EXPECT_TRUE(se1.NeedReplace(3, 1));
}
@ -10,6 +10,21 @@ if [ ${TASK} == "lint" ]; then
    echo "----------------------------"
    (cat logclean.txt|grep warning) && exit -1
    (cat logclean.txt|grep error) && exit -1

    # Rename cuda files for static analysis
    for file in $(find src -name '*.cu'); do
        cp "$file" "${file/.cu/_tmp.cc}"
    done

    header_filter='(xgboost\/src|xgboost\/include)'
    for filename in $(find src -name '*.cc'); do
        clang-tidy $filename -header-filter=$header_filter -- -Iinclude -Idmlc-core/include -Irabit/include -std=c++11 >> logtidy.txt
    done
    echo "---------clang-tidy log----------"
    cat logtidy.txt
    echo "----------------------------"
    # Fail only on warnings related to XGBoost source files
    (cat logtidy.txt|grep -E 'dmlc/xgboost.*warning'|grep -v dmlc-core) && exit -1
    exit 0
fi