diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 000000000..6000c08a6 --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,22 @@ +Checks: 'modernize-*,-modernize-make-*,-modernize-raw-string-literal,google-*,-google-default-arguments,-clang-diagnostic-#pragma-messages,readability-identifier-naming' +CheckOptions: + - { key: readability-identifier-naming.ClassCase, value: CamelCase } + - { key: readability-identifier-naming.StructCase, value: CamelCase } + - { key: readability-identifier-naming.TypeAliasCase, value: CamelCase } + - { key: readability-identifier-naming.TypedefCase, value: CamelCase } + - { key: readability-identifier-naming.TypeTemplateParameterCase, value: CamelCase } + - { key: readability-identifier-naming.LocalVariableCase, value: lower_case } + - { key: readability-identifier-naming.MemberCase, value: lower_case } + - { key: readability-identifier-naming.PrivateMemberSuffix, value: '_' } + - { key: readability-identifier-naming.ProtectedMemberSuffix, value: '_' } + - { key: readability-identifier-naming.EnumCase, value: CamelCase } + - { key: readability-identifier-naming.EnumConstant, value: CamelCase } + - { key: readability-identifier-naming.EnumConstantPrefix, value: k } + - { key: readability-identifier-naming.GlobalConstantCase, value: CamelCase } + - { key: readability-identifier-naming.GlobalConstantPrefix, value: k } + - { key: readability-identifier-naming.StaticConstantCase, value: CamelCase } + - { key: readability-identifier-naming.StaticConstantPrefix, value: k } + - { key: readability-identifier-naming.ConstexprVariableCase, value: CamelCase } + - { key: readability-identifier-naming.ConstexprVariablePrefix, value: k } + - { key: readability-identifier-naming.FunctionCase, value: CamelCase } + - { key: readability-identifier-naming.NamespaceCase, value: lower_case } diff --git a/.travis.yml b/.travis.yml index 19216a6b2..d607f8d6e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -44,10 +44,12 @@ matrix: addons: apt: sources: + - llvm-toolchain-trusty-5.0 - ubuntu-toolchain-r-test - george-edison55-precise-backports packages: - - cmake + - clang + - clang-tidy-5.0 - cmake-data - doxygen - wget diff --git a/include/xgboost/base.h b/include/xgboost/base.h index 3d346b43e..3a2a10dbd 100644 --- a/include/xgboost/base.h +++ b/include/xgboost/base.h @@ -81,20 +81,19 @@ namespace xgboost { * \brief unsigned integer type used in boost, * used for feature index and row index. */ -typedef uint32_t bst_uint; -typedef int32_t bst_int; +using bst_uint = uint32_t; // NOLINT +using bst_int = int32_t; // NOLINT /*! \brief long integers */ typedef uint64_t bst_ulong; // NOLINT(*) /*! \brief float type, used for storing statistics */ -typedef float bst_float; - +using bst_float = float; // NOLINT namespace detail { /*! \brief Implementation of gradient statistics pair. Template specialisation * may be used to overload different gradients types e.g. low precision, high * precision, integer, floating point. */ template -class bst_gpair_internal { +class GradientPairInternal { /*! \brief gradient statistics */ T grad_; /*! 
\brief second order gradient statistics */ @@ -104,23 +103,23 @@ class bst_gpair_internal { XGBOOST_DEVICE void SetHess(float h) { hess_ = h; } public: - typedef T value_t; + using ValueT = T; - XGBOOST_DEVICE bst_gpair_internal() : grad_(0), hess_(0) {} + XGBOOST_DEVICE GradientPairInternal() : grad_(0), hess_(0) {} - XGBOOST_DEVICE bst_gpair_internal(float grad, float hess) { + XGBOOST_DEVICE GradientPairInternal(float grad, float hess) { SetGrad(grad); SetHess(hess); } // Copy constructor if of same value type - XGBOOST_DEVICE bst_gpair_internal(const bst_gpair_internal &g) - : grad_(g.grad_), hess_(g.hess_) {} + XGBOOST_DEVICE GradientPairInternal(const GradientPairInternal &g) + : grad_(g.grad_), hess_(g.hess_) {} // NOLINT // Copy constructor if different value type - use getters and setters to // perform conversion template - XGBOOST_DEVICE bst_gpair_internal(const bst_gpair_internal &g) { + XGBOOST_DEVICE explicit GradientPairInternal(const GradientPairInternal &g) { SetGrad(g.GetGrad()); SetHess(g.GetHess()); } @@ -128,85 +127,85 @@ class bst_gpair_internal { XGBOOST_DEVICE float GetGrad() const { return grad_; } XGBOOST_DEVICE float GetHess() const { return hess_; } - XGBOOST_DEVICE bst_gpair_internal &operator+=( - const bst_gpair_internal &rhs) { + XGBOOST_DEVICE GradientPairInternal &operator+=( + const GradientPairInternal &rhs) { grad_ += rhs.grad_; hess_ += rhs.hess_; return *this; } - XGBOOST_DEVICE bst_gpair_internal operator+( - const bst_gpair_internal &rhs) const { - bst_gpair_internal g; + XGBOOST_DEVICE GradientPairInternal operator+( + const GradientPairInternal &rhs) const { + GradientPairInternal g; g.grad_ = grad_ + rhs.grad_; g.hess_ = hess_ + rhs.hess_; return g; } - XGBOOST_DEVICE bst_gpair_internal &operator-=( - const bst_gpair_internal &rhs) { + XGBOOST_DEVICE GradientPairInternal &operator-=( + const GradientPairInternal &rhs) { grad_ -= rhs.grad_; hess_ -= rhs.hess_; return *this; } - XGBOOST_DEVICE bst_gpair_internal operator-( - const bst_gpair_internal &rhs) const { - bst_gpair_internal g; + XGBOOST_DEVICE GradientPairInternal operator-( + const GradientPairInternal &rhs) const { + GradientPairInternal g; g.grad_ = grad_ - rhs.grad_; g.hess_ = hess_ - rhs.hess_; return g; } - XGBOOST_DEVICE bst_gpair_internal(int value) { - *this = bst_gpair_internal(static_cast(value), + XGBOOST_DEVICE explicit GradientPairInternal(int value) { + *this = GradientPairInternal(static_cast(value), static_cast(value)); } friend std::ostream &operator<<(std::ostream &os, - const bst_gpair_internal &g) { + const GradientPairInternal &g) { os << g.GetGrad() << "/" << g.GetHess(); return os; } }; template<> -inline XGBOOST_DEVICE float bst_gpair_internal::GetGrad() const { +inline XGBOOST_DEVICE float GradientPairInternal::GetGrad() const { return grad_ * 1e-4f; } template<> -inline XGBOOST_DEVICE float bst_gpair_internal::GetHess() const { +inline XGBOOST_DEVICE float GradientPairInternal::GetHess() const { return hess_ * 1e-4f; } template<> -inline XGBOOST_DEVICE void bst_gpair_internal::SetGrad(float g) { +inline XGBOOST_DEVICE void GradientPairInternal::SetGrad(float g) { grad_ = static_cast(std::round(g * 1e4)); } template<> -inline XGBOOST_DEVICE void bst_gpair_internal::SetHess(float h) { +inline XGBOOST_DEVICE void GradientPairInternal::SetHess(float h) { hess_ = static_cast(std::round(h * 1e4)); } } // namespace detail /*! 
\brief gradient statistics pair usually needed in gradient boosting */ -typedef detail::bst_gpair_internal bst_gpair; +using GradientPair = detail::GradientPairInternal; /*! \brief High precision gradient statistics pair */ -typedef detail::bst_gpair_internal bst_gpair_precise; +using GradientPairPrecise = detail::GradientPairInternal; /*! \brief High precision gradient statistics pair with integer backed * storage. Operators are associative where floating point versions are not * associative. */ -typedef detail::bst_gpair_internal bst_gpair_integer; +using GradientPairInteger = detail::GradientPairInternal; /*! \brief small eps gap for minimum split decision. */ -const bst_float rt_eps = 1e-6f; +const bst_float kRtEps = 1e-6f; /*! \brief define unsigned long for openmp loop */ -typedef dmlc::omp_ulong omp_ulong; +using omp_ulong = dmlc::omp_ulong; // NOLINT /*! \brief define unsigned int for openmp loop */ -typedef dmlc::omp_uint bst_omp_uint; +using bst_omp_uint = dmlc::omp_uint; // NOLINT /*! * \brief define compatible keywords in g++ diff --git a/include/xgboost/c_api.h b/include/xgboost/c_api.h index 02da26b05..a375ffb0c 100644 --- a/include/xgboost/c_api.h +++ b/include/xgboost/c_api.h @@ -30,16 +30,16 @@ typedef uint64_t bst_ulong; // NOLINT(*) /*! \brief handle to DMatrix */ -typedef void *DMatrixHandle; +typedef void *DMatrixHandle; // NOLINT(*) /*! \brief handle to Booster */ -typedef void *BoosterHandle; +typedef void *BoosterHandle; // NOLINT(*) /*! \brief handle to a data iterator */ -typedef void *DataIterHandle; +typedef void *DataIterHandle; // NOLINT(*) /*! \brief handle to a internal data holder. */ -typedef void *DataHolderHandle; +typedef void *DataHolderHandle; // NOLINT(*) /*! \brief Mini batch used in XGBoost Data Iteration */ -typedef struct { +typedef struct { // NOLINT(*) /*! \brief number of rows in the minibatch */ size_t size; /*! \brief row pointer to the rows in the data */ @@ -66,7 +66,7 @@ typedef struct { * \param handle The handle to the callback. * \param batch The data content to be set. */ -XGB_EXTERN_C typedef int XGBCallbackSetData( +XGB_EXTERN_C typedef int XGBCallbackSetData( // NOLINT(*) DataHolderHandle handle, XGBoostBatchCSR batch); /*! @@ -80,9 +80,8 @@ XGB_EXTERN_C typedef int XGBCallbackSetData( * \param set_function_handle The handle to be passed to set function. * \return 0 if we are reaching the end and batch is not returned. */ -XGB_EXTERN_C typedef int XGBCallbackDataIterNext( - DataIterHandle data_handle, - XGBCallbackSetData* set_function, +XGB_EXTERN_C typedef int XGBCallbackDataIterNext( // NOLINT(*) + DataIterHandle data_handle, XGBCallbackSetData *set_function, DataHolderHandle set_function_handle); /*! @@ -216,11 +215,9 @@ XGB_DLL int XGDMatrixCreateFromMat(const float *data, * \param nthread number of threads (up to maximum cores available, if <=0 use all cores) * \return 0 when success, -1 when failure happens */ -XGB_DLL int XGDMatrixCreateFromMat_omp(const float *data, - bst_ulong nrow, - bst_ulong ncol, - float missing, - DMatrixHandle *out, +XGB_DLL int XGDMatrixCreateFromMat_omp(const float *data, // NOLINT + bst_ulong nrow, bst_ulong ncol, + float missing, DMatrixHandle *out, int nthread); /*! * \brief create a new dmatrix from sliced content of existing matrix diff --git a/include/xgboost/data.h b/include/xgboost/data.h index 24a3a1f3f..cc7904583 100644 --- a/include/xgboost/data.h +++ b/include/xgboost/data.h @@ -30,44 +30,45 @@ enum DataType { /*! * \brief Meta information about dataset, always sit in memory. 
*/ -struct MetaInfo { +class MetaInfo { + public: /*! \brief number of rows in the data */ - uint64_t num_row; + uint64_t num_row_{0}; /*! \brief number of columns in the data */ - uint64_t num_col; + uint64_t num_col_{0}; /*! \brief number of nonzero entries in the data */ - uint64_t num_nonzero; + uint64_t num_nonzero_{0}; /*! \brief label of each instance */ - std::vector labels; + std::vector labels_; /*! * \brief specified root index of each instance, * can be used for multi task setting */ - std::vector root_index; + std::vector root_index_; /*! * \brief the index of begin and end of a group * needed when the learning task is ranking. */ - std::vector group_ptr; + std::vector group_ptr_; /*! \brief weights of each instance, optional */ - std::vector weights; + std::vector weights_; /*! * \brief initialized margins, * if specified, xgboost will start from this init margin * can be used to specify initial prediction to boost from. */ - std::vector base_margin; + std::vector base_margin_; /*! \brief version flag, used to check version of this info */ static const int kVersion = 1; /*! \brief default constructor */ - MetaInfo() : num_row(0), num_col(0), num_nonzero(0) {} + MetaInfo() = default; /*! * \brief Get weight of each instances. * \param i Instance index. * \return The weight. */ inline bst_float GetWeight(size_t i) const { - return weights.size() != 0 ? weights[i] : 1.0f; + return weights_.size() != 0 ? weights_[i] : 1.0f; } /*! * \brief Get the root index of i-th instance. @@ -75,20 +76,20 @@ struct MetaInfo { * \return The pre-defined root index of i-th instance. */ inline unsigned GetRoot(size_t i) const { - return root_index.size() != 0 ? root_index[i] : 0U; + return root_index_.size() != 0 ? root_index_[i] : 0U; } /*! \brief get sorted indexes (argsort) of labels by absolute value (used by cox loss) */ inline const std::vector& LabelAbsSort() const { - if (label_order_cache.size() == labels.size()) { - return label_order_cache; + if (label_order_cache_.size() == labels_.size()) { + return label_order_cache_; } - label_order_cache.resize(labels.size()); - std::iota(label_order_cache.begin(), label_order_cache.end(), 0); - const auto l = labels; - XGBOOST_PARALLEL_SORT(label_order_cache.begin(), label_order_cache.end(), + label_order_cache_.resize(labels_.size()); + std::iota(label_order_cache_.begin(), label_order_cache_.end(), 0); + const auto l = labels_; + XGBOOST_PARALLEL_SORT(label_order_cache_.begin(), label_order_cache_.end(), [&l](size_t i1, size_t i2) {return std::abs(l[i1]) < std::abs(l[i2]);}); - return label_order_cache; + return label_order_cache_; } /*! \brief clear all the information */ void Clear(); @@ -113,7 +114,7 @@ struct MetaInfo { private: /*! \brief argsort of labels */ - mutable std::vector label_order_cache; + mutable std::vector label_order_cache_; }; /*! \brief read-only sparse instance batch in CSR format */ @@ -125,7 +126,7 @@ struct SparseBatch { /*! \brief feature value */ bst_float fvalue; /*! \brief default constructor */ - Entry() {} + Entry() = default; /*! * \brief constructor with index and value * \param index The feature or row index. @@ -141,11 +142,11 @@ struct SparseBatch { /*! \brief an instance of sparse vector in the batch */ struct Inst { /*! \brief pointer to the elements*/ - const Entry *data; + const Entry *data{nullptr}; /*! \brief length of the instance */ - bst_uint length; + bst_uint length{0}; /*! 
\brief constructor */ - Inst() : data(0), length(0) {} + Inst() = default; Inst(const Entry *data, bst_uint length) : data(data), length(length) {} /*! \brief get i-th pair in the sparse vector*/ inline const Entry& operator[](size_t i) const { @@ -167,7 +168,7 @@ struct RowBatch : public SparseBatch { const Entry *data_ptr; /*! \brief get i-th row from the batch */ inline Inst operator[](size_t i) const { - return Inst(data_ptr + ind_ptr[i], static_cast(ind_ptr[i + 1] - ind_ptr[i])); + return {data_ptr + ind_ptr[i], static_cast(ind_ptr[i + 1] - ind_ptr[i])}; } }; @@ -206,16 +207,16 @@ class DataSource : public dmlc::DataIter { * \brief A vector-like structure to represent set of rows. * But saves the memory when all rows are in the set (common case in xgb) */ -struct RowSet { +class RowSet { public: /*! \return i-th row index */ inline bst_uint operator[](size_t i) const; /*! \return the size of the set. */ - inline size_t size() const; + inline size_t Size() const; /*! \brief push the index back to the set */ - inline void push_back(bst_uint i); + inline void PushBack(bst_uint i); /*! \brief clear the set */ - inline void clear(); + inline void Clear(); /*! * \brief save rowset to file. * \param fo The file to be saved. @@ -228,11 +229,11 @@ struct RowSet { */ inline bool Load(dmlc::Stream* fi); /*! \brief constructor */ - RowSet() : size_(0) {} + RowSet() = default; private: /*! \brief The internal data structure of size */ - uint64_t size_; + uint64_t size_{0}; /*! \brief The internal data structure of row set if not all*/ std::vector rows_; }; @@ -250,11 +251,11 @@ struct RowSet { class DMatrix { public: /*! \brief default constructor */ - DMatrix() : cache_learner_ptr_(nullptr) {} + DMatrix() = default; /*! \brief meta information of the dataset */ - virtual MetaInfo& info() = 0; + virtual MetaInfo& Info() = 0; /*! \brief meta information of the dataset */ - virtual const MetaInfo& info() const = 0; + virtual const MetaInfo& Info() const = 0; /*! * \brief get the row iterator, reset to beginning position * \note Only either RowIterator or column Iterator can be active. @@ -291,9 +292,9 @@ class DMatrix { /*! \brief get column density */ virtual float GetColDensity(size_t cidx) const = 0; /*! \return reference of buffered rowset, in column access */ - virtual const RowSet& buffered_rowset() const = 0; + virtual const RowSet& BufferedRowset() const = 0; /*! \brief virtual destructor */ - virtual ~DMatrix() {} + virtual ~DMatrix() = default; /*! * \brief Save DMatrix to local file. * The saved file only works for non-sharded dataset(single machine training). @@ -343,7 +344,7 @@ class DMatrix { // allow learner class to access this field. friend class LearnerImpl; /*! \brief public field to back ref cached matrix. */ - LearnerImpl* cache_learner_ptr_; + LearnerImpl* cache_learner_ptr_{nullptr}; }; // implementation of inline functions @@ -351,15 +352,15 @@ inline bst_uint RowSet::operator[](size_t i) const { return rows_.size() == 0 ? 
static_cast(i) : rows_[i]; } -inline size_t RowSet::size() const { +inline size_t RowSet::Size() const { return size_; } -inline void RowSet::clear() { +inline void RowSet::Clear() { rows_.clear(); size_ = 0; } -inline void RowSet::push_back(bst_uint i) { +inline void RowSet::PushBack(bst_uint i) { if (rows_.size() == 0) { if (i == size_) { ++size_; return; diff --git a/include/xgboost/feature_map.h b/include/xgboost/feature_map.h index 7b7b2da35..2ccc16530 100644 --- a/include/xgboost/feature_map.h +++ b/include/xgboost/feature_map.h @@ -45,7 +45,7 @@ class FeatureMap { */ inline void PushBack(int fid, const char *fname, const char *ftype) { CHECK_EQ(fid, static_cast(names_.size())); - names_.push_back(std::string(fname)); + names_.emplace_back(fname); types_.push_back(GetType(ftype)); } /*! \brief clear the feature map */ @@ -54,11 +54,11 @@ class FeatureMap { types_.clear(); } /*! \return number of known features */ - inline size_t size() const { + inline size_t Size() const { return names_.size(); } /*! \return name of specific feature */ - inline const char* name(size_t idx) const { + inline const char* Name(size_t idx) const { CHECK_LT(idx, names_.size()) << "FeatureMap feature index exceed bound"; return names_[idx].c_str(); } @@ -75,7 +75,7 @@ class FeatureMap { * \return The translated type. */ inline static Type GetType(const char* tname) { - using namespace std; + using std::strcmp; if (!strcmp("i", tname)) return kIndicator; if (!strcmp("q", tname)) return kQuantitive; if (!strcmp("int", tname)) return kInteger; diff --git a/include/xgboost/gbm.h b/include/xgboost/gbm.h index 867fee6a9..5ec57d765 100644 --- a/include/xgboost/gbm.h +++ b/include/xgboost/gbm.h @@ -27,7 +27,7 @@ namespace xgboost { class GradientBooster { public: /*! \brief virtual destructor */ - virtual ~GradientBooster() {} + virtual ~GradientBooster() = default; /*! * \brief set configuration from pair iterators. * \param begin The beginning iterator. @@ -69,7 +69,7 @@ class GradientBooster { * the booster may change content of gpair */ virtual void DoBoost(DMatrix* p_fmat, - HostDeviceVector* in_gpair, + HostDeviceVector* in_gpair, ObjFunction* obj = nullptr) = 0; /*! diff --git a/include/xgboost/learner.h b/include/xgboost/learner.h index 3981940d2..3f79a637c 100644 --- a/include/xgboost/learner.h +++ b/include/xgboost/learner.h @@ -37,7 +37,7 @@ namespace xgboost { class Learner : public rabit::Serializable { public: /*! \brief virtual destructor */ - virtual ~Learner() {} + ~Learner() override = default; /*! * \brief set configuration from pair iterators. * \param begin The beginning iterator. @@ -62,12 +62,12 @@ class Learner : public rabit::Serializable { * \brief load model from stream * \param fi input stream. */ - virtual void Load(dmlc::Stream* fi) = 0; + void Load(dmlc::Stream* fi) override = 0; /*! * \brief save model to stream. * \param fo output stream */ - virtual void Save(dmlc::Stream* fo) const = 0; + void Save(dmlc::Stream* fo) const override = 0; /*! * \brief update the model for one iteration * With the specified objective function. @@ -84,7 +84,7 @@ class Learner : public rabit::Serializable { */ virtual void BoostOneIter(int iter, DMatrix* train, - HostDeviceVector* in_gpair) = 0; + HostDeviceVector* in_gpair) = 0; /*! * \brief evaluate the model for specific iteration using the configured metrics. 
* \param iter iteration number @@ -194,7 +194,7 @@ inline void Learner::Predict(const SparseBatch::Inst& inst, bool output_margin, HostDeviceVector* out_preds, unsigned ntree_limit) const { - gbm_->PredictInstance(inst, &out_preds->data_h(), ntree_limit); + gbm_->PredictInstance(inst, &out_preds->HostVector(), ntree_limit); if (!output_margin) { obj_->PredTransform(out_preds); } diff --git a/include/xgboost/linear_updater.h b/include/xgboost/linear_updater.h index b91d598ee..3d5d75f13 100644 --- a/include/xgboost/linear_updater.h +++ b/include/xgboost/linear_updater.h @@ -19,7 +19,7 @@ namespace xgboost { class LinearUpdater { public: /*! \brief virtual destructor */ - virtual ~LinearUpdater() {} + virtual ~LinearUpdater() = default; /*! * \brief Initialize the updater with given arguments. * \param args arguments to the objective function. @@ -36,7 +36,7 @@ class LinearUpdater { * \param sum_instance_weight The sum instance weights, used to normalise l1/l2 penalty. */ - virtual void Update(std::vector* in_gpair, DMatrix* data, + virtual void Update(std::vector* in_gpair, DMatrix* data, gbm::GBLinearModel* model, double sum_instance_weight) = 0; diff --git a/include/xgboost/logging.h b/include/xgboost/logging.h index 03887fb61..4228087e1 100644 --- a/include/xgboost/logging.h +++ b/include/xgboost/logging.h @@ -21,7 +21,7 @@ class BaseLogger { log_stream_ << "[" << dmlc::DateLogger().HumanDate() << "] "; #endif } - std::ostream& stream() { return log_stream_; } + std::ostream& stream() { return log_stream_; } // NOLINT protected: std::ostringstream log_stream_; diff --git a/include/xgboost/metric.h b/include/xgboost/metric.h index 3b31690a7..80adec194 100644 --- a/include/xgboost/metric.h +++ b/include/xgboost/metric.h @@ -35,7 +35,7 @@ class Metric { /*! \return name of metric */ virtual const char* Name() const = 0; /*! \brief virtual destructor */ - virtual ~Metric() {} + virtual ~Metric() = default; /*! * \brief create a metric according to name. * \param name name of the metric. diff --git a/include/xgboost/objective.h b/include/xgboost/objective.h index 63e4c4d14..fa536e7e6 100644 --- a/include/xgboost/objective.h +++ b/include/xgboost/objective.h @@ -23,7 +23,7 @@ namespace xgboost { class ObjFunction { public: /*! \brief virtual destructor */ - virtual ~ObjFunction() {} + virtual ~ObjFunction() = default; /*! * \brief set configuration from pair iterators. * \param begin The beginning iterator. @@ -47,7 +47,7 @@ class ObjFunction { virtual void GetGradient(HostDeviceVector* preds, const MetaInfo& info, int iteration, - HostDeviceVector* out_gpair) = 0; + HostDeviceVector* out_gpair) = 0; /*! \return the default evaluation metric for the objective */ virtual const char* DefaultEvalMetric() const = 0; diff --git a/include/xgboost/predictor.h b/include/xgboost/predictor.h index c8abd4b69..eb1acfaf2 100644 --- a/include/xgboost/predictor.h +++ b/include/xgboost/predictor.h @@ -36,7 +36,7 @@ namespace xgboost { class Predictor { public: - virtual ~Predictor() {} + virtual ~Predictor() = default; /** * \fn virtual void Predictor::Init(const std::vector class TreeModel { public: /*! \brief data type to indicate split condition */ - typedef TNodeStat NodeStat; + using NodeStat = TNodeStat; /*! \brief auxiliary statistics of node to help tree building */ - typedef TSplitCond SplitCond; + using SplitCond = TSplitCond; /*! 
\brief tree node */ class Node { public: - Node() : sindex_(0) { + Node() { // assert compact alignment static_assert(sizeof(Node) == 4 * sizeof(int) + sizeof(Info), "Node: 64 bit align"); } /*! \brief index of left child */ - inline int cleft() const { + inline int LeftChild() const { return this->cleft_; } /*! \brief index of right child */ - inline int cright() const { + inline int RightChild() const { return this->cright_; } /*! \brief index of default child when feature is missing */ - inline int cdefault() const { - return this->default_left() ? this->cleft() : this->cright(); + inline int DefaultChild() const { + return this->DefaultLeft() ? this->LeftChild() : this->RightChild(); } /*! \brief feature index of split condition */ - inline unsigned split_index() const { + inline unsigned SplitIndex() const { return sindex_ & ((1U << 31) - 1U); } /*! \brief when feature is unknown, whether goes to left child */ - inline bool default_left() const { + inline bool DefaultLeft() const { return (sindex_ >> 31) != 0; } /*! \brief whether current node is leaf node */ - inline bool is_leaf() const { + inline bool IsLeaf() const { return cleft_ == -1; } /*! \return get leaf value of leaf node */ - inline bst_float leaf_value() const { + inline bst_float LeafValue() const { return (this->info_).leaf_value; } /*! \return get split condition of the node */ - inline TSplitCond split_cond() const { + inline TSplitCond SplitCond() const { return (this->info_).split_cond; } /*! \brief get parent of the node */ - inline int parent() const { + inline int Parent() const { return parent_ & ((1U << 31) - 1); } /*! \brief whether current node is left child */ - inline bool is_left_child() const { + inline bool IsLeftChild() const { return (parent_ & (1U << 31)) != 0; } /*! \brief whether this node is deleted */ - inline bool is_deleted() const { + inline bool IsDeleted() const { return sindex_ == std::numeric_limits::max(); } /*! \brief whether current node is root */ - inline bool is_root() const { + inline bool IsRoot() const { return parent_ == -1; } /*! * \brief set the right child * \param nid node id to right child */ - inline void set_right_child(int nid) { + inline void SetRightChild(int nid) { this->cright_ = nid; } /*! @@ -143,7 +143,7 @@ class TreeModel { * \param split_cond split condition * \param default_left the default direction when feature is unknown */ - inline void set_split(unsigned split_index, TSplitCond split_cond, + inline void SetSplit(unsigned split_index, TSplitCond split_cond, bool default_left = false) { if (default_left) split_index |= (1U << 31); this->sindex_ = split_index; @@ -155,13 +155,13 @@ class TreeModel { * \param right right index, could be used to store * additional information */ - inline void set_leaf(bst_float value, int right = -1) { + inline void SetLeaf(bst_float value, int right = -1) { (this->info_).leaf_value = value; this->cleft_ = -1; this->cright_ = right; } /*! 
\brief mark that this node is deleted */ - inline void mark_delete() { + inline void MarkDelete() { this->sindex_ = std::numeric_limits::max(); } @@ -181,11 +181,11 @@ class TreeModel { // pointer to left, right int cleft_, cright_; // split feature index, left split or right split depends on the highest bit - unsigned sindex_; + unsigned sindex_{0}; // extra info Info info_; // set parent - inline void set_parent(int pidx, bool is_left_child = true) { + inline void SetParent(int pidx, bool is_left_child = true) { if (is_left_child) pidx |= (1U << 31); this->parent_ = pidx; } @@ -193,35 +193,35 @@ class TreeModel { protected: // vector of nodes - std::vector nodes; + std::vector nodes_; // free node space, used during training process - std::vector deleted_nodes; + std::vector deleted_nodes_; // stats of nodes - std::vector stats; + std::vector stats_; // leaf vector, that is used to store additional information - std::vector leaf_vector; + std::vector leaf_vector_; // allocate a new node, // !!!!!! NOTE: may cause BUG here, nodes.resize inline int AllocNode() { if (param.num_deleted != 0) { - int nd = deleted_nodes.back(); - deleted_nodes.pop_back(); + int nd = deleted_nodes_.back(); + deleted_nodes_.pop_back(); --param.num_deleted; return nd; } int nd = param.num_nodes++; CHECK_LT(param.num_nodes, std::numeric_limits::max()) << "number of nodes in the tree exceed 2^31"; - nodes.resize(param.num_nodes); - stats.resize(param.num_nodes); - leaf_vector.resize(param.num_nodes * param.size_leaf_vector); + nodes_.resize(param.num_nodes); + stats_.resize(param.num_nodes); + leaf_vector_.resize(param.num_nodes * param.size_leaf_vector); return nd; } // delete a tree node, keep the parent field to allow trace back inline void DeleteNode(int nid) { CHECK_GE(nid, param.num_roots); - deleted_nodes.push_back(nid); - nodes[nid].mark_delete(); + deleted_nodes_.push_back(nid); + nodes_[nid].MarkDelete(); ++param.num_deleted; } @@ -232,11 +232,11 @@ class TreeModel { * \param value new leaf value */ inline void ChangeToLeaf(int rid, bst_float value) { - CHECK(nodes[nodes[rid].cleft() ].is_leaf()); - CHECK(nodes[nodes[rid].cright()].is_leaf()); - this->DeleteNode(nodes[rid].cleft()); - this->DeleteNode(nodes[rid].cright()); - nodes[rid].set_leaf(value); + CHECK(nodes_[nodes_[rid].LeftChild() ].IsLeaf()); + CHECK(nodes_[nodes_[rid].RightChild()].IsLeaf()); + this->DeleteNode(nodes_[rid].LeftChild()); + this->DeleteNode(nodes_[rid].RightChild()); + nodes_[rid].SetLeaf(value); } /*! * \brief collapse a non leaf node to a leaf node, delete its children @@ -244,12 +244,12 @@ class TreeModel { * \param value new leaf value */ inline void CollapseToLeaf(int rid, bst_float value) { - if (nodes[rid].is_leaf()) return; - if (!nodes[nodes[rid].cleft() ].is_leaf()) { - CollapseToLeaf(nodes[rid].cleft(), 0.0f); + if (nodes_[rid].IsLeaf()) return; + if (!nodes_[nodes_[rid].LeftChild() ].IsLeaf()) { + CollapseToLeaf(nodes_[rid].LeftChild(), 0.0f); } - if (!nodes[nodes[rid].cright() ].is_leaf()) { - CollapseToLeaf(nodes[rid].cright(), 0.0f); + if (!nodes_[nodes_[rid].RightChild() ].IsLeaf()) { + CollapseToLeaf(nodes_[rid].RightChild(), 0.0f); } this->ChangeToLeaf(rid, value); } @@ -262,47 +262,47 @@ class TreeModel { param.num_nodes = 1; param.num_roots = 1; param.num_deleted = 0; - nodes.resize(1); + nodes_.resize(1); } /*! \brief get node given nid */ inline Node& operator[](int nid) { - return nodes[nid]; + return nodes_[nid]; } /*! 
\brief get node given nid */ inline const Node& operator[](int nid) const { - return nodes[nid]; + return nodes_[nid]; } /*! \brief get const reference to nodes */ - inline const std::vector& GetNodes() const { return nodes; } + inline const std::vector& GetNodes() const { return nodes_; } /*! \brief get node statistics given nid */ - inline NodeStat& stat(int nid) { - return stats[nid]; + inline NodeStat& Stat(int nid) { + return stats_[nid]; } /*! \brief get node statistics given nid */ - inline const NodeStat& stat(int nid) const { - return stats[nid]; + inline const NodeStat& Stat(int nid) const { + return stats_[nid]; } /*! \brief get leaf vector given nid */ - inline bst_float* leafvec(int nid) { - if (leaf_vector.size() == 0) return nullptr; - return& leaf_vector[nid * param.size_leaf_vector]; + inline bst_float* Leafvec(int nid) { + if (leaf_vector_.size() == 0) return nullptr; + return& leaf_vector_[nid * param.size_leaf_vector]; } /*! \brief get leaf vector given nid */ - inline const bst_float* leafvec(int nid) const { - if (leaf_vector.size() == 0) return nullptr; - return& leaf_vector[nid * param.size_leaf_vector]; + inline const bst_float* Leafvec(int nid) const { + if (leaf_vector_.size() == 0) return nullptr; + return& leaf_vector_[nid * param.size_leaf_vector]; } /*! \brief initialize the model */ inline void InitModel() { param.num_nodes = param.num_roots; - nodes.resize(param.num_nodes); - stats.resize(param.num_nodes); - leaf_vector.resize(param.num_nodes * param.size_leaf_vector, 0.0f); + nodes_.resize(param.num_nodes); + stats_.resize(param.num_nodes); + leaf_vector_.resize(param.num_nodes * param.size_leaf_vector, 0.0f); for (int i = 0; i < param.num_nodes; i ++) { - nodes[i].set_leaf(0.0f); - nodes[i].set_parent(-1); + nodes_[i].SetLeaf(0.0f); + nodes_[i].SetParent(-1); } } /*! @@ -311,35 +311,35 @@ class TreeModel { */ inline void Load(dmlc::Stream* fi) { CHECK_EQ(fi->Read(¶m, sizeof(TreeParam)), sizeof(TreeParam)); - nodes.resize(param.num_nodes); - stats.resize(param.num_nodes); + nodes_.resize(param.num_nodes); + stats_.resize(param.num_nodes); CHECK_NE(param.num_nodes, 0); - CHECK_EQ(fi->Read(dmlc::BeginPtr(nodes), sizeof(Node) * nodes.size()), - sizeof(Node) * nodes.size()); - CHECK_EQ(fi->Read(dmlc::BeginPtr(stats), sizeof(NodeStat) * stats.size()), - sizeof(NodeStat) * stats.size()); + CHECK_EQ(fi->Read(dmlc::BeginPtr(nodes_), sizeof(Node) * nodes_.size()), + sizeof(Node) * nodes_.size()); + CHECK_EQ(fi->Read(dmlc::BeginPtr(stats_), sizeof(NodeStat) * stats_.size()), + sizeof(NodeStat) * stats_.size()); if (param.size_leaf_vector != 0) { - CHECK(fi->Read(&leaf_vector)); + CHECK(fi->Read(&leaf_vector_)); } // chg deleted nodes - deleted_nodes.resize(0); + deleted_nodes_.resize(0); for (int i = param.num_roots; i < param.num_nodes; ++i) { - if (nodes[i].is_deleted()) deleted_nodes.push_back(i); + if (nodes_[i].IsDeleted()) deleted_nodes_.push_back(i); } - CHECK_EQ(static_cast(deleted_nodes.size()), param.num_deleted); + CHECK_EQ(static_cast(deleted_nodes_.size()), param.num_deleted); } /*! 
* \brief save model to stream * \param fo output stream */ inline void Save(dmlc::Stream* fo) const { - CHECK_EQ(param.num_nodes, static_cast(nodes.size())); - CHECK_EQ(param.num_nodes, static_cast(stats.size())); + CHECK_EQ(param.num_nodes, static_cast(nodes_.size())); + CHECK_EQ(param.num_nodes, static_cast(stats_.size())); fo->Write(¶m, sizeof(TreeParam)); CHECK_NE(param.num_nodes, 0); - fo->Write(dmlc::BeginPtr(nodes), sizeof(Node) * nodes.size()); - fo->Write(dmlc::BeginPtr(stats), sizeof(NodeStat) * nodes.size()); - if (param.size_leaf_vector != 0) fo->Write(leaf_vector); + fo->Write(dmlc::BeginPtr(nodes_), sizeof(Node) * nodes_.size()); + fo->Write(dmlc::BeginPtr(stats_), sizeof(NodeStat) * nodes_.size()); + if (param.size_leaf_vector != 0) fo->Write(leaf_vector_); } /*! * \brief add child nodes to node @@ -348,10 +348,10 @@ class TreeModel { inline void AddChilds(int nid) { int pleft = this->AllocNode(); int pright = this->AllocNode(); - nodes[nid].cleft_ = pleft; - nodes[nid].cright_ = pright; - nodes[nodes[nid].cleft() ].set_parent(nid, true); - nodes[nodes[nid].cright()].set_parent(nid, false); + nodes_[nid].cleft_ = pleft; + nodes_[nid].cright_ = pright; + nodes_[nodes_[nid].LeftChild() ].SetParent(nid, true); + nodes_[nodes_[nid].RightChild()].SetParent(nid, false); } /*! * \brief only add a right child to a leaf node @@ -359,8 +359,8 @@ class TreeModel { */ inline void AddRightChild(int nid) { int pright = this->AllocNode(); - nodes[nid].right = pright; - nodes[nodes[nid].right].set_parent(nid, false); + nodes_[nid].right = pright; + nodes_[nodes_[nid].right].SetParent(nid, false); } /*! * \brief get current depth @@ -369,9 +369,9 @@ class TreeModel { */ inline int GetDepth(int nid, bool pass_rchild = false) const { int depth = 0; - while (!nodes[nid].is_root()) { - if (!pass_rchild || nodes[nid].is_left_child()) ++depth; - nid = nodes[nid].parent(); + while (!nodes_[nid].IsRoot()) { + if (!pass_rchild || nodes_[nid].IsLeftChild()) ++depth; + nid = nodes_[nid].Parent(); } return depth; } @@ -380,9 +380,9 @@ class TreeModel { * \param nid node id */ inline int MaxDepth(int nid) const { - if (nodes[nid].is_leaf()) return 0; - return std::max(MaxDepth(nodes[nid].cleft())+1, - MaxDepth(nodes[nid].cright())+1); + if (nodes_[nid].IsLeaf()) return 0; + return std::max(MaxDepth(nodes_[nid].LeftChild())+1, + MaxDepth(nodes_[nid].RightChild())+1); } /*! * \brief get maximum depth @@ -395,7 +395,7 @@ class TreeModel { return maxd; } /*! \brief number of extra nodes besides the root */ - inline int num_extra_nodes() const { + inline int NumExtraNodes() const { return param.num_nodes - param.num_roots - param.num_deleted; } }; @@ -421,7 +421,7 @@ struct PathElement { bst_float zero_fraction; bst_float one_fraction; bst_float pweight; - PathElement() {} + PathElement() = default; PathElement(int i, bst_float z, bst_float o, bst_float w) : feature_index(i), zero_fraction(z), one_fraction(o), pweight(w) {} }; @@ -457,19 +457,19 @@ class RegTree: public TreeModel { * \brief returns the size of the feature vector * \return the size of the feature vector */ - inline size_t size() const; + inline size_t Size() const; /*! * \brief get ith value * \param i feature index. * \return the i-th feature value */ - inline bst_float fvalue(size_t i) const; + inline bst_float Fvalue(size_t i) const; /*! * \brief check whether i-th entry is missing * \param i feature index. * \return whether i-th value is missing. 
*/ - inline bool is_missing(size_t i) const; + inline bool IsMissing(size_t i) const; private: /*! @@ -480,7 +480,7 @@ class RegTree: public TreeModel { bst_float fvalue; int flag; }; - std::vector data; + std::vector data_; }; /*! * \brief get the leaf index @@ -562,63 +562,63 @@ class RegTree: public TreeModel { private: inline bst_float FillNodeMeanValue(int nid); - std::vector node_mean_values; + std::vector node_mean_values_; }; // implementations of inline functions // do not need to read if only use the model inline void RegTree::FVec::Init(size_t size) { Entry e; e.flag = -1; - data.resize(size); - std::fill(data.begin(), data.end(), e); + data_.resize(size); + std::fill(data_.begin(), data_.end(), e); } inline void RegTree::FVec::Fill(const RowBatch::Inst& inst) { for (bst_uint i = 0; i < inst.length; ++i) { - if (inst[i].index >= data.size()) continue; - data[inst[i].index].fvalue = inst[i].fvalue; + if (inst[i].index >= data_.size()) continue; + data_[inst[i].index].fvalue = inst[i].fvalue; } } inline void RegTree::FVec::Drop(const RowBatch::Inst& inst) { for (bst_uint i = 0; i < inst.length; ++i) { - if (inst[i].index >= data.size()) continue; - data[inst[i].index].flag = -1; + if (inst[i].index >= data_.size()) continue; + data_[inst[i].index].flag = -1; } } -inline size_t RegTree::FVec::size() const { - return data.size(); +inline size_t RegTree::FVec::Size() const { + return data_.size(); } -inline bst_float RegTree::FVec::fvalue(size_t i) const { - return data[i].fvalue; +inline bst_float RegTree::FVec::Fvalue(size_t i) const { + return data_[i].fvalue; } -inline bool RegTree::FVec::is_missing(size_t i) const { - return data[i].flag == -1; +inline bool RegTree::FVec::IsMissing(size_t i) const { + return data_[i].flag == -1; } inline int RegTree::GetLeafIndex(const RegTree::FVec& feat, unsigned root_id) const { - int pid = static_cast(root_id); - while (!(*this)[pid].is_leaf()) { - unsigned split_index = (*this)[pid].split_index(); - pid = this->GetNext(pid, feat.fvalue(split_index), feat.is_missing(split_index)); + auto pid = static_cast(root_id); + while (!(*this)[pid].IsLeaf()) { + unsigned split_index = (*this)[pid].SplitIndex(); + pid = this->GetNext(pid, feat.Fvalue(split_index), feat.IsMissing(split_index)); } return pid; } inline bst_float RegTree::Predict(const RegTree::FVec& feat, unsigned root_id) const { int pid = this->GetLeafIndex(feat, root_id); - return (*this)[pid].leaf_value(); + return (*this)[pid].LeafValue(); } inline void RegTree::FillNodeMeanValues() { size_t num_nodes = this->param.num_nodes; - if (this->node_mean_values.size() == num_nodes) { + if (this->node_mean_values_.size() == num_nodes) { return; } - this->node_mean_values.resize(num_nodes); + this->node_mean_values_.resize(num_nodes); for (int root_id = 0; root_id < param.num_roots; ++root_id) { this->FillNodeMeanValue(root_id); } @@ -627,40 +627,39 @@ inline void RegTree::FillNodeMeanValues() { inline bst_float RegTree::FillNodeMeanValue(int nid) { bst_float result; auto& node = (*this)[nid]; - if (node.is_leaf()) { - result = node.leaf_value(); + if (node.IsLeaf()) { + result = node.LeafValue(); } else { - result = this->FillNodeMeanValue(node.cleft()) * this->stat(node.cleft()).sum_hess; - result += this->FillNodeMeanValue(node.cright()) * this->stat(node.cright()).sum_hess; - result /= this->stat(nid).sum_hess; + result = this->FillNodeMeanValue(node.LeftChild()) * this->Stat(node.LeftChild()).sum_hess; + result += this->FillNodeMeanValue(node.RightChild()) * 
this->Stat(node.RightChild()).sum_hess; + result /= this->Stat(nid).sum_hess; } - this->node_mean_values[nid] = result; + this->node_mean_values_[nid] = result; return result; } inline void RegTree::CalculateContributionsApprox(const RegTree::FVec& feat, unsigned root_id, bst_float *out_contribs) const { - CHECK_GT(this->node_mean_values.size(), 0U); + CHECK_GT(this->node_mean_values_.size(), 0U); // this follows the idea of http://blog.datadive.net/interpreting-random-forests/ - bst_float node_value; - unsigned split_index; - int pid = static_cast(root_id); + unsigned split_index = 0; + auto pid = static_cast(root_id); // update bias value - node_value = this->node_mean_values[pid]; - out_contribs[feat.size()] += node_value; - if ((*this)[pid].is_leaf()) { + bst_float node_value = this->node_mean_values_[pid]; + out_contribs[feat.Size()] += node_value; + if ((*this)[pid].IsLeaf()) { // nothing to do anymore return; } - while (!(*this)[pid].is_leaf()) { - split_index = (*this)[pid].split_index(); - pid = this->GetNext(pid, feat.fvalue(split_index), feat.is_missing(split_index)); - bst_float new_value = this->node_mean_values[pid]; + while (!(*this)[pid].IsLeaf()) { + split_index = (*this)[pid].SplitIndex(); + pid = this->GetNext(pid, feat.Fvalue(split_index), feat.IsMissing(split_index)); + bst_float new_value = this->node_mean_values_[pid]; // update feature weight out_contribs[split_index] += new_value - node_value; node_value = new_value; } - bst_float leaf_value = (*this)[pid].leaf_value(); + bst_float leaf_value = (*this)[pid].LeafValue(); // update leaf feature weight out_contribs[split_index] += leaf_value - node_value; } @@ -749,33 +748,33 @@ inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi, ExtendPath(unique_path, unique_depth, parent_zero_fraction, parent_one_fraction, parent_feature_index); } - const unsigned split_index = node.split_index(); + const unsigned split_index = node.SplitIndex(); // leaf node - if (node.is_leaf()) { + if (node.IsLeaf()) { for (unsigned i = 1; i <= unique_depth; ++i) { const bst_float w = UnwoundPathSum(unique_path, unique_depth, i); const PathElement &el = unique_path[i]; phi[el.feature_index] += w * (el.one_fraction - el.zero_fraction) - * node.leaf_value() * condition_fraction; + * node.LeafValue() * condition_fraction; } // internal node } else { // find which branch is "hot" (meaning x would follow it) unsigned hot_index = 0; - if (feat.is_missing(split_index)) { - hot_index = node.cdefault(); - } else if (feat.fvalue(split_index) < node.split_cond()) { - hot_index = node.cleft(); + if (feat.IsMissing(split_index)) { + hot_index = node.DefaultChild(); + } else if (feat.Fvalue(split_index) < node.SplitCond()) { + hot_index = node.LeftChild(); } else { - hot_index = node.cright(); + hot_index = node.RightChild(); } - const unsigned cold_index = (static_cast(hot_index) == node.cleft() ? - node.cright() : node.cleft()); - const bst_float w = this->stat(node_index).sum_hess; - const bst_float hot_zero_fraction = this->stat(hot_index).sum_hess / w; - const bst_float cold_zero_fraction = this->stat(cold_index).sum_hess / w; + const unsigned cold_index = (static_cast(hot_index) == node.LeftChild() ? 
+ node.RightChild() : node.LeftChild()); + const bst_float w = this->Stat(node_index).sum_hess; + const bst_float hot_zero_fraction = this->Stat(hot_index).sum_hess / w; + const bst_float cold_zero_fraction = this->Stat(cold_index).sum_hess / w; bst_float incoming_zero_fraction = 1; bst_float incoming_one_fraction = 1; @@ -820,13 +819,13 @@ inline void RegTree::CalculateContributions(const RegTree::FVec& feat, unsigned unsigned condition_feature) const { // find the expected value of the tree's predictions if (condition == 0) { - bst_float node_value = this->node_mean_values[static_cast(root_id)]; - out_contribs[feat.size()] += node_value; + bst_float node_value = this->node_mean_values_[static_cast(root_id)]; + out_contribs[feat.Size()] += node_value; } // Preallocate space for the unique path data const int maxd = this->MaxDepth(root_id) + 2; - PathElement *unique_path_data = new PathElement[(maxd * (maxd + 1)) / 2]; + auto *unique_path_data = new PathElement[(maxd * (maxd + 1)) / 2]; TreeShap(feat, out_contribs, root_id, 0, unique_path_data, 1, 1, -1, condition, condition_feature, 1); @@ -835,14 +834,14 @@ inline void RegTree::CalculateContributions(const RegTree::FVec& feat, unsigned /*! \brief get next position of the tree given current pid */ inline int RegTree::GetNext(int pid, bst_float fvalue, bool is_unknown) const { - bst_float split_value = (*this)[pid].split_cond(); + bst_float split_value = (*this)[pid].SplitCond(); if (is_unknown) { - return (*this)[pid].cdefault(); + return (*this)[pid].DefaultChild(); } else { if (fvalue < split_value) { - return (*this)[pid].cleft(); + return (*this)[pid].LeftChild(); } else { - return (*this)[pid].cright(); + return (*this)[pid].RightChild(); } } } diff --git a/include/xgboost/tree_updater.h b/include/xgboost/tree_updater.h index 07e44a64e..ec79d391a 100644 --- a/include/xgboost/tree_updater.h +++ b/include/xgboost/tree_updater.h @@ -25,7 +25,7 @@ namespace xgboost { class TreeUpdater { public: /*! \brief virtual destructor */ - virtual ~TreeUpdater() {} + virtual ~TreeUpdater() = default; /*! * \brief Initialize the updater with given arguments. * \param args arguments to the objective function. @@ -40,7 +40,7 @@ class TreeUpdater { * but maybe different random seeds, usually one tree is passed in at a time, * there can be multiple trees when we train random forest style model */ - virtual void Update(HostDeviceVector* gpair, + virtual void Update(HostDeviceVector* gpair, DMatrix* data, const std::vector& trees) = 0; diff --git a/plugin/example/custom_obj.cc b/plugin/example/custom_obj.cc index 5446ea9b1..e2e502b3e 100644 --- a/plugin/example/custom_obj.cc +++ b/plugin/example/custom_obj.cc @@ -36,21 +36,21 @@ class MyLogistic : public ObjFunction { void GetGradient(HostDeviceVector *preds, const MetaInfo &info, int iter, - HostDeviceVector *out_gpair) override { - out_gpair->resize(preds->size()); - std::vector& preds_h = preds->data_h(); - std::vector& out_gpair_h = out_gpair->data_h(); + HostDeviceVector *out_gpair) override { + out_gpair->Resize(preds->Size()); + std::vector& preds_h = preds->HostVector(); + std::vector& out_gpair_h = out_gpair->HostVector(); for (size_t i = 0; i < preds_h.size(); ++i) { bst_float w = info.GetWeight(i); // scale the negative examples! 
- if (info.labels[i] == 0.0f) w *= param_.scale_neg_weight; + if (info.labels_[i] == 0.0f) w *= param_.scale_neg_weight; // logistic transformation bst_float p = 1.0f / (1.0f + std::exp(-preds_h[i])); // this is the gradient - bst_float grad = (p - info.labels[i]) * w; + bst_float grad = (p - info.labels_[i]) * w; // this is the second order gradient bst_float hess = p * (1.0f - p) * w; - out_gpair_h.at(i) = bst_gpair(grad, hess); + out_gpair_h.at(i) = GradientPair(grad, hess); } } const char* DefaultEvalMetric() const override { @@ -58,7 +58,7 @@ class MyLogistic : public ObjFunction { } void PredTransform(HostDeviceVector *io_preds) override { // transform margin value to probability. - std::vector &preds = io_preds->data_h(); + std::vector &preds = io_preds->HostVector(); for (size_t i = 0; i < preds.size(); ++i) { preds[i] = 1.0f / (1.0f + std::exp(-preds[i])); } diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 47d4a9668..09f5c5fa1 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -27,7 +27,7 @@ class Booster { initialized_(false), learner_(Learner::Create(cache_mats)) {} - inline Learner* learner() { + inline Learner* learner() { // NOLINT return learner_.get(); } @@ -40,7 +40,7 @@ class Booster { return x.first == name; }); if (it == cfg_.end()) { - cfg_.push_back(std::make_pair(name, val)); + cfg_.emplace_back(name, val); } else { (*it).second = val; } @@ -193,11 +193,11 @@ struct XGBAPIThreadLocalEntry { /*! \brief returning float vector. */ HostDeviceVector ret_vec_float; /*! \brief temp variable of gradient pairs. */ - HostDeviceVector tmp_gpair; + HostDeviceVector tmp_gpair; }; // define the threadlocal store. -typedef dmlc::ThreadLocalStore XGBAPIThreadLocalStore; +using XGBAPIThreadLocalStore = dmlc::ThreadLocalStore; int XGDMatrixCreateFromFile(const char *fname, int silent, @@ -254,14 +254,14 @@ XGB_DLL int XGDMatrixCreateFromCSREx(const size_t* indptr, mat.row_ptr_.push_back(mat.row_data_.size()); } - mat.info.num_col = num_column; + mat.info.num_col_ = num_column; if (num_col > 0) { - CHECK_LE(mat.info.num_col, num_col) - << "num_col=" << num_col << " vs " << mat.info.num_col; - mat.info.num_col = num_col; + CHECK_LE(mat.info.num_col_, num_col) + << "num_col=" << num_col << " vs " << mat.info.num_col_; + mat.info.num_col_ = num_col; } - mat.info.num_row = nindptr - 1; - mat.info.num_nonzero = mat.row_data_.size(); + mat.info.num_row_ = nindptr - 1; + mat.info.num_nonzero_ = mat.row_data_.size(); *out = new std::shared_ptr(DMatrix::Create(std::move(source))); API_END(); } @@ -317,13 +317,13 @@ XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t* col_ptr, } } } - mat.info.num_row = mat.row_ptr_.size() - 1; + mat.info.num_row_ = mat.row_ptr_.size() - 1; if (num_row > 0) { - CHECK_LE(mat.info.num_row, num_row); - mat.info.num_row = num_row; + CHECK_LE(mat.info.num_row_, num_row); + mat.info.num_row_ = num_row; } - mat.info.num_col = ncol; - mat.info.num_nonzero = nelem; + mat.info.num_col_ = ncol; + mat.info.num_nonzero_ = nelem; *out = new std::shared_ptr(DMatrix::Create(std::move(source))); API_END(); } @@ -353,8 +353,8 @@ XGB_DLL int XGDMatrixCreateFromMat(const bst_float* data, data::SimpleCSRSource& mat = *source; mat.row_ptr_.resize(1+nrow); bool nan_missing = common::CheckNAN(missing); - mat.info.num_row = nrow; - mat.info.num_col = ncol; + mat.info.num_row_ = nrow; + mat.info.num_col_ = ncol; const bst_float* data0 = data; // count elements for sizing data @@ -389,12 +389,12 @@ XGB_DLL int XGDMatrixCreateFromMat(const bst_float* data, } } - 
mat.info.num_nonzero = mat.row_data_.size(); + mat.info.num_nonzero_ = mat.row_data_.size(); *out = new std::shared_ptr(DMatrix::Create(std::move(source))); API_END(); } -void prefixsum_inplace(size_t *x, size_t N) { +void PrefixSum(size_t *x, size_t N) { size_t *suma; #pragma omp parallel { @@ -425,12 +425,10 @@ void prefixsum_inplace(size_t *x, size_t N) { delete[] suma; } - -XGB_DLL int XGDMatrixCreateFromMat_omp(const bst_float* data, +XGB_DLL int XGDMatrixCreateFromMat_omp(const bst_float* data, // NOLINT xgboost::bst_ulong nrow, xgboost::bst_ulong ncol, - bst_float missing, - DMatrixHandle* out, + bst_float missing, DMatrixHandle* out, int nthread) { // avoid openmp unless enough data to be worth it to avoid overhead costs if (nrow*ncol <= 10000*50) { @@ -446,8 +444,8 @@ XGB_DLL int XGDMatrixCreateFromMat_omp(const bst_float* data, std::unique_ptr source(new data::SimpleCSRSource()); data::SimpleCSRSource& mat = *source; mat.row_ptr_.resize(1+nrow); - mat.info.num_row = nrow; - mat.info.num_col = ncol; + mat.info.num_row_ = nrow; + mat.info.num_col_ = ncol; // Check for errors in missing elements // Count elements per row (to avoid otherwise need to copy) @@ -480,7 +478,7 @@ XGB_DLL int XGDMatrixCreateFromMat_omp(const bst_float* data, } // do cumulative sum (to avoid otherwise need to copy) - prefixsum_inplace(&mat.row_ptr_[0], mat.row_ptr_.size()); + PrefixSum(&mat.row_ptr_[0], mat.row_ptr_.size()); mat.row_data_.resize(mat.row_data_.size() + mat.row_ptr_.back()); // Fill data matrix (now that know size, no need for slow push_back()) @@ -500,7 +498,7 @@ XGB_DLL int XGDMatrixCreateFromMat_omp(const bst_float* data, } } - mat.info.num_nonzero = mat.row_data_.size(); + mat.info.num_nonzero_ = mat.row_data_.size(); *out = new std::shared_ptr(DMatrix::Create(std::move(source))); API_END(); } @@ -516,12 +514,12 @@ XGB_DLL int XGDMatrixSliceDMatrix(DMatrixHandle handle, src.CopyFrom(static_cast*>(handle)->get()); data::SimpleCSRSource& ret = *source; - CHECK_EQ(src.info.group_ptr.size(), 0U) + CHECK_EQ(src.info.group_ptr_.size(), 0U) << "slice does not support group structure"; ret.Clear(); - ret.info.num_row = len; - ret.info.num_col = src.info.num_col; + ret.info.num_row_ = len; + ret.info.num_col_ = src.info.num_col_; dmlc::DataIter* iter = &src; iter->BeforeFirst(); @@ -532,23 +530,22 @@ XGB_DLL int XGDMatrixSliceDMatrix(DMatrixHandle handle, const int ridx = idxset[i]; RowBatch::Inst inst = batch[ridx]; CHECK_LT(static_cast(ridx), batch.size); - ret.row_data_.resize(ret.row_data_.size() + inst.length); - std::memcpy(dmlc::BeginPtr(ret.row_data_) + ret.row_ptr_.back(), inst.data, - sizeof(RowBatch::Entry) * inst.length); + ret.row_data_.insert(ret.row_data_.end(), inst.data, + inst.data + inst.length); ret.row_ptr_.push_back(ret.row_ptr_.back() + inst.length); - ret.info.num_nonzero += inst.length; + ret.info.num_nonzero_ += inst.length; - if (src.info.labels.size() != 0) { - ret.info.labels.push_back(src.info.labels[ridx]); + if (src.info.labels_.size() != 0) { + ret.info.labels_.push_back(src.info.labels_[ridx]); } - if (src.info.weights.size() != 0) { - ret.info.weights.push_back(src.info.weights[ridx]); + if (src.info.weights_.size() != 0) { + ret.info.weights_.push_back(src.info.weights_[ridx]); } - if (src.info.base_margin.size() != 0) { - ret.info.base_margin.push_back(src.info.base_margin[ridx]); + if (src.info.base_margin_.size() != 0) { + ret.info.base_margin_.push_back(src.info.base_margin_[ridx]); } - if (src.info.root_index.size() != 0) { - 
ret.info.root_index.push_back(src.info.root_index[ridx]); + if (src.info.root_index_.size() != 0) { + ret.info.root_index_.push_back(src.info.root_index_[ridx]); } } *out = new std::shared_ptr(DMatrix::Create(std::move(source))); @@ -575,7 +572,7 @@ XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle, xgboost::bst_ulong len) { API_BEGIN(); static_cast*>(handle) - ->get()->info().SetInfo(field, info, kFloat32, len); + ->get()->Info().SetInfo(field, info, kFloat32, len); API_END(); } @@ -585,7 +582,7 @@ XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle, xgboost::bst_ulong len) { API_BEGIN(); static_cast*>(handle) - ->get()->info().SetInfo(field, info, kUInt32, len); + ->get()->Info().SetInfo(field, info, kUInt32, len); API_END(); } @@ -593,12 +590,12 @@ XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle, const unsigned* group, xgboost::bst_ulong len) { API_BEGIN(); - std::shared_ptr *pmat = static_cast*>(handle); - MetaInfo& info = pmat->get()->info(); - info.group_ptr.resize(len + 1); - info.group_ptr[0] = 0; + auto *pmat = static_cast*>(handle); + MetaInfo& info = pmat->get()->Info(); + info.group_ptr_.resize(len + 1); + info.group_ptr_[0] = 0; for (uint64_t i = 0; i < len; ++i) { - info.group_ptr[i + 1] = info.group_ptr[i] + group[i]; + info.group_ptr_[i + 1] = info.group_ptr_[i] + group[i]; } API_END(); } @@ -608,18 +605,18 @@ XGB_DLL int XGDMatrixGetFloatInfo(const DMatrixHandle handle, xgboost::bst_ulong* out_len, const bst_float** out_dptr) { API_BEGIN(); - const MetaInfo& info = static_cast*>(handle)->get()->info(); + const MetaInfo& info = static_cast*>(handle)->get()->Info(); const std::vector* vec = nullptr; if (!std::strcmp(field, "label")) { - vec = &info.labels; + vec = &info.labels_; } else if (!std::strcmp(field, "weight")) { - vec = &info.weights; + vec = &info.weights_; } else if (!std::strcmp(field, "base_margin")) { - vec = &info.base_margin; + vec = &info.base_margin_; } else { LOG(FATAL) << "Unknown float field name " << field; } - *out_len = static_cast(vec->size()); + *out_len = static_cast(vec->size()); // NOLINT *out_dptr = dmlc::BeginPtr(*vec); API_END(); } @@ -629,15 +626,15 @@ XGB_DLL int XGDMatrixGetUIntInfo(const DMatrixHandle handle, xgboost::bst_ulong *out_len, const unsigned **out_dptr) { API_BEGIN(); - const MetaInfo& info = static_cast*>(handle)->get()->info(); + const MetaInfo& info = static_cast*>(handle)->get()->Info(); const std::vector* vec = nullptr; if (!std::strcmp(field, "root_index")) { - vec = &info.root_index; + vec = &info.root_index_; + *out_len = static_cast(vec->size()); + *out_dptr = dmlc::BeginPtr(*vec); } else { LOG(FATAL) << "Unknown uint field name " << field; } - *out_len = static_cast(vec->size()); - *out_dptr = dmlc::BeginPtr(*vec); API_END(); } @@ -645,7 +642,7 @@ XGB_DLL int XGDMatrixNumRow(const DMatrixHandle handle, xgboost::bst_ulong *out) { API_BEGIN(); *out = static_cast( - static_cast*>(handle)->get()->info().num_row); + static_cast*>(handle)->get()->Info().num_row_); API_END(); } @@ -653,7 +650,7 @@ XGB_DLL int XGDMatrixNumCol(const DMatrixHandle handle, xgboost::bst_ulong *out) { API_BEGIN(); *out = static_cast( - static_cast*>(handle)->get()->info().num_col); + static_cast*>(handle)->get()->Info().num_col_); API_END(); } @@ -688,8 +685,8 @@ XGB_DLL int XGBoosterUpdateOneIter(BoosterHandle handle, int iter, DMatrixHandle dtrain) { API_BEGIN(); - Booster* bst = static_cast(handle); - std::shared_ptr *dtr = + auto* bst = static_cast(handle); + auto *dtr = static_cast*>(dtrain); bst->LazyInit(); @@ -702,15 +699,15 @@ 
XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle, bst_float *grad, bst_float *hess, xgboost::bst_ulong len) { - HostDeviceVector& tmp_gpair = XGBAPIThreadLocalStore::Get()->tmp_gpair; + HostDeviceVector& tmp_gpair = XGBAPIThreadLocalStore::Get()->tmp_gpair; API_BEGIN(); - Booster* bst = static_cast(handle); - std::shared_ptr* dtr = + auto* bst = static_cast(handle); + auto* dtr = static_cast*>(dtrain); - tmp_gpair.resize(len); - std::vector& tmp_gpair_h = tmp_gpair.data_h(); + tmp_gpair.Resize(len); + std::vector& tmp_gpair_h = tmp_gpair.HostVector(); for (xgboost::bst_ulong i = 0; i < len; ++i) { - tmp_gpair_h[i] = bst_gpair(grad[i], hess[i]); + tmp_gpair_h[i] = GradientPair(grad[i], hess[i]); } bst->LazyInit(); @@ -726,13 +723,13 @@ XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle, const char** out_str) { std::string& eval_str = XGBAPIThreadLocalStore::Get()->ret_str; API_BEGIN(); - Booster* bst = static_cast(handle); + auto* bst = static_cast(handle); std::vector data_sets; std::vector data_names; for (xgboost::bst_ulong i = 0; i < len; ++i) { data_sets.push_back(static_cast*>(dmats[i])->get()); - data_names.push_back(std::string(evnames[i])); + data_names.emplace_back(evnames[i]); } bst->LazyInit(); @@ -750,7 +747,7 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle, HostDeviceVector& preds = XGBAPIThreadLocalStore::Get()->ret_vec_float; API_BEGIN(); - Booster *bst = static_cast(handle); + auto *bst = static_cast(handle); bst->LazyInit(); bst->learner()->Predict( static_cast*>(dmat)->get(), @@ -760,8 +757,8 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle, (option_mask & 4) != 0, (option_mask & 8) != 0, (option_mask & 16) != 0); - *out_result = dmlc::BeginPtr(preds.data_h()); - *len = static_cast(preds.size()); + *out_result = dmlc::BeginPtr(preds.HostVector()); + *len = static_cast(preds.Size()); API_END(); } @@ -775,7 +772,7 @@ XGB_DLL int XGBoosterLoadModel(BoosterHandle handle, const char* fname) { XGB_DLL int XGBoosterSaveModel(BoosterHandle handle, const char* fname) { API_BEGIN(); std::unique_ptr fo(dmlc::Stream::Create(fname, "w")); - Booster *bst = static_cast(handle); + auto *bst = static_cast(handle); bst->LazyInit(); bst->learner()->Save(fo.get()); API_END(); @@ -798,7 +795,7 @@ XGB_DLL int XGBoosterGetModelRaw(BoosterHandle handle, API_BEGIN(); common::MemoryBufferStream fo(&raw_str); - Booster *bst = static_cast(handle); + auto *bst = static_cast(handle); bst->LazyInit(); bst->learner()->Save(&fo); *out_dptr = dmlc::BeginPtr(raw_str); @@ -815,7 +812,7 @@ inline void XGBoostDumpModelImpl( const char*** out_models) { std::vector& str_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_str; std::vector& charp_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_charp; - Booster *bst = static_cast(handle); + auto *bst = static_cast(handle); bst->LazyInit(); str_vecs = bst->learner()->DumpModel(fmap, with_stats != 0, format); charp_vecs.resize(str_vecs.size()); @@ -881,7 +878,7 @@ XGB_DLL int XGBoosterGetAttr(BoosterHandle handle, const char* key, const char** out, int* success) { - Booster* bst = static_cast(handle); + auto* bst = static_cast(handle); std::string& ret_str = XGBAPIThreadLocalStore::Get()->ret_str; API_BEGIN(); if (bst->learner()->GetAttr(key, &ret_str)) { @@ -897,7 +894,7 @@ XGB_DLL int XGBoosterGetAttr(BoosterHandle handle, XGB_DLL int XGBoosterSetAttr(BoosterHandle handle, const char* key, const char* value) { - Booster* bst = static_cast(handle); + auto* bst = static_cast(handle); API_BEGIN(); if (value == nullptr) { bst->learner()->DelAttr(key); @@ 
-912,7 +909,7 @@ XGB_DLL int XGBoosterGetAttrNames(BoosterHandle handle, const char*** out) { std::vector<std::string>& str_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_str; std::vector<const char*>& charp_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_charp; - Booster *bst = static_cast<Booster*>(handle); + auto *bst = static_cast<Booster*>(handle); API_BEGIN(); str_vecs = bst->learner()->GetAttrNames(); charp_vecs.resize(str_vecs.size()); @@ -927,7 +924,7 @@ XGB_DLL int XGBoosterGetAttrNames(BoosterHandle handle, XGB_DLL int XGBoosterLoadRabitCheckpoint(BoosterHandle handle, int* version) { API_BEGIN(); - Booster* bst = static_cast<Booster*>(handle); + auto* bst = static_cast<Booster*>(handle); *version = rabit::LoadCheckPoint(bst->learner()); if (*version != 0) { bst->initialized_ = true; @@ -937,7 +934,7 @@ XGB_DLL int XGBoosterLoadRabitCheckpoint(BoosterHandle handle, XGB_DLL int XGBoosterSaveRabitCheckpoint(BoosterHandle handle) { API_BEGIN(); - Booster* bst = static_cast<Booster*>(handle); + auto* bst = static_cast<Booster*>(handle); if (bst->learner()->AllowLazyCheckPoint()) { rabit::LazyCheckPoint(bst->learner()); } else { diff --git a/src/c_api/c_api_error.cc b/src/c_api/c_api_error.cc index 19cd75af6..5e6a2c621 100644 --- a/src/c_api/c_api_error.cc +++ b/src/c_api/c_api_error.cc @@ -10,7 +10,7 @@ struct XGBAPIErrorEntry { std::string last_error; }; -typedef dmlc::ThreadLocalStore<XGBAPIErrorEntry> XGBAPIErrorStore; +using XGBAPIErrorStore = dmlc::ThreadLocalStore<XGBAPIErrorEntry>; const char *XGBGetLastError() { return XGBAPIErrorStore::Get()->last_error.c_str(); diff --git a/src/cli_main.cc b/src/cli_main.cc index 59eafc581..068cf0de4 100644 --- a/src/cli_main.cc +++ b/src/cli_main.cc @@ -134,7 +134,7 @@ struct CLIParam : public dmlc::Parameter<CLIParam> { char evname[256]; CHECK_EQ(sscanf(kv.first.c_str(), "eval[%[^]]", evname), 1) << "must specify evaluation name for display"; - eval_data_names.push_back(std::string(evname)); + eval_data_names.emplace_back(evname); eval_data_paths.push_back(kv.second); } } @@ -177,7 +177,7 @@ void CLITrain(const CLIParam& param) { std::vector<std::string> eval_data_names = param.eval_data_names; if (param.eval_train) { eval_datasets.push_back(dtrain.get()); - eval_data_names.push_back(std::string("train")); + eval_data_names.emplace_back("train"); } // initialize the learner. std::unique_ptr<Learner> learner(Learner::Create(cache_mats)); @@ -332,7 +332,7 @@ void CLIPredict(const CLIParam& param) { std::unique_ptr<dmlc::Stream> fo( dmlc::Stream::Create(param.name_pred.c_str(), "w")); dmlc::ostream os(fo.get()); - for (bst_float p : preds.data_h()) { + for (bst_float p : preds.HostVector()) { os << p << '\n'; } // force flush before fo destruct.
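Aside on the push_back -> emplace_back rewrites above (data_names, eval_data_names) and in the CLIRunTask hunk that follows (cfg): these are clang-tidy modernize-use-emplace fixes. A minimal standalone sketch of the difference, using an illustrative container rather than the actual xgboost fields:

    #include <string>
    #include <utility>
    #include <vector>

    int main() {
      std::vector<std::pair<std::string, std::string>> cfg;
      // push_back: make_pair builds a temporary pair, which is then
      // moved into the vector.
      cfg.push_back(std::make_pair(std::string("seed"), std::string("0")));
      // emplace_back: the arguments are forwarded and the pair is
      // constructed in place inside the vector, skipping the temporary.
      cfg.emplace_back("seed", "0");
      return 0;
    }

Both forms leave cfg with identical contents; the emplace form only removes the intermediate temporary object, which is why the check flags every push_back whose argument is a freshly constructed value.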
@@ -347,17 +347,17 @@ int CLIRunTask(int argc, char *argv[]) { rabit::Init(argc, argv); std::vector > cfg; - cfg.push_back(std::make_pair("seed", "0")); + cfg.emplace_back("seed", "0"); common::ConfigIterator itr(argv[1]); while (itr.Next()) { - cfg.push_back(std::make_pair(std::string(itr.name()), std::string(itr.val()))); + cfg.emplace_back(std::string(itr.Name()), std::string(itr.Val())); } for (int i = 2; i < argc; ++i) { char name[256], val[256]; if (sscanf(argv[i], "%[^=]=%s", name, val) == 2) { - cfg.push_back(std::make_pair(std::string(name), std::string(val))); + cfg.emplace_back(std::string(name), std::string(val)); } } CLIParam param; diff --git a/src/common/avx_helpers.h b/src/common/avx_helpers.h index 034a90e26..218cd8fe8 100644 --- a/src/common/avx_helpers.h +++ b/src/common/avx_helpers.h @@ -68,10 +68,10 @@ inline Float8 round(const Float8& x) { // Overload std::max/min namespace std { -inline avx::Float8 max(const avx::Float8& a, const avx::Float8& b) { +inline avx::Float8 max(const avx::Float8& a, const avx::Float8& b) { // NOLINT return avx::Float8(_mm256_max_ps(a.x, b.x)); } -inline avx::Float8 min(const avx::Float8& a, const avx::Float8& b) { +inline avx::Float8 min(const avx::Float8& a, const avx::Float8& b) { // NOLINT return avx::Float8(_mm256_min_ps(a.x, b.x)); } } // namespace std @@ -172,7 +172,7 @@ inline Float8 Sigmoid(Float8 x) { } // Store 8 gradient pairs given vectors containing gradient and Hessian -inline void StoreGpair(xgboost::bst_gpair* dst, const Float8& grad, +inline void StoreGpair(xgboost::GradientPair* dst, const Float8& grad, const Float8& hess) { float* ptr = reinterpret_cast(dst); __m256 gpair_low = _mm256_unpacklo_ps(grad.x, hess.x); @@ -190,11 +190,11 @@ namespace avx { * \brief Fallback implementation not using AVX. 
*/ -struct Float8 { +struct Float8 { // NOLINT float x[8]; explicit Float8(const float& val) { - for (int i = 0; i < 8; i++) { - x[i] = val; + for (float & i : x) { + i = val; } } explicit Float8(const float* vec) { @@ -202,7 +202,7 @@ struct Float8 { x[i] = vec[i]; } } - Float8() {} + Float8() = default; Float8& operator+=(const Float8& rhs) { for (int i = 0; i < 8; i++) { x[i] += rhs.x[i]; @@ -228,7 +228,7 @@ struct Float8 { return *this; } void Print() { - float* f = reinterpret_cast(&x); + auto* f = reinterpret_cast(&x); printf("%f %f %f %f %f %f %f %f\n", f[0], f[1], f[2], f[3], f[4], f[5], f[6], f[7]); } @@ -252,10 +252,10 @@ inline Float8 operator/(Float8 lhs, const Float8& rhs) { } // Store 8 gradient pairs given vectors containing gradient and Hessian -inline void StoreGpair(xgboost::bst_gpair* dst, const Float8& grad, +inline void StoreGpair(xgboost::GradientPair* dst, const Float8& grad, const Float8& hess) { for (int i = 0; i < 8; i++) { - dst[i] = xgboost::bst_gpair(grad.x[i], hess.x[i]); + dst[i] = xgboost::GradientPair(grad.x[i], hess.x[i]); } } @@ -269,14 +269,14 @@ inline Float8 Sigmoid(Float8 x) { } // namespace avx namespace std { -inline avx::Float8 max(const avx::Float8& a, const avx::Float8& b) { +inline avx::Float8 max(const avx::Float8& a, const avx::Float8& b) { // NOLINT avx::Float8 max; for (int i = 0; i < 8; i++) { max.x[i] = std::max(a.x[i], b.x[i]); } return max; } -inline avx::Float8 min(const avx::Float8& a, const avx::Float8& b) { +inline avx::Float8 min(const avx::Float8& a, const avx::Float8& b) { // NOLINT avx::Float8 min; for (int i = 0; i < 8; i++) { min.x[i] = std::min(a.x[i], b.x[i]); diff --git a/src/common/bitmap.h b/src/common/bitmap.h index 4f14d8b37..d8d53c8db 100644 --- a/src/common/bitmap.h +++ b/src/common/bitmap.h @@ -42,7 +42,7 @@ struct BitMap { inline void InitFromBool(const std::vector& vec) { this->Resize(vec.size()); // parallel over the full cases - bst_omp_uint nsize = static_cast(vec.size() / 32); + auto nsize = static_cast(vec.size() / 32); #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < nsize; ++i) { uint32_t res = 0; diff --git a/src/common/column_matrix.h b/src/common/column_matrix.h index cbf3a368b..c8363054c 100644 --- a/src/common/column_matrix.h +++ b/src/common/column_matrix.h @@ -8,21 +8,27 @@ #ifndef XGBOOST_COMMON_COLUMN_MATRIX_H_ #define XGBOOST_COMMON_COLUMN_MATRIX_H_ -#define XGBOOST_TYPE_SWITCH(dtype, OP) \ -switch (dtype) { \ - case xgboost::common::uint32 : { \ - typedef uint32_t DType; \ - OP; break; \ - } \ - case xgboost::common::uint16 : { \ - typedef uint16_t DType; \ - OP; break; \ - } \ - case xgboost::common::uint8 : { \ - typedef uint8_t DType; \ - OP; break; \ - default: LOG(FATAL) << "don't recognize type flag" << dtype; \ - } \ +#define XGBOOST_TYPE_SWITCH(dtype, OP) \ + \ +switch(dtype) { \ + case xgboost::common::uint32: { \ + using DType = uint32_t; \ + OP; \ + break; \ + } \ + case xgboost::common::uint16: { \ + using DType = uint16_t; \ + OP; \ + break; \ + } \ + case xgboost::common::uint8: { \ + using DType = uint8_t; \ + OP; \ + break; \ + default: \ + LOG(FATAL) << "don't recognize type flag" << dtype; \ + } \ + \ } #include @@ -31,11 +37,12 @@ switch (dtype) { \ #include "hist_util.h" #include "../tree/fast_hist_param.h" -using xgboost::tree::FastHistParam; namespace xgboost { namespace common { +using tree::FastHistParam; + /*! \brief indicator of data type used for storing bin id's in a column. 
*/ enum DataType { uint8 = 1, @@ -78,7 +85,7 @@ class ColumnMatrix { slot of internal buffer. */ packing_factor_ = sizeof(uint32_t) / static_cast(this->dtype); - const bst_uint nfeature = static_cast(gmat.cut->row_ptr.size() - 1); + const auto nfeature = static_cast(gmat.cut->row_ptr.size() - 1); const size_t nrow = gmat.row_ptr.size() - 1; // identify type of each column diff --git a/src/common/common.cc b/src/common/common.cc index f53ff752f..fdada302d 100644 --- a/src/common/common.cc +++ b/src/common/common.cc @@ -14,7 +14,7 @@ struct RandomThreadLocalEntry { GlobalRandomEngine engine; }; -typedef dmlc::ThreadLocalStore RandomThreadLocalStore; +using RandomThreadLocalStore = dmlc::ThreadLocalStore; GlobalRandomEngine& GlobalRandom() { return RandomThreadLocalStore::Get()->engine; diff --git a/src/common/compressed_iterator.h b/src/common/compressed_iterator.h index 42f75b81f..512b75fbf 100644 --- a/src/common/compressed_iterator.h +++ b/src/common/compressed_iterator.h @@ -11,20 +11,20 @@ namespace xgboost { namespace common { -typedef unsigned char compressed_byte_t; +using CompressedByteT = unsigned char; namespace detail { -inline void SetBit(compressed_byte_t *byte, int bit_idx) { +inline void SetBit(CompressedByteT *byte, int bit_idx) { *byte |= 1 << bit_idx; } template inline T CheckBit(const T &byte, int bit_idx) { return byte & (1 << bit_idx); } -inline void ClearBit(compressed_byte_t *byte, int bit_idx) { +inline void ClearBit(CompressedByteT *byte, int bit_idx) { *byte &= ~(1 << bit_idx); } -static const int padding = 4; // Assign padding so we can read slightly off +static const int kPadding = 4; // Assign padding so we can read slightly off // the beginning of the array // The number of bits required to represent a given unsigned range @@ -76,16 +76,16 @@ class CompressedBufferWriter { size_t compressed_size = static_cast(std::ceil( static_cast(detail::SymbolBits(num_symbols) * num_elements) / bits_per_byte)); - return compressed_size + detail::padding; + return compressed_size + detail::kPadding; } template - void WriteSymbol(compressed_byte_t *buffer, T symbol, size_t offset) { + void WriteSymbol(CompressedByteT *buffer, T symbol, size_t offset) { const int bits_per_byte = 8; for (size_t i = 0; i < symbol_bits_; i++) { size_t byte_idx = ((offset + 1) * symbol_bits_ - (i + 1)) / bits_per_byte; - byte_idx += detail::padding; + byte_idx += detail::kPadding; size_t bit_idx = ((bits_per_byte + i) - ((offset + 1) * symbol_bits_)) % bits_per_byte; @@ -96,20 +96,20 @@ class CompressedBufferWriter { } } } - template - void Write(compressed_byte_t *buffer, iter_t input_begin, iter_t input_end) { + template + void Write(CompressedByteT *buffer, IterT input_begin, IterT input_end) { uint64_t tmp = 0; size_t stored_bits = 0; const size_t max_stored_bits = 64 - symbol_bits_; - size_t buffer_position = detail::padding; + size_t buffer_position = detail::kPadding; const size_t num_symbols = input_end - input_begin; for (size_t i = 0; i < num_symbols; i++) { - typename std::iterator_traits::value_type symbol = input_begin[i]; + typename std::iterator_traits::value_type symbol = input_begin[i]; if (stored_bits > max_stored_bits) { // Eject only full bytes size_t tmp_bytes = stored_bits / 8; for (size_t j = 0; j < tmp_bytes; j++) { - buffer[buffer_position] = static_cast( + buffer[buffer_position] = static_cast( tmp >> (stored_bits - (j + 1) * 8)); buffer_position++; } @@ -129,10 +129,10 @@ class CompressedBufferWriter { int shift_bits = static_cast(stored_bits) - (j + 1) * 8; if 
(shift_bits >= 0) { buffer[buffer_position] = - static_cast(tmp >> shift_bits); + static_cast(tmp >> shift_bits); } else { buffer[buffer_position] = - static_cast(tmp << std::abs(shift_bits)); + static_cast(tmp << std::abs(shift_bits)); } buffer_position++; } @@ -153,23 +153,21 @@ template class CompressedIterator { public: - typedef CompressedIterator self_type; ///< My own type - typedef ptrdiff_t - difference_type; ///< Type to express the result of subtracting - /// one iterator from another - typedef T value_type; ///< The type of the element the iterator can point to - typedef value_type *pointer; ///< The type of a pointer to an element the - /// iterator can point to - typedef value_type reference; ///< The type of a reference to an element the - /// iterator can point to + // Type definitions for thrust + typedef CompressedIterator self_type; // NOLINT + typedef ptrdiff_t difference_type; // NOLINT + typedef T value_type; // NOLINT + typedef value_type *pointer; // NOLINT + typedef value_type reference; // NOLINT + private: - compressed_byte_t *buffer_; + CompressedByteT *buffer_; size_t symbol_bits_; size_t offset_; public: CompressedIterator() : buffer_(nullptr), symbol_bits_(0), offset_(0) {} - CompressedIterator(compressed_byte_t *buffer, int num_symbols) + CompressedIterator(CompressedByteT *buffer, int num_symbols) : buffer_(buffer), offset_(0) { symbol_bits_ = detail::SymbolBits(num_symbols); } @@ -178,7 +176,7 @@ class CompressedIterator { const int bits_per_byte = 8; size_t start_bit_idx = ((offset_ + 1) * symbol_bits_ - 1); size_t start_byte_idx = start_bit_idx / bits_per_byte; - start_byte_idx += detail::padding; + start_byte_idx += detail::kPadding; // Read 5 bytes - the maximum we will need uint64_t tmp = static_cast(buffer_[start_byte_idx - 4]) << 32 | diff --git a/src/common/config.h b/src/common/config.h index 7385dff4e..68d857cad 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -24,33 +24,33 @@ class ConfigReaderBase { * \brief get current name, called after Next returns true * \return current parameter name */ - inline const char *name(void) const { - return s_name.c_str(); + inline const char *Name() const { + return s_name_.c_str(); } /*! * \brief get current value, called after Next returns true * \return current parameter value */ - inline const char *val(void) const { - return s_val.c_str(); + inline const char *Val() const { + return s_val_.c_str(); } /*! * \brief move iterator to next position * \return true if there is value in next position */ - inline bool Next(void) { + inline bool Next() { while (!this->IsEnd()) { - GetNextToken(&s_name); - if (s_name == "=") return false; - if (GetNextToken(&s_buf) || s_buf != "=") return false; - if (GetNextToken(&s_val) || s_val == "=") return false; + GetNextToken(&s_name_); + if (s_name_ == "=") return false; + if (GetNextToken(&s_buf_) || s_buf_ != "=") return false; + if (GetNextToken(&s_val_) || s_val_ == "=") return false; return true; } return false; } // called before usage - inline void Init(void) { - ch_buf = this->GetChar(); + inline void Init() { + ch_buf_ = this->GetChar(); } protected: @@ -58,38 +58,38 @@ class ConfigReaderBase { * \brief to be implemented by subclass, * get next token, return EOF if end of file */ - virtual char GetChar(void) = 0; + virtual char GetChar() = 0; /*! 
\brief to be implemented by child, check if end of stream */ - virtual bool IsEnd(void) = 0; + virtual bool IsEnd() = 0; private: - char ch_buf; - std::string s_name, s_val, s_buf; + char ch_buf_; + std::string s_name_, s_val_, s_buf_; - inline void SkipLine(void) { + inline void SkipLine() { do { - ch_buf = this->GetChar(); - } while (ch_buf != EOF && ch_buf != '\n' && ch_buf != '\r'); + ch_buf_ = this->GetChar(); + } while (ch_buf_ != EOF && ch_buf_ != '\n' && ch_buf_ != '\r'); } inline void ParseStr(std::string *tok) { - while ((ch_buf = this->GetChar()) != EOF) { - switch (ch_buf) { + while ((ch_buf_ = this->GetChar()) != EOF) { + switch (ch_buf_) { case '\\': *tok += this->GetChar(); break; case '\"': return; case '\r': case '\n': LOG(FATAL)<< "ConfigReader: unterminated string"; - default: *tok += ch_buf; + default: *tok += ch_buf_; } } LOG(FATAL) << "ConfigReader: unterminated string"; } inline void ParseStrML(std::string *tok) { - while ((ch_buf = this->GetChar()) != EOF) { - switch (ch_buf) { + while ((ch_buf_ = this->GetChar()) != EOF) { + switch (ch_buf_) { case '\\': *tok += this->GetChar(); break; case '\'': return; - default: *tok += ch_buf; + default: *tok += ch_buf_; } } LOG(FATAL) << "unterminated string"; @@ -98,24 +98,24 @@ class ConfigReaderBase { inline bool GetNextToken(std::string *tok) { tok->clear(); bool new_line = false; - while (ch_buf != EOF) { - switch (ch_buf) { + while (ch_buf_ != EOF) { + switch (ch_buf_) { case '#' : SkipLine(); new_line = true; break; case '\"': if (tok->length() == 0) { - ParseStr(tok); ch_buf = this->GetChar(); return new_line; + ParseStr(tok); ch_buf_ = this->GetChar(); return new_line; } else { LOG(FATAL) << "ConfigReader: token followed directly by string"; } case '\'': if (tok->length() == 0) { - ParseStrML(tok); ch_buf = this->GetChar(); return new_line; + ParseStrML(tok); ch_buf_ = this->GetChar(); return new_line; } else { LOG(FATAL) << "ConfigReader: token followed directly by string"; } case '=': if (tok->length() == 0) { - ch_buf = this->GetChar(); + ch_buf_ = this->GetChar(); *tok = '='; } return new_line; @@ -124,12 +124,12 @@ class ConfigReaderBase { if (tok->length() == 0) new_line = true; case '\t': case ' ' : - ch_buf = this->GetChar(); + ch_buf_ = this->GetChar(); if (tok->length() != 0) return new_line; break; default: - *tok += ch_buf; - ch_buf = this->GetChar(); + *tok += ch_buf_; + ch_buf_ = this->GetChar(); break; } } @@ -149,19 +149,19 @@ class ConfigStreamReader: public ConfigReaderBase { * \brief constructor * \param fin istream input stream */ - explicit ConfigStreamReader(std::istream &fin) : fin(fin) {} + explicit ConfigStreamReader(std::istream &fin) : fin_(fin) {} protected: - virtual char GetChar(void) { - return fin.get(); + char GetChar() override { + return fin_.get(); } /*! \brief to be implemented by child, check if end of stream */ - virtual bool IsEnd(void) { - return fin.eof(); + bool IsEnd() override { + return fin_.eof(); } private: - std::istream &fin; + std::istream &fin_; }; /*! @@ -173,20 +173,20 @@ class ConfigIterator: public ConfigStreamReader { * \brief constructor * \param fname name of configure file */ - explicit ConfigIterator(const char *fname) : ConfigStreamReader(fi) { - fi.open(fname); - if (fi.fail()) { + explicit ConfigIterator(const char *fname) : ConfigStreamReader(fi_) { + fi_.open(fname); + if (fi_.fail()) { LOG(FATAL) << "cannot open file " << fname; } ConfigReaderBase::Init(); } /*! 
\brief destructor */ - ~ConfigIterator(void) { - fi.close(); + ~ConfigIterator() { + fi_.close(); } private: - std::ifstream fi; + std::ifstream fi_; }; } // namespace common } // namespace xgboost diff --git a/src/common/device_helpers.cuh b/src/common/device_helpers.cuh index a171d15d3..2413e065a 100644 --- a/src/common/device_helpers.cuh +++ b/src/common/device_helpers.cuh @@ -25,16 +25,16 @@ namespace dh { -#define HOST_DEV_INLINE __host__ __device__ __forceinline__ +#define HOST_DEV_INLINE XGBOOST_DEVICE __forceinline__ #define DEV_INLINE __device__ __forceinline__ /* * Error handling functions */ -#define safe_cuda(ans) throw_on_cuda_error((ans), __FILE__, __LINE__) +#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__) -inline cudaError_t throw_on_cuda_error(cudaError_t code, const char *file, +inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file, int line) { if (code != cudaSuccess) { std::stringstream ss; @@ -48,9 +48,9 @@ inline cudaError_t throw_on_cuda_error(cudaError_t code, const char *file, } #ifdef XGBOOST_USE_NCCL -#define safe_nccl(ans) throw_on_nccl_error((ans), __FILE__, __LINE__) +#define safe_nccl(ans) ThrowOnNcclError((ans), __FILE__, __LINE__) -inline ncclResult_t throw_on_nccl_error(ncclResult_t code, const char *file, +inline ncclResult_t ThrowOnNcclError(ncclResult_t code, const char *file, int line) { if (code != ncclSuccess) { std::stringstream ss; @@ -64,16 +64,16 @@ inline ncclResult_t throw_on_nccl_error(ncclResult_t code, const char *file, #endif template -T *raw(thrust::device_vector &v) { // NOLINT +T *Raw(thrust::device_vector &v) { // NOLINT return raw_pointer_cast(v.data()); } template -const T *raw(const thrust::device_vector &v) { // NOLINT +const T *Raw(const thrust::device_vector &v) { // NOLINT return raw_pointer_cast(v.data()); } -inline int n_visible_devices() { +inline int NVisibleDevices() { int n_visgpus = 0; dh::safe_cuda(cudaGetDeviceCount(&n_visgpus)); @@ -81,40 +81,40 @@ inline int n_visible_devices() { return n_visgpus; } -inline int n_devices_all(int n_gpus) { - int n_devices_visible = dh::n_visible_devices(); +inline int NDevicesAll(int n_gpus) { + int n_devices_visible = dh::NVisibleDevices(); int n_devices = n_gpus < 0 ? n_devices_visible : n_gpus; return (n_devices); } -inline int n_devices(int n_gpus, int num_rows) { - int n_devices = dh::n_devices_all(n_gpus); +inline int NDevices(int n_gpus, int num_rows) { + int n_devices = dh::NDevicesAll(n_gpus); // fix-up device number to be limited by number of rows n_devices = n_devices > num_rows ? 
num_rows : n_devices; return (n_devices); } // if n_devices=-1, then use all visible devices -inline void synchronize_n_devices(int n_devices, std::vector dList) { +inline void SynchronizeNDevices(int n_devices, std::vector dList) { for (int d_idx = 0; d_idx < n_devices; d_idx++) { int device_idx = dList[d_idx]; safe_cuda(cudaSetDevice(device_idx)); safe_cuda(cudaDeviceSynchronize()); } } -inline void synchronize_all() { - for (int device_idx = 0; device_idx < n_visible_devices(); device_idx++) { +inline void SynchronizeAll() { + for (int device_idx = 0; device_idx < NVisibleDevices(); device_idx++) { safe_cuda(cudaSetDevice(device_idx)); safe_cuda(cudaDeviceSynchronize()); } } -inline std::string device_name(int device_idx) { +inline std::string DeviceName(int device_idx) { cudaDeviceProp prop; dh::safe_cuda(cudaGetDeviceProperties(&prop, device_idx)); return std::string(prop.name); } -inline size_t available_memory(int device_idx) { +inline size_t AvailableMemory(int device_idx) { size_t device_free = 0; size_t device_total = 0; safe_cuda(cudaSetDevice(device_idx)); @@ -130,20 +130,20 @@ inline size_t available_memory(int device_idx) { * \param device_idx Zero-based index of the device. */ -inline size_t max_shared_memory(int device_idx) { +inline size_t MaxSharedMemory(int device_idx) { cudaDeviceProp prop; dh::safe_cuda(cudaGetDeviceProperties(&prop, device_idx)); return prop.sharedMemPerBlock; } // ensure gpu_id is correct, so not dependent upon user knowing details -inline int get_device_idx(int gpu_id) { +inline int GetDeviceIdx(int gpu_id) { // protect against overrun for gpu_id - return (std::abs(gpu_id) + 0) % dh::n_visible_devices(); + return (std::abs(gpu_id) + 0) % dh::NVisibleDevices(); } -inline void check_compute_capability() { - int n_devices = n_visible_devices(); +inline void CheckComputeCapability() { + int n_devices = NVisibleDevices(); for (int d_idx = 0; d_idx < n_devices; ++d_idx) { cudaDeviceProp prop; safe_cuda(cudaGetDeviceProperties(&prop, d_idx)); @@ -159,72 +159,72 @@ inline void check_compute_capability() { * Range iterator */ -class range { +class Range { public: - class iterator { - friend class range; + class Iterator { + friend class Range; public: - __host__ __device__ int64_t operator*() const { return i_; } - __host__ __device__ const iterator &operator++() { + XGBOOST_DEVICE int64_t operator*() const { return i_; } + XGBOOST_DEVICE const Iterator &operator++() { i_ += step_; return *this; } - __host__ __device__ iterator operator++(int) { - iterator copy(*this); + XGBOOST_DEVICE Iterator operator++(int) { + Iterator copy(*this); i_ += step_; return copy; } - __host__ __device__ bool operator==(const iterator &other) const { + XGBOOST_DEVICE bool operator==(const Iterator &other) const { return i_ >= other.i_; } - __host__ __device__ bool operator!=(const iterator &other) const { + XGBOOST_DEVICE bool operator!=(const Iterator &other) const { return i_ < other.i_; } - __host__ __device__ void step(int s) { step_ = s; } + XGBOOST_DEVICE void Step(int s) { step_ = s; } protected: - __host__ __device__ explicit iterator(int64_t start) : i_(start) {} + XGBOOST_DEVICE explicit Iterator(int64_t start) : i_(start) {} public: uint64_t i_; int step_ = 1; }; - __host__ __device__ iterator begin() const { return begin_; } - __host__ __device__ iterator end() const { return end_; } - __host__ __device__ range(int64_t begin, int64_t end) + XGBOOST_DEVICE Iterator begin() const { return begin_; } // NOLINT + XGBOOST_DEVICE Iterator end() const { return end_; } // 
NOLINT + XGBOOST_DEVICE Range(int64_t begin, int64_t end) : begin_(begin), end_(end) {} - __host__ __device__ void step(int s) { begin_.step(s); } + XGBOOST_DEVICE void Step(int s) { begin_.Step(s); } private: - iterator begin_; - iterator end_; + Iterator begin_; + Iterator end_; }; template -__device__ range grid_stride_range(T begin, T end) { +__device__ Range GridStrideRange(T begin, T end) { begin += blockDim.x * blockIdx.x + threadIdx.x; - range r(begin, end); - r.step(gridDim.x * blockDim.x); + Range r(begin, end); + r.Step(gridDim.x * blockDim.x); return r; } template -__device__ range block_stride_range(T begin, T end) { +__device__ Range BlockStrideRange(T begin, T end) { begin += threadIdx.x; - range r(begin, end); - r.step(blockDim.x); + Range r(begin, end); + r.Step(blockDim.x); return r; } // Threadblock iterates over range, filling with value. Requires all threads in // block to be active. template -__device__ void block_fill(IterT begin, size_t n, ValueT value) { - for (auto i : block_stride_range(static_cast(0), n)) { +__device__ void BlockFill(IterT begin, size_t n, ValueT value) { + for (auto i : BlockStrideRange(static_cast(0), n)) { begin[i] = value; } } @@ -234,34 +234,34 @@ __device__ void block_fill(IterT begin, size_t n, ValueT value) { */ template -T1 div_round_up(const T1 a, const T2 b) { +T1 DivRoundUp(const T1 a, const T2 b) { return static_cast(ceil(static_cast(a) / b)); } template -__global__ void launch_n_kernel(size_t begin, size_t end, L lambda) { - for (auto i : grid_stride_range(begin, end)) { +__global__ void LaunchNKernel(size_t begin, size_t end, L lambda) { + for (auto i : GridStrideRange(begin, end)) { lambda(i); } } template -__global__ void launch_n_kernel(int device_idx, size_t begin, size_t end, +__global__ void LaunchNKernel(int device_idx, size_t begin, size_t end, L lambda) { - for (auto i : grid_stride_range(begin, end)) { + for (auto i : GridStrideRange(begin, end)) { lambda(i, device_idx); } } template -inline void launch_n(int device_idx, size_t n, L lambda) { +inline void LaunchN(int device_idx, size_t n, L lambda) { if (n == 0) { return; } safe_cuda(cudaSetDevice(device_idx)); const int GRID_SIZE = - static_cast(div_round_up(n, ITEMS_PER_THREAD * BLOCK_THREADS)); - launch_n_kernel<<>>(static_cast(0), n, + static_cast(DivRoundUp(n, ITEMS_PER_THREAD * BLOCK_THREADS)); + LaunchNKernel<<>>(static_cast(0), n, lambda); } @@ -269,91 +269,91 @@ inline void launch_n(int device_idx, size_t n, L lambda) { * Memory */ -enum memory_type { DEVICE, DEVICE_MANAGED }; +enum MemoryType { kDevice, kDeviceManaged }; -template -class bulk_allocator; +template +class BulkAllocator; template -class dvec2; +class DVec2; template -class dvec { - friend class dvec2; +class DVec { + friend class DVec2; private: - T *_ptr; - size_t _size; - int _device_idx; + T *ptr_; + size_t size_; + int device_idx_; public: - void external_allocate(int device_idx, void *ptr, size_t size) { - if (!empty()) { - throw std::runtime_error("Tried to allocate dvec but already allocated"); + void ExternalAllocate(int device_idx, void *ptr, size_t size) { + if (!Empty()) { + throw std::runtime_error("Tried to allocate DVec but already allocated"); } - _ptr = static_cast(ptr); - _size = size; - _device_idx = device_idx; - safe_cuda(cudaSetDevice(_device_idx)); + ptr_ = static_cast(ptr); + size_ = size; + device_idx_ = device_idx; + safe_cuda(cudaSetDevice(device_idx_)); } - dvec() : _ptr(NULL), _size(0), _device_idx(-1) {} - size_t size() const { return _size; } - int device_idx() const 
{ return _device_idx; } - bool empty() const { return _ptr == NULL || _size == 0; } + DVec() : ptr_(NULL), size_(0), device_idx_(-1) {} + size_t Size() const { return size_; } + int DeviceIdx() const { return device_idx_; } + bool Empty() const { return ptr_ == NULL || size_ == 0; } - T *data() { return _ptr; } + T *Data() { return ptr_; } - const T *data() const { return _ptr; } + const T *Data() const { return ptr_; } - std::vector as_vector() const { - std::vector h_vector(size()); - safe_cuda(cudaSetDevice(_device_idx)); - safe_cuda(cudaMemcpy(h_vector.data(), _ptr, size() * sizeof(T), + std::vector AsVector() const { + std::vector h_vector(Size()); + safe_cuda(cudaSetDevice(device_idx_)); + safe_cuda(cudaMemcpy(h_vector.data(), ptr_, Size() * sizeof(T), cudaMemcpyDeviceToHost)); return h_vector; } - void fill(T value) { - auto d_ptr = _ptr; - launch_n(_device_idx, size(), + void Fill(T value) { + auto d_ptr = ptr_; + LaunchN(device_idx_, Size(), [=] __device__(size_t idx) { d_ptr[idx] = value; }); } - void print() { - auto h_vector = this->as_vector(); + void Print() { + auto h_vector = this->AsVector(); for (auto e : h_vector) { std::cout << e << " "; } std::cout << "\n"; } - thrust::device_ptr tbegin() { return thrust::device_pointer_cast(_ptr); } + thrust::device_ptr tbegin() { return thrust::device_pointer_cast(ptr_); } thrust::device_ptr tend() { - return thrust::device_pointer_cast(_ptr + size()); + return thrust::device_pointer_cast(ptr_ + Size()); } template - dvec &operator=(const std::vector &other) { + DVec &operator=(const std::vector &other) { this->copy(other.begin(), other.end()); return *this; } - dvec &operator=(dvec &other) { - if (other.size() != size()) { + DVec &operator=(DVec &other) { + if (other.Size() != Size()) { throw std::runtime_error( - "Cannot copy assign dvec to dvec, sizes are different"); + "Cannot copy assign DVec to DVec, sizes are different"); } - safe_cuda(cudaSetDevice(this->device_idx())); - if (other.device_idx() == this->device_idx()) { - dh::safe_cuda(cudaMemcpy(this->data(), other.data(), - other.size() * sizeof(T), + safe_cuda(cudaSetDevice(this->DeviceIdx())); + if (other.DeviceIdx() == this->DeviceIdx()) { + dh::safe_cuda(cudaMemcpy(this->Data(), other.Data(), + other.Size() * sizeof(T), cudaMemcpyDeviceToDevice)); } else { - std::cout << "deviceother: " << other.device_idx() - << " devicethis: " << this->device_idx() << std::endl; - std::cout << "size deviceother: " << other.size() - << " devicethis: " << this->device_idx() << std::endl; + std::cout << "deviceother: " << other.DeviceIdx() + << " devicethis: " << this->DeviceIdx() << std::endl; + std::cout << "size deviceother: " << other.Size() + << " devicethis: " << this->DeviceIdx() << std::endl; throw std::runtime_error("Cannot copy to/from different devices"); } @@ -362,177 +362,178 @@ class dvec { template void copy(IterT begin, IterT end) { - safe_cuda(cudaSetDevice(this->device_idx())); - if (end - begin != size()) { + safe_cuda(cudaSetDevice(this->DeviceIdx())); + if (end - begin != Size()) { throw std::runtime_error( - "Cannot copy assign vector to dvec, sizes are different"); + "Cannot copy assign vector to DVec, sizes are different"); } thrust::copy(begin, end, this->tbegin()); } void copy(thrust::device_ptr begin, thrust::device_ptr end) { - safe_cuda(cudaSetDevice(this->device_idx())); - if (end - begin != size()) { + safe_cuda(cudaSetDevice(this->DeviceIdx())); + if (end - begin != Size()) { throw std::runtime_error( - "Cannot copy assign vector to dvec, sizes are 
different"); + "Cannot copy assign vector to DVec, sizes are different"); } - safe_cuda(cudaMemcpy(this->data(), begin.get(), - size() * sizeof(T), cudaMemcpyDefault)); + safe_cuda(cudaMemcpy(this->Data(), begin.get(), + Size() * sizeof(T), cudaMemcpyDefault)); } }; /** - * @class dvec2 device_helpers.cuh - * @brief wrapper for storing 2 dvec's which are needed for cub::DoubleBuffer + * @class DVec2 device_helpers.cuh + * @brief wrapper for storing 2 DVec's which are needed for cub::DoubleBuffer */ template -class dvec2 { +class DVec2 { private: - dvec _d1, _d2; - cub::DoubleBuffer _buff; - int _device_idx; + DVec d1_, d2_; + cub::DoubleBuffer buff_; + int device_idx_; public: - void external_allocate(int device_idx, void *ptr1, void *ptr2, size_t size) { - if (!empty()) { - throw std::runtime_error("Tried to allocate dvec2 but already allocated"); + void ExternalAllocate(int device_idx, void *ptr1, void *ptr2, size_t size) { + if (!Empty()) { + throw std::runtime_error("Tried to allocate DVec2 but already allocated"); } - _device_idx = device_idx; - _d1.external_allocate(_device_idx, ptr1, size); - _d2.external_allocate(_device_idx, ptr2, size); - _buff.d_buffers[0] = static_cast(ptr1); - _buff.d_buffers[1] = static_cast(ptr2); - _buff.selector = 0; + device_idx_ = device_idx; + d1_.ExternalAllocate(device_idx_, ptr1, size); + d2_.ExternalAllocate(device_idx_, ptr2, size); + buff_.d_buffers[0] = static_cast(ptr1); + buff_.d_buffers[1] = static_cast(ptr2); + buff_.selector = 0; } - dvec2() : _d1(), _d2(), _buff(), _device_idx(-1) {} + DVec2() : d1_(), d2_(), buff_(), device_idx_(-1) {} - size_t size() const { return _d1.size(); } - int device_idx() const { return _device_idx; } - bool empty() const { return _d1.empty() || _d2.empty(); } + size_t Size() const { return d1_.Size(); } + int DeviceIdx() const { return device_idx_; } + bool Empty() const { return d1_.Empty() || d2_.Empty(); } - cub::DoubleBuffer &buff() { return _buff; } + cub::DoubleBuffer &buff() { return buff_; } - dvec &d1() { return _d1; } - dvec &d2() { return _d2; } + DVec &D1() { return d1_; } - T *current() { return _buff.Current(); } + DVec &D2() { return d2_; } - dvec ¤t_dvec() { return _buff.selector == 0 ? d1() : d2(); } + T *Current() { return buff_.Current(); } - T *other() { return _buff.Alternate(); } + DVec &CurrentDVec() { return buff_.selector == 0 ? D1() : D2(); } + + T *other() { return buff_.Alternate(); } }; -template -class bulk_allocator { - std::vector d_ptr; - std::vector _size; - std::vector _device_idx; +template +class BulkAllocator { + std::vector d_ptr_; + std::vector size_; + std::vector device_idx_; - const int align = 256; + static const int kAlign = 256; - size_t align_round_up(size_t n) const { - n = (n + align - 1) / align; - return n * align; + size_t AlignRoundUp(size_t n) const { + n = (n + kAlign - 1) / kAlign; + return n * kAlign; } template - size_t get_size_bytes(dvec *first_vec, size_t first_size) { - return align_round_up(first_size * sizeof(T)); + size_t GetSizeBytes(DVec *first_vec, size_t first_size) { + return AlignRoundUp(first_size * sizeof(T)); } template - size_t get_size_bytes(dvec *first_vec, size_t first_size, Args... args) { - return get_size_bytes(first_vec, first_size) + get_size_bytes(args...); + size_t GetSizeBytes(DVec *first_vec, size_t first_size, Args... 
args) { + return GetSizeBytes(first_vec, first_size) + GetSizeBytes(args...); } template - void allocate_dvec(int device_idx, char *ptr, dvec *first_vec, + void AllocateDVec(int device_idx, char *ptr, DVec *first_vec, size_t first_size) { - first_vec->external_allocate(device_idx, static_cast(ptr), + first_vec->ExternalAllocate(device_idx, static_cast(ptr), first_size); } template - void allocate_dvec(int device_idx, char *ptr, dvec *first_vec, + void AllocateDVec(int device_idx, char *ptr, DVec *first_vec, size_t first_size, Args... args) { - allocate_dvec(device_idx, ptr, first_vec, first_size); - ptr += align_round_up(first_size * sizeof(T)); - allocate_dvec(device_idx, ptr, args...); + AllocateDVec(device_idx, ptr, first_vec, first_size); + ptr += AlignRoundUp(first_size * sizeof(T)); + AllocateDVec(device_idx, ptr, args...); } - char *allocate_device(int device_idx, size_t bytes, memory_type t) { + char *AllocateDevice(int device_idx, size_t bytes, MemoryType t) { char *ptr; safe_cuda(cudaSetDevice(device_idx)); safe_cuda(cudaMalloc(&ptr, bytes)); return ptr; } template - size_t get_size_bytes(dvec2 *first_vec, size_t first_size) { - return 2 * align_round_up(first_size * sizeof(T)); + size_t GetSizeBytes(DVec2 *first_vec, size_t first_size) { + return 2 * AlignRoundUp(first_size * sizeof(T)); } template - size_t get_size_bytes(dvec2 *first_vec, size_t first_size, Args... args) { - return get_size_bytes(first_vec, first_size) + get_size_bytes(args...); + size_t GetSizeBytes(DVec2 *first_vec, size_t first_size, Args... args) { + return GetSizeBytes(first_vec, first_size) + GetSizeBytes(args...); } template - void allocate_dvec(int device_idx, char *ptr, dvec2 *first_vec, + void AllocateDVec(int device_idx, char *ptr, DVec2 *first_vec, size_t first_size) { - first_vec->external_allocate( + first_vec->ExternalAllocate( device_idx, static_cast(ptr), - static_cast(ptr + align_round_up(first_size * sizeof(T))), + static_cast(ptr + AlignRoundUp(first_size * sizeof(T))), first_size); } template - void allocate_dvec(int device_idx, char *ptr, dvec2 *first_vec, + void AllocateDVec(int device_idx, char *ptr, DVec2 *first_vec, size_t first_size, Args... 
args) { - allocate_dvec(device_idx, ptr, first_vec, first_size); - ptr += (align_round_up(first_size * sizeof(T)) * 2); - allocate_dvec(device_idx, ptr, args...); + AllocateDVec(device_idx, ptr, first_vec, first_size); + ptr += (AlignRoundUp(first_size * sizeof(T)) * 2); + AllocateDVec(device_idx, ptr, args...); } public: - bulk_allocator() {} + BulkAllocator() = default; // prevent accidental copying, moving or assignment of this object - bulk_allocator(const bulk_allocator&) = delete; - bulk_allocator(bulk_allocator&&) = delete; - void operator=(const bulk_allocator&) = delete; - void operator=(bulk_allocator&&) = delete; + BulkAllocator(const BulkAllocator&) = delete; + BulkAllocator(BulkAllocator&&) = delete; + void operator=(const BulkAllocator&) = delete; + void operator=(BulkAllocator&&) = delete; - ~bulk_allocator() { - for (size_t i = 0; i < d_ptr.size(); i++) { - if (!(d_ptr[i] == nullptr)) { - safe_cuda(cudaSetDevice(_device_idx[i])); - safe_cuda(cudaFree(d_ptr[i])); - d_ptr[i] = nullptr; + ~BulkAllocator() { + for (size_t i = 0; i < d_ptr_.size(); i++) { + if (!(d_ptr_[i] == nullptr)) { + safe_cuda(cudaSetDevice(device_idx_[i])); + safe_cuda(cudaFree(d_ptr_[i])); + d_ptr_[i] = nullptr; } } } // returns sum of bytes for all allocations - size_t size() { - return std::accumulate(_size.begin(), _size.end(), static_cast(0)); + size_t Size() { + return std::accumulate(size_.begin(), size_.end(), static_cast(0)); } template - void allocate(int device_idx, bool silent, Args... args) { - size_t size = get_size_bytes(args...); + void Allocate(int device_idx, bool silent, Args... args) { + size_t size = GetSizeBytes(args...); - char *ptr = allocate_device(device_idx, size, MemoryT); + char *ptr = AllocateDevice(device_idx, size, MemoryT); - allocate_dvec(device_idx, ptr, args...); + AllocateDVec(device_idx, ptr, args...); - d_ptr.push_back(ptr); - _size.push_back(size); - _device_idx.push_back(device_idx); + d_ptr_.push_back(ptr); + size_.push_back(size); + device_idx_.push_back(device_idx); if (!silent) { const int mb_size = 1048576; LOG(CONSOLE) << "Allocated " << size / mb_size << "MB on [" << device_idx - << "] " << device_name(device_idx) << ", " - << available_memory(device_idx) / mb_size << "MB remaining."; + << "] " << DeviceName(device_idx) << ", " + << AvailableMemory(device_idx) / mb_size << "MB remaining."; } } }; @@ -543,7 +544,7 @@ struct CubMemory { size_t temp_storage_bytes; // Thrust - typedef char value_type; + using ValueT = char; CubMemory() : d_temp_storage(nullptr), temp_storage_bytes(0) {} @@ -568,17 +569,18 @@ struct CubMemory { } } // Thrust - char *allocate(std::ptrdiff_t num_bytes) { + char *allocate(std::ptrdiff_t num_bytes) { // NOLINT LazyAllocate(num_bytes); return reinterpret_cast(d_temp_storage); } // Thrust - void deallocate(char *ptr, size_t n) { + void deallocate(char *ptr, size_t n) { // NOLINT + // Do nothing } - bool IsAllocated() { return d_temp_storage != NULL; } + bool IsAllocated() { return d_temp_storage != nullptr; } }; /* @@ -586,7 +588,7 @@ struct CubMemory { */ template -void print(const dvec &v, size_t max_items = 10) { +void Print(const DVec &v, size_t max_items = 10) { std::vector h = v.as_vector(); for (size_t i = 0; i < std::min(max_items, h.size()); i++) { std::cout << " " << h[i]; @@ -609,14 +611,14 @@ void print(const dvec &v, size_t max_items = 10) { // Load balancing search -template -void FindMergePartitions(int device_idx, coordinate_t *d_tile_coordinates, - size_t num_tiles, int tile_size, segments_t segments, - offset_t 
num_rows, offset_t num_elements) { - dh::launch_n(device_idx, num_tiles + 1, [=] __device__(int idx) { - offset_t diagonal = idx * tile_size; - coordinate_t tile_coordinate; - cub::CountingInputIterator nonzero_indices(0); +template +void FindMergePartitions(int device_idx, CoordinateT *d_tile_coordinates, + size_t num_tiles, int tile_size, SegmentT segments, + OffsetT num_rows, OffsetT num_elements) { + dh::LaunchN(device_idx, num_tiles + 1, [=] __device__(int idx) { + OffsetT diagonal = idx * tile_size; + CoordinateT tile_coordinate; + cub::CountingInputIterator nonzero_indices(0); // Search the merge path // Cast to signed integer as this function can have negatives @@ -630,27 +632,27 @@ void FindMergePartitions(int device_idx, coordinate_t *d_tile_coordinates, } template -__global__ void LbsKernel(coordinate_t *d_coordinates, - segments_iter segment_end_offsets, func_t f, - offset_t num_segments) { + typename OffsetT, typename CoordinateT, typename FunctionT, + typename SegmentIterT> +__global__ void LbsKernel(CoordinateT *d_coordinates, + SegmentIterT segment_end_offsets, FunctionT f, + OffsetT num_segments) { int tile = blockIdx.x; - coordinate_t tile_start_coord = d_coordinates[tile]; - coordinate_t tile_end_coord = d_coordinates[tile + 1]; + CoordinateT tile_start_coord = d_coordinates[tile]; + CoordinateT tile_end_coord = d_coordinates[tile + 1]; int64_t tile_num_rows = tile_end_coord.x - tile_start_coord.x; int64_t tile_num_elements = tile_end_coord.y - tile_start_coord.y; - cub::CountingInputIterator tile_element_indices(tile_start_coord.y); - coordinate_t thread_start_coord; + cub::CountingInputIterator tile_element_indices(tile_start_coord.y); + CoordinateT thread_start_coord; - typedef typename std::iterator_traits::value_type segment_t; + typedef typename std::iterator_traits::value_type SegmentT; __shared__ struct { - segment_t tile_segment_end_offsets[TILE_SIZE + 1]; - segment_t output_segment[TILE_SIZE]; + SegmentT tile_segment_end_offsets[TILE_SIZE + 1]; + SegmentT output_segment[TILE_SIZE]; } temp_storage; - for (auto item : dh::block_stride_range(int(0), int(tile_num_rows + 1))) { + for (auto item : dh::BlockStrideRange(int(0), int(tile_num_rows + 1))) { temp_storage.tile_segment_end_offsets[item] = segment_end_offsets[min(static_cast(tile_start_coord.x + item), static_cast(num_segments - 1))]; @@ -665,7 +667,7 @@ __global__ void LbsKernel(coordinate_t *d_coordinates, tile_element_indices, // List B tile_num_rows, tile_num_elements, thread_start_coord); - coordinate_t thread_current_coord = thread_start_coord; + CoordinateT thread_current_coord = thread_start_coord; #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { if (tile_element_indices[thread_current_coord.y] < @@ -679,50 +681,50 @@ __global__ void LbsKernel(coordinate_t *d_coordinates, } __syncthreads(); - for (auto item : dh::block_stride_range(int(0), int(tile_num_elements))) { + for (auto item : dh::BlockStrideRange(int(0), int(tile_num_elements))) { f(tile_start_coord.y + item, temp_storage.output_segment[item]); } } -template +template void SparseTransformLbs(int device_idx, dh::CubMemory *temp_memory, - offset_t count, segments_iter segments, - offset_t num_segments, func_t f) { - typedef typename cub::CubVector::Type coordinate_t; + OffsetT count, SegmentIterT segments, + OffsetT num_segments, FunctionT f) { + typedef typename cub::CubVector::Type CoordinateT; dh::safe_cuda(cudaSetDevice(device_idx)); const int BLOCK_THREADS = 256; const int ITEMS_PER_THREAD = 1; const int TILE_SIZE = 
BLOCK_THREADS * ITEMS_PER_THREAD; - auto num_tiles = dh::div_round_up(count + num_segments, BLOCK_THREADS); + auto num_tiles = dh::DivRoundUp(count + num_segments, BLOCK_THREADS); CHECK(num_tiles < std::numeric_limits::max()); - temp_memory->LazyAllocate(sizeof(coordinate_t) * (num_tiles + 1)); - coordinate_t *tmp_tile_coordinates = - reinterpret_cast(temp_memory->d_temp_storage); + temp_memory->LazyAllocate(sizeof(CoordinateT) * (num_tiles + 1)); + CoordinateT *tmp_tile_coordinates = + reinterpret_cast(temp_memory->d_temp_storage); FindMergePartitions(device_idx, tmp_tile_coordinates, num_tiles, BLOCK_THREADS, segments, num_segments, count); - LbsKernel + LbsKernel <<>>(tmp_tile_coordinates, segments + 1, f, num_segments); } -template -void DenseTransformLbs(int device_idx, offset_t count, offset_t num_segments, - func_t f) { +template +void DenseTransformLbs(int device_idx, OffsetT count, OffsetT num_segments, + FunctionT f) { CHECK(count % num_segments == 0) << "Data is not dense."; - launch_n(device_idx, count, [=] __device__(offset_t idx) { - offset_t segment = idx / (count / num_segments); + LaunchN(device_idx, count, [=] __device__(OffsetT idx) { + OffsetT segment = idx / (count / num_segments); f(idx, segment); }); } /** - * \fn template - * void TransformLbs(int device_idx, dh::CubMemory *temp_memory, offset_t count, - * segments_iter segments, offset_t num_segments, bool is_dense, func_t f) + * \fn template + * void TransformLbs(int device_idx, dh::CubMemory *temp_memory, OffsetT count, + * SegmentIterT segments, OffsetT num_segments, bool is_dense, FunctionT f) * * \brief Load balancing search function. Reads a CSR type matrix description * and allows a function to be executed on each element. Search 'modern GPU load @@ -731,9 +733,9 @@ void DenseTransformLbs(int device_idx, offset_t count, offset_t num_segments, * \author Rory * \date 7/9/2017 * - * \tparam func_t Type of the function t. - * \tparam segments_iter Type of the segments iterator. - * \tparam offset_t Type of the offset. + * \tparam FunctionT Type of the function t. + * \tparam SegmentIterT Type of the segments iterator. + * \tparam OffsetT Type of the offset. * \param device_idx Zero-based index of the device. * \param [in,out] temp_memory Temporary memory allocator. * \param count Number of elements. @@ -743,10 +745,10 @@ void DenseTransformLbs(int device_idx, offset_t count, offset_t num_segments, * \param f Lambda to be executed on matrix elements. 
*/ -template -void TransformLbs(int device_idx, dh::CubMemory *temp_memory, offset_t count, - segments_iter segments, offset_t num_segments, bool is_dense, - func_t f) { +template +void TransformLbs(int device_idx, dh::CubMemory *temp_memory, OffsetT count, + SegmentIterT segments, OffsetT num_segments, bool is_dense, + FunctionT f) { if (is_dense) { DenseTransformLbs(device_idx, count, num_segments, f); } else { @@ -765,18 +767,18 @@ void TransformLbs(int device_idx, dh::CubMemory *temp_memory, offset_t count, * @param offsets the segments */ template -void segmentedSort(dh::CubMemory *tmp_mem, dh::dvec2 *keys, - dh::dvec2 *vals, int nVals, int nSegs, - const dh::dvec &offsets, int start = 0, +void SegmentedSort(dh::CubMemory *tmp_mem, dh::DVec2 *keys, + dh::DVec2 *vals, int nVals, int nSegs, + const dh::DVec &offsets, int start = 0, int end = sizeof(T1) * 8) { size_t tmpSize; dh::safe_cuda(cub::DeviceSegmentedRadixSort::SortPairs( - NULL, tmpSize, keys->buff(), vals->buff(), nVals, nSegs, offsets.data(), - offsets.data() + 1, start, end)); + NULL, tmpSize, keys->buff(), vals->buff(), nVals, nSegs, offsets.Data(), + offsets.Data() + 1, start, end)); tmp_mem->LazyAllocate(tmpSize); dh::safe_cuda(cub::DeviceSegmentedRadixSort::SortPairs( tmp_mem->d_temp_storage, tmpSize, keys->buff(), vals->buff(), nVals, - nSegs, offsets.data(), offsets.data() + 1, start, end)); + nSegs, offsets.Data(), offsets.Data() + 1, start, end)); } /** @@ -787,14 +789,14 @@ void segmentedSort(dh::CubMemory *tmp_mem, dh::dvec2 *keys, * @param nVals number of elements in the input array */ template -void sumReduction(dh::CubMemory &tmp_mem, dh::dvec &in, dh::dvec &out, +void SumReduction(dh::CubMemory &tmp_mem, dh::DVec &in, dh::DVec &out, int nVals) { size_t tmpSize; dh::safe_cuda( - cub::DeviceReduce::Sum(NULL, tmpSize, in.data(), out.data(), nVals)); + cub::DeviceReduce::Sum(NULL, tmpSize, in.Data(), out.Data(), nVals)); tmp_mem.LazyAllocate(tmpSize); dh::safe_cuda(cub::DeviceReduce::Sum(tmp_mem.d_temp_storage, tmpSize, - in.data(), out.data(), nVals)); + in.Data(), out.Data(), nVals)); } /** @@ -805,7 +807,7 @@ void sumReduction(dh::CubMemory &tmp_mem, dh::dvec &in, dh::dvec &out, * @param nVals number of elements in the input array */ template -T sumReduction(dh::CubMemory &tmp_mem, T *in, int nVals) { +T SumReduction(dh::CubMemory &tmp_mem, T *in, int nVals) { size_t tmpSize; dh::safe_cuda(cub::DeviceReduce::Sum(nullptr, tmpSize, in, in, nVals)); // Allocate small extra memory for the return value @@ -827,8 +829,8 @@ T sumReduction(dh::CubMemory &tmp_mem, T *in, int nVals) { * @param def default value to be filled */ template -void fillConst(int device_idx, T *out, int len, T def) { - dh::launch_n(device_idx, len, +void FillConst(int device_idx, T *out, int len, T def) { + dh::LaunchN(device_idx, len, [=] __device__(int i) { out[i] = def; }); } @@ -842,9 +844,9 @@ void fillConst(int device_idx, T *out, int len, T def) { * @param nVals length of the buffers */ template -void gather(int device_idx, T1 *out1, const T1 *in1, T2 *out2, const T2 *in2, +void Gather(int device_idx, T1 *out1, const T1 *in1, T2 *out2, const T2 *in2, const int *instId, int nVals) { - dh::launch_n(device_idx, nVals, + dh::LaunchN(device_idx, nVals, [=] __device__(int i) { int iid = instId[i]; T1 v1 = in1[iid]; @@ -862,8 +864,8 @@ void gather(int device_idx, T1 *out1, const T1 *in1, T2 *out2, const T2 *in2, * @param nVals length of the buffers */ template -void gather(int device_idx, T *out, const T *in, const int *instId, int nVals) { - 
dh::launch_n(device_idx, nVals, +void Gather(int device_idx, T *out, const T *in, const int *instId, int nVals) { + dh::LaunchN(device_idx, nVals, [=] __device__(int i) { int iid = instId[i]; out[i] = in[iid]; diff --git a/src/common/group_data.h b/src/common/group_data.h index 3759e1ee3..6b5f59c47 100644 --- a/src/common/group_data.h +++ b/src/common/group_data.h @@ -29,12 +29,12 @@ struct ParallelGroupBuilder { // parallel group builder of data ParallelGroupBuilder(std::vector *p_rptr, std::vector *p_data) - : rptr(*p_rptr), data(*p_data), thread_rptr(tmp_thread_rptr) { + : rptr_(*p_rptr), data_(*p_data), thread_rptr_(tmp_thread_rptr_) { } ParallelGroupBuilder(std::vector *p_rptr, std::vector *p_data, std::vector< std::vector > *p_thread_rptr) - : rptr(*p_rptr), data(*p_data), thread_rptr(*p_thread_rptr) { + : rptr_(*p_rptr), data_(*p_data), thread_rptr_(*p_thread_rptr) { } public: @@ -45,10 +45,10 @@ struct ParallelGroupBuilder { * \param nthread number of thread that will be used in construction */ inline void InitBudget(size_t nkeys, int nthread) { - thread_rptr.resize(nthread); - for (size_t i = 0; i < thread_rptr.size(); ++i) { - thread_rptr[i].resize(nkeys); - std::fill(thread_rptr[i].begin(), thread_rptr[i].end(), 0); + thread_rptr_.resize(nthread); + for (size_t i = 0; i < thread_rptr_.size(); ++i) { + thread_rptr_[i].resize(nkeys); + std::fill(thread_rptr_[i].begin(), thread_rptr_[i].end(), 0); } } /*! @@ -58,34 +58,34 @@ struct ParallelGroupBuilder { * \param nelem number of element budget add to this row */ inline void AddBudget(size_t key, int threadid, SizeType nelem = 1) { - std::vector &trptr = thread_rptr[threadid]; + std::vector &trptr = thread_rptr_[threadid]; if (trptr.size() < key + 1) { trptr.resize(key + 1, 0); } trptr[key] += nelem; } /*! \brief step 3: initialize the necessary storage */ - inline void InitStorage(void) { + inline void InitStorage() { // set rptr to correct size - for (size_t tid = 0; tid < thread_rptr.size(); ++tid) { - if (rptr.size() <= thread_rptr[tid].size()) { - rptr.resize(thread_rptr[tid].size() + 1); + for (size_t tid = 0; tid < thread_rptr_.size(); ++tid) { + if (rptr_.size() <= thread_rptr_[tid].size()) { + rptr_.resize(thread_rptr_[tid].size() + 1); } } // initialize rptr to be beginning of each segment size_t start = 0; - for (size_t i = 0; i + 1 < rptr.size(); ++i) { - for (size_t tid = 0; tid < thread_rptr.size(); ++tid) { - std::vector &trptr = thread_rptr[tid]; + for (size_t i = 0; i + 1 < rptr_.size(); ++i) { + for (size_t tid = 0; tid < thread_rptr_.size(); ++tid) { + std::vector &trptr = thread_rptr_[tid]; if (i < trptr.size()) { size_t ncnt = trptr[i]; trptr[i] = start; start += ncnt; } } - rptr[i + 1] = start; + rptr_[i + 1] = start; } - data.resize(start); + data_.resize(start); } /*! * \brief step 4: add data to the allocated space, @@ -96,19 +96,19 @@ struct ParallelGroupBuilder { * \param threadid the id of thread that calls this function */ inline void Push(size_t key, ValueType value, int threadid) { - SizeType &rp = thread_rptr[threadid][key]; - data[rp++] = value; + SizeType &rp = thread_rptr_[threadid][key]; + data_[rp++] = value; } private: /*! \brief pointer to the beginning and end of each continuous key */ - std::vector &rptr; + std::vector &rptr_; /*! \brief index of nonzero entries in each row */ - std::vector &data; + std::vector &data_; /*! \brief thread local data structure */ - std::vector > &thread_rptr; + std::vector > &thread_rptr_; /*! 
\brief local temp thread ptr, use this if not specified by the constructor */ - std::vector > tmp_thread_rptr; + std::vector > tmp_thread_rptr_; }; } // namespace common } // namespace xgboost diff --git a/src/common/hist_util.cc b/src/common/hist_util.cc index d9ce4ebcd..3deca15a9 100644 --- a/src/common/hist_util.cc +++ b/src/common/hist_util.cc @@ -17,20 +17,20 @@ namespace xgboost { namespace common { void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) { - typedef common::WXQuantileSketch WXQSketch; - const MetaInfo& info = p_fmat->info(); + using WXQSketch = common::WXQuantileSketch; + const MetaInfo& info = p_fmat->Info(); // safe factor for better accuracy - const int kFactor = 8; + constexpr int kFactor = 8; std::vector sketchs; const int nthread = omp_get_max_threads(); - unsigned nstep = static_cast((info.num_col + nthread - 1) / nthread); - unsigned ncol = static_cast(info.num_col); - sketchs.resize(info.num_col); + auto nstep = static_cast((info.num_col_ + nthread - 1) / nthread); + auto ncol = static_cast(info.num_col_); + sketchs.resize(info.num_col_); for (auto& s : sketchs) { - s.Init(info.num_row, 1.0 / (max_num_bins * kFactor)); + s.Init(info.num_row_, 1.0 / (max_num_bins * kFactor)); } dmlc::DataIter* iter = p_fmat->RowIterator(); @@ -40,7 +40,7 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) { #pragma omp parallel num_threads(nthread) { CHECK_EQ(nthread, omp_get_num_threads()); - unsigned tid = static_cast(omp_get_thread_num()); + auto tid = static_cast(omp_get_thread_num()); unsigned begin = std::min(nstep * tid, ncol); unsigned end = std::min(nstep * (tid + 1), ncol); for (size_t i = 0; i < batch.size; ++i) { // NOLINT(*) @@ -68,7 +68,7 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) { size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_num_bins * kFactor); sreducer.Allreduce(dmlc::BeginPtr(summary_array), nbytes, summary_array.size()); - this->min_val.resize(info.num_col); + this->min_val.resize(info.num_col_); row_ptr.push_back(0); for (size_t fid = 0; fid < summary_array.size(); ++fid) { WXQSketch::SummaryContainer a; @@ -105,7 +105,7 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) { } void GHistIndexMatrix::Init(DMatrix* p_fmat) { - CHECK(cut != nullptr); + CHECK(cut != nullptr); // NOLINT dmlc::DataIter* iter = p_fmat->RowIterator(); const int nthread = omp_get_max_threads(); @@ -126,7 +126,7 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat) { CHECK_GT(cut->cut.size(), 0U); CHECK_EQ(cut->row_ptr.back(), cut->cut.size()); - omp_ulong bsize = static_cast(batch.size); + auto bsize = static_cast(batch.size); #pragma omp parallel for num_threads(nthread) schedule(static) for (omp_ulong i = 0; i < bsize; ++i) { // NOLINT(*) const int tid = omp_get_thread_num(); @@ -217,7 +217,7 @@ FindGroups_(const std::vector& feature_list, std::vector> conflict_marks; std::vector group_nnz; std::vector group_conflict_cnt; - const size_t max_conflict_cnt + const auto max_conflict_cnt = static_cast(param.max_conflict_rate * nrow); for (auto fid : feature_list) { @@ -336,14 +336,14 @@ FastFeatureGrouping(const GHistIndexMatrix& gmat, void GHistIndexBlockMatrix::Init(const GHistIndexMatrix& gmat, const ColumnMatrix& colmat, const FastHistParam& param) { - cut = gmat.cut; + cut_ = gmat.cut; const size_t nrow = gmat.row_ptr.size() - 1; const uint32_t nbins = gmat.cut->row_ptr.back(); /* step 1: form feature groups */ auto groups = FastFeatureGrouping(gmat, colmat, param); - const uint32_t nblock = 
static_cast(groups.size()); + const auto nblock = static_cast(groups.size()); /* step 2: build a new CSR matrix for each feature group */ std::vector bin2block(nbins); // lookup table [bin id] => [block id] @@ -380,24 +380,24 @@ void GHistIndexBlockMatrix::Init(const GHistIndexMatrix& gmat, index_blk_ptr.push_back(0); row_ptr_blk_ptr.push_back(0); for (uint32_t block_id = 0; block_id < nblock; ++block_id) { - index.insert(index.end(), index_temp[block_id].begin(), index_temp[block_id].end()); - row_ptr.insert(row_ptr.end(), row_ptr_temp[block_id].begin(), row_ptr_temp[block_id].end()); - index_blk_ptr.push_back(index.size()); - row_ptr_blk_ptr.push_back(row_ptr.size()); + index_.insert(index_.end(), index_temp[block_id].begin(), index_temp[block_id].end()); + row_ptr_.insert(row_ptr_.end(), row_ptr_temp[block_id].begin(), row_ptr_temp[block_id].end()); + index_blk_ptr.push_back(index_.size()); + row_ptr_blk_ptr.push_back(row_ptr_.size()); } // save shortcut for each block for (uint32_t block_id = 0; block_id < nblock; ++block_id) { Block blk; - blk.index_begin = &index[index_blk_ptr[block_id]]; - blk.row_ptr_begin = &row_ptr[row_ptr_blk_ptr[block_id]]; - blk.index_end = &index[index_blk_ptr[block_id + 1]]; - blk.row_ptr_end = &row_ptr[row_ptr_blk_ptr[block_id + 1]]; - blocks.push_back(blk); + blk.index_begin = &index_[index_blk_ptr[block_id]]; + blk.row_ptr_begin = &row_ptr_[row_ptr_blk_ptr[block_id]]; + blk.index_end = &index_[index_blk_ptr[block_id + 1]]; + blk.row_ptr_end = &row_ptr_[row_ptr_blk_ptr[block_id + 1]]; + blocks_.push_back(blk); } } -void GHistBuilder::BuildHist(const std::vector& gpair, +void GHistBuilder::BuildHist(const std::vector& gpair, const RowSetCollection::Elem row_indices, const GHistIndexMatrix& gmat, const std::vector& feat_set, @@ -405,30 +405,30 @@ void GHistBuilder::BuildHist(const std::vector& gpair, data_.resize(nbins_ * nthread_, GHistEntry()); std::fill(data_.begin(), data_.end(), GHistEntry()); - const int K = 8; // loop unrolling factor - const bst_omp_uint nthread = static_cast(this->nthread_); + constexpr int kUnroll = 8; // loop unrolling factor + const auto nthread = static_cast(this->nthread_); const size_t nrows = row_indices.end - row_indices.begin; - const size_t rest = nrows % K; + const size_t rest = nrows % kUnroll; #pragma omp parallel for num_threads(nthread) schedule(guided) - for (bst_omp_uint i = 0; i < nrows - rest; i += K) { + for (bst_omp_uint i = 0; i < nrows - rest; i += kUnroll) { const bst_omp_uint tid = omp_get_thread_num(); const size_t off = tid * nbins_; - size_t rid[K]; - size_t ibegin[K]; - size_t iend[K]; - bst_gpair stat[K]; - for (int k = 0; k < K; ++k) { + size_t rid[kUnroll]; + size_t ibegin[kUnroll]; + size_t iend[kUnroll]; + GradientPair stat[kUnroll]; + for (int k = 0; k < kUnroll; ++k) { rid[k] = row_indices.begin[i + k]; } - for (int k = 0; k < K; ++k) { + for (int k = 0; k < kUnroll; ++k) { ibegin[k] = gmat.row_ptr[rid[k]]; iend[k] = gmat.row_ptr[rid[k] + 1]; } - for (int k = 0; k < K; ++k) { + for (int k = 0; k < kUnroll; ++k) { stat[k] = gpair[rid[k]]; } - for (int k = 0; k < K; ++k) { + for (int k = 0; k < kUnroll; ++k) { for (size_t j = ibegin[k]; j < iend[k]; ++j) { const uint32_t bin = gmat.index[j]; data_[off + bin].Add(stat[k]); @@ -439,7 +439,7 @@ void GHistBuilder::BuildHist(const std::vector& gpair, const size_t rid = row_indices.begin[i]; const size_t ibegin = gmat.row_ptr[rid]; const size_t iend = gmat.row_ptr[rid + 1]; - const bst_gpair stat = gpair[rid]; + const GradientPair stat = gpair[rid]; for 
(size_t j = ibegin; j < iend; ++j) { const uint32_t bin = gmat.index[j]; data_[bin].Add(stat); @@ -456,37 +456,40 @@ void GHistBuilder::BuildHist(const std::vector& gpair, } } -void GHistBuilder::BuildBlockHist(const std::vector& gpair, +void GHistBuilder::BuildBlockHist(const std::vector& gpair, const RowSetCollection::Elem row_indices, const GHistIndexBlockMatrix& gmatb, const std::vector& feat_set, GHistRow hist) { - const int K = 8; // loop unrolling factor - const bst_omp_uint nthread = static_cast(this->nthread_); + constexpr int kUnroll = 8; // loop unrolling factor const size_t nblock = gmatb.GetNumBlock(); const size_t nrows = row_indices.end - row_indices.begin; - const size_t rest = nrows % K; + const size_t rest = nrows % kUnroll; + +#if defined(_OPENMP) + const auto nthread = static_cast(this->nthread_); +#endif #pragma omp parallel for num_threads(nthread) schedule(guided) for (bst_omp_uint bid = 0; bid < nblock; ++bid) { auto gmat = gmatb[bid]; - for (size_t i = 0; i < nrows - rest; i += K) { - size_t rid[K]; - size_t ibegin[K]; - size_t iend[K]; - bst_gpair stat[K]; - for (int k = 0; k < K; ++k) { + for (size_t i = 0; i < nrows - rest; i += kUnroll) { + size_t rid[kUnroll]; + size_t ibegin[kUnroll]; + size_t iend[kUnroll]; + GradientPair stat[kUnroll]; + for (int k = 0; k < kUnroll; ++k) { rid[k] = row_indices.begin[i + k]; } - for (int k = 0; k < K; ++k) { + for (int k = 0; k < kUnroll; ++k) { ibegin[k] = gmat.row_ptr[rid[k]]; iend[k] = gmat.row_ptr[rid[k] + 1]; } - for (int k = 0; k < K; ++k) { + for (int k = 0; k < kUnroll; ++k) { stat[k] = gpair[rid[k]]; } - for (int k = 0; k < K; ++k) { + for (int k = 0; k < kUnroll; ++k) { for (size_t j = ibegin[k]; j < iend[k]; ++j) { const uint32_t bin = gmat.index[j]; hist.begin[bin].Add(stat[k]); @@ -497,7 +500,7 @@ void GHistBuilder::BuildBlockHist(const std::vector& gpair, const size_t rid = row_indices.begin[i]; const size_t ibegin = gmat.row_ptr[rid]; const size_t iend = gmat.row_ptr[rid + 1]; - const bst_gpair stat = gpair[rid]; + const GradientPair stat = gpair[rid]; for (size_t j = ibegin; j < iend; ++j) { const uint32_t bin = gmat.index[j]; hist.begin[bin].Add(stat); @@ -507,21 +510,26 @@ void GHistBuilder::BuildBlockHist(const std::vector& gpair, } void GHistBuilder::SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow parent) { - const bst_omp_uint nthread = static_cast(this->nthread_); const uint32_t nbins = static_cast(nbins_); - const int K = 8; // loop unrolling factor - const uint32_t rest = nbins % K; + constexpr int kUnroll = 8; // loop unrolling factor + const uint32_t rest = nbins % kUnroll; + +#if defined(_OPENMP) + const auto nthread = static_cast(this->nthread_); +#endif + #pragma omp parallel for num_threads(nthread) schedule(static) - for (bst_omp_uint bin_id = 0; bin_id < static_cast(nbins - rest); bin_id += K) { - GHistEntry pb[K]; - GHistEntry sb[K]; - for (int k = 0; k < K; ++k) { + for (bst_omp_uint bin_id = 0; + bin_id < static_cast(nbins - rest); bin_id += kUnroll) { + GHistEntry pb[kUnroll]; + GHistEntry sb[kUnroll]; + for (int k = 0; k < kUnroll; ++k) { pb[k] = parent.begin[bin_id + k]; } - for (int k = 0; k < K; ++k) { + for (int k = 0; k < kUnroll; ++k) { sb[k] = sibling.begin[bin_id + k]; } - for (int k = 0; k < K; ++k) { + for (int k = 0; k < kUnroll; ++k) { self.begin[bin_id + k].SetSubtract(pb[k], sb[k]); } } diff --git a/src/common/hist_util.h b/src/common/hist_util.h index 8b04410d0..a416d87fa 100644 --- a/src/common/hist_util.h +++ b/src/common/hist_util.h @@ -13,26 +13,26 @@ 
#include "row_set.h" #include "../tree/fast_hist_param.h" -using xgboost::tree::FastHistParam; - namespace xgboost { namespace common { +using tree::FastHistParam; + /*! \brief sums of gradient statistics corresponding to a histogram bin */ struct GHistEntry { /*! \brief sum of first-order gradient statistics */ - double sum_grad; + double sum_grad{0}; /*! \brief sum of second-order gradient statistics */ - double sum_hess; + double sum_hess{0}; - GHistEntry() : sum_grad(0), sum_hess(0) {} + GHistEntry() = default; inline void Clear() { sum_grad = sum_hess = 0; } - /*! \brief add a bst_gpair to the sum */ - inline void Add(const bst_gpair& e) { + /*! \brief add a GradientPair to the sum */ + inline void Add(const GradientPair& e) { sum_grad += e.GetGrad(); sum_hess += e.GetHess(); } @@ -58,7 +58,7 @@ struct HistCutUnit { /*! \brief number of cutting point, containing the maximum point */ uint32_t size; // default constructor - HistCutUnit() {} + HistCutUnit() = default; // constructor HistCutUnit(const bst_float* cut, uint32_t size) : cut(cut), size(size) {} @@ -74,8 +74,8 @@ struct HistCutMatrix { std::vector cut; /*! \brief Get histogram bound for fid */ inline HistCutUnit operator[](bst_uint fid) const { - return HistCutUnit(dmlc::BeginPtr(cut) + row_ptr[fid], - row_ptr[fid + 1] - row_ptr[fid]); + return {dmlc::BeginPtr(cut) + row_ptr[fid], + row_ptr[fid + 1] - row_ptr[fid]}; } // create histogram cut matrix given statistics from data // using approximate quantile sketch approach @@ -92,7 +92,7 @@ struct GHistIndexRow { const uint32_t* index; /*! \brief The size of the histogram */ size_t size; - GHistIndexRow() {} + GHistIndexRow() = default; GHistIndexRow(const uint32_t* index, size_t size) : index(index), size(size) {} }; @@ -115,7 +115,7 @@ struct GHistIndexMatrix { void Init(DMatrix* p_fmat); // get i-th row inline GHistIndexRow operator[](size_t i) const { - return GHistIndexRow(&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]); + return {&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]}; } inline void GetFeatureCounts(size_t* counts) const { auto nfeature = cut->row_ptr.size() - 1; @@ -141,7 +141,7 @@ struct GHistIndexBlock { // get i-th row inline GHistIndexRow operator[](size_t i) const { - return GHistIndexRow(&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]); + return {&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]}; } }; @@ -154,24 +154,24 @@ class GHistIndexBlockMatrix { const FastHistParam& param); inline GHistIndexBlock operator[](size_t i) const { - return GHistIndexBlock(blocks[i].row_ptr_begin, blocks[i].index_begin); + return {blocks_[i].row_ptr_begin, blocks_[i].index_begin}; } inline size_t GetNumBlock() const { - return blocks.size(); + return blocks_.size(); } private: - std::vector row_ptr; - std::vector index; - const HistCutMatrix* cut; + std::vector row_ptr_; + std::vector index_; + const HistCutMatrix* cut_; struct Block { const size_t* row_ptr_begin; const size_t* row_ptr_end; const uint32_t* index_begin; const uint32_t* index_end; }; - std::vector blocks; + std::vector blocks_; }; /*! @@ -186,7 +186,7 @@ struct GHistRow { /*! 
\brief number of entries */ uint32_t size; - GHistRow() {} + GHistRow() = default; GHistRow(GHistEntry* begin, uint32_t size) : begin(begin), size(size) {} }; @@ -198,15 +198,15 @@ class HistCollection { public: // access histogram for i-th node inline GHistRow operator[](bst_uint nid) const { - const uint32_t kMax = std::numeric_limits::max(); + constexpr uint32_t kMax = std::numeric_limits::max(); CHECK_NE(row_ptr_[nid], kMax); - return GHistRow(const_cast(dmlc::BeginPtr(data_) + row_ptr_[nid]), nbins_); + return {const_cast(dmlc::BeginPtr(data_) + row_ptr_[nid]), nbins_}; } // have we computed a histogram for i-th node? inline bool RowExists(bst_uint nid) const { - const uint32_t kMax = std::numeric_limits::max(); - return (nid < row_ptr_.size() && row_ptr_[nid] != kMax); + const uint32_t k_max = std::numeric_limits::max(); + return (nid < row_ptr_.size() && row_ptr_[nid] != k_max); } // initialize histogram collection @@ -218,7 +218,7 @@ class HistCollection { // create an empty histogram for i-th node inline void AddHistRow(bst_uint nid) { - const uint32_t kMax = std::numeric_limits::max(); + constexpr uint32_t kMax = std::numeric_limits::max(); if (nid >= row_ptr_.size()) { row_ptr_.resize(nid + 1, kMax); } @@ -250,13 +250,13 @@ class GHistBuilder { } // construct a histogram via histogram aggregation - void BuildHist(const std::vector& gpair, + void BuildHist(const std::vector& gpair, const RowSetCollection::Elem row_indices, const GHistIndexMatrix& gmat, const std::vector& feat_set, GHistRow hist); // same, with feature grouping - void BuildBlockHist(const std::vector& gpair, + void BuildBlockHist(const std::vector& gpair, const RowSetCollection::Elem row_indices, const GHistIndexBlockMatrix& gmatb, const std::vector& feat_set, diff --git a/src/common/host_device_vector.cc b/src/common/host_device_vector.cc index 41312d5c7..300694d76 100644 --- a/src/common/host_device_vector.cc +++ b/src/common/host_device_vector.cc @@ -6,6 +6,8 @@ // dummy implementation of HostDeviceVector in case CUDA is not used #include + +#include #include "./host_device_vector.h" namespace xgboost { @@ -13,8 +15,8 @@ namespace xgboost { template struct HostDeviceVectorImpl { explicit HostDeviceVectorImpl(size_t size, T v) : data_h_(size, v) {} - explicit HostDeviceVectorImpl(std::initializer_list init) : data_h_(init) {} - explicit HostDeviceVectorImpl(const std::vector& init) : data_h_(init) {} + HostDeviceVectorImpl(std::initializer_list init) : data_h_(init) {} + explicit HostDeviceVectorImpl(std::vector init) : data_h_(std::move(init)) {} std::vector data_h_; }; @@ -43,25 +45,25 @@ HostDeviceVector::~HostDeviceVector() { } template -size_t HostDeviceVector::size() const { return impl_->data_h_.size(); } +size_t HostDeviceVector::Size() const { return impl_->data_h_.size(); } template -int HostDeviceVector::device() const { return -1; } +int HostDeviceVector::DeviceIdx() const { return -1; } template -T* HostDeviceVector::ptr_d(int device) { return nullptr; } +T* HostDeviceVector::DevicePointer(int device) { return nullptr; } template -std::vector& HostDeviceVector::data_h() { return impl_->data_h_; } +std::vector& HostDeviceVector::HostVector() { return impl_->data_h_; } template -void HostDeviceVector::resize(size_t new_size, T v, int new_device) { +void HostDeviceVector::Resize(size_t new_size, T v, int new_device) { impl_->data_h_.resize(new_size, v); } // explicit instantiations are required, as HostDeviceVector isn't header-only template class HostDeviceVector; -template class HostDeviceVector; 
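// Explicit instantiation is needed because the member definitions of
// HostDeviceVector live in this .cc file rather than in the header, so every
// element type used by callers must be instantiated here. The change below
// only renames the instantiated gradient type from bst_gpair to GradientPair;
// the set of instantiations itself is unchanged.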
+template class HostDeviceVector; } // namespace xgboost diff --git a/src/common/host_device_vector.cu b/src/common/host_device_vector.cu index 9a2a63020..c1529f100 100644 --- a/src/common/host_device_vector.cu +++ b/src/common/host_device_vector.cu @@ -35,27 +35,27 @@ struct HostDeviceVectorImpl { void operator=(const HostDeviceVectorImpl&) = delete; void operator=(HostDeviceVectorImpl&&) = delete; - size_t size() const { return on_d_ ? data_d_.size() : data_h_.size(); } + size_t Size() const { return on_d_ ? data_d_.size() : data_h_.size(); } - int device() const { return device_; } + int DeviceIdx() const { return device_; } - T* ptr_d(int device) { - lazy_sync_device(device); + T* DevicePointer(int device) { + LazySyncDevice(device); return data_d_.data().get(); } - thrust::device_ptr tbegin(int device) { - return thrust::device_ptr(ptr_d(device)); + thrust::device_ptr tbegin(int device) { // NOLINT + return thrust::device_ptr(DevicePointer(device)); } - thrust::device_ptr tend(int device) { + thrust::device_ptr tend(int device) { // NOLINT auto begin = tbegin(device); - return begin + size(); + return begin + Size(); } - std::vector& data_h() { - lazy_sync_host(); + std::vector& HostVector() { + LazySyncHost(); return data_h_; } - void resize(size_t new_size, T v, int new_device) { - if (new_size == this->size() && new_device == device_) + void Resize(size_t new_size, T v, int new_device) { + if (new_size == this->Size() && new_device == device_) return; if (new_device != -1) device_ = new_device; @@ -70,26 +70,26 @@ struct HostDeviceVectorImpl { } } - void lazy_sync_host() { + void LazySyncHost() { if (!on_d_) return; - if (data_h_.size() != this->size()) - data_h_.resize(this->size()); + if (data_h_.size() != this->Size()) + data_h_.resize(this->Size()); dh::safe_cuda(cudaSetDevice(device_)); thrust::copy(data_d_.begin(), data_d_.end(), data_h_.begin()); on_d_ = false; } - void lazy_sync_device(int device) { + void LazySyncDevice(int device) { if (on_d_) return; if (device != device_) { CHECK_EQ(device_, -1); device_ = device; } - if (data_d_.size() != this->size()) { + if (data_d_.size() != this->Size()) { dh::safe_cuda(cudaSetDevice(device_)); - data_d_.resize(this->size()); + data_d_.resize(this->Size()); } dh::safe_cuda(cudaSetDevice(device_)); thrust::copy(data_h_.begin(), data_h_.end(), data_d_.begin()); @@ -128,34 +128,34 @@ HostDeviceVector::~HostDeviceVector() { } template -size_t HostDeviceVector::size() const { return impl_->size(); } +size_t HostDeviceVector::Size() const { return impl_->Size(); } template -int HostDeviceVector::device() const { return impl_->device(); } +int HostDeviceVector::DeviceIdx() const { return impl_->DeviceIdx(); } template -T* HostDeviceVector::ptr_d(int device) { return impl_->ptr_d(device); } +T* HostDeviceVector::DevicePointer(int device) { return impl_->DevicePointer(device); } template -thrust::device_ptr HostDeviceVector::tbegin(int device) { +thrust::device_ptr HostDeviceVector::tbegin(int device) { // NOLINT return impl_->tbegin(device); } template -thrust::device_ptr HostDeviceVector::tend(int device) { +thrust::device_ptr HostDeviceVector::tend(int device) { // NOLINT return impl_->tend(device); } template -std::vector& HostDeviceVector::data_h() { return impl_->data_h(); } +std::vector& HostDeviceVector::HostVector() { return impl_->HostVector(); } template -void HostDeviceVector::resize(size_t new_size, T v, int new_device) { - impl_->resize(new_size, v, new_device); +void HostDeviceVector::Resize(size_t new_size, T v, int 
new_device) { + impl_->Resize(new_size, v, new_device); } // explicit instantiations are required, as HostDeviceVector isn't header-only template class HostDeviceVector; -template class HostDeviceVector; +template class HostDeviceVector; } // namespace xgboost diff --git a/src/common/host_device_vector.h b/src/common/host_device_vector.h index 3f4cb2b94..9428c311f 100644 --- a/src/common/host_device_vector.h +++ b/src/common/host_device_vector.h @@ -70,10 +70,10 @@ class HostDeviceVector { HostDeviceVector(HostDeviceVector&&) = delete; void operator=(const HostDeviceVector&) = delete; void operator=(HostDeviceVector&&) = delete; - size_t size() const; - int device() const; - T* ptr_d(int device); - T* ptr_h() { return data_h().data(); } + size_t Size() const; + int DeviceIdx() const; + T* DevicePointer(int device); + T* HostPointer() { return HostVector().data(); } // only define functions returning device_ptr // if HostDeviceVector.h is included from a .cu file @@ -82,10 +82,10 @@ class HostDeviceVector { thrust::device_ptr tend(int device); #endif - std::vector& data_h(); + std::vector& HostVector(); // passing in new_device == -1 keeps the device as is - void resize(size_t new_size, T v = T(), int new_device = -1); + void Resize(size_t new_size, T v = T(), int new_device = -1); private: HostDeviceVectorImpl* impl_; diff --git a/src/common/io.h b/src/common/io.h index c6e3a11c0..6f792a589 100644 --- a/src/common/io.h +++ b/src/common/io.h @@ -15,8 +15,8 @@ namespace xgboost { namespace common { -typedef rabit::utils::MemoryFixSizeBuffer MemoryFixSizeBuffer; -typedef rabit::utils::MemoryBufferStream MemoryBufferStream; +using MemoryFixSizeBuffer = rabit::utils::MemoryFixSizeBuffer; +using MemoryBufferStream = rabit::utils::MemoryBufferStream; /*! * \brief Input stream that support additional PeekRead diff --git a/src/common/math.h b/src/common/math.h index fb2459f44..be2598e3a 100644 --- a/src/common/math.h +++ b/src/common/math.h @@ -39,12 +39,12 @@ inline void Softmax(std::vector* p_rec) { wmax = std::max(rec[i], wmax); } double wsum = 0.0f; - for (size_t i = 0; i < rec.size(); ++i) { - rec[i] = std::exp(rec[i] - wmax); - wsum += rec[i]; + for (float & elem : rec) { + elem = std::exp(elem - wmax); + wsum += elem; } - for (size_t i = 0; i < rec.size(); ++i) { - rec[i] /= static_cast(wsum); + for (float & elem : rec) { + elem /= static_cast(wsum); } } diff --git a/src/common/quantile.h b/src/common/quantile.h index c79eca5e3..9372581a9 100644 --- a/src/common/quantile.h +++ b/src/common/quantile.h @@ -35,7 +35,7 @@ struct WQSummary { /*! \brief the value of data */ DType value; // constructor - Entry() {} + Entry() = default; // constructor Entry(RType rmin, RType rmax, RType wmin, DType value) : rmin(rmin), rmax(rmax), wmin(wmin), value(value) {} @@ -48,11 +48,11 @@ struct WQSummary { CHECK(rmax- rmin - wmin > -eps) << "relation constraint: min/max"; } /*! \return rmin estimation for v strictly bigger than value */ - inline RType rmin_next() const { + inline RType RMinNext() const { return rmin + wmin; } /*! 
\return rmax estimation for v strictly smaller than value */ - inline RType rmax_prev() const { + inline RType RMaxPrev() const { return rmax - wmin; } }; @@ -65,7 +65,7 @@ struct WQSummary { // weight of instance RType weight; // default constructor - QEntry() {} + QEntry() = default; // constructor QEntry(DType value, RType weight) : value(value), weight(weight) {} @@ -116,7 +116,7 @@ struct WQSummary { inline RType MaxError() const { RType res = data[0].rmax - data[0].rmin - data[0].wmin; for (size_t i = 1; i < size; ++i) { - res = std::max(data[i].rmax_prev() - data[i - 1].rmin_next(), res); + res = std::max(data[i].RMaxPrev() - data[i - 1].RMinNext(), res); res = std::max(data[i].rmax - data[i].rmin - data[i].wmin, res); } return res; @@ -140,8 +140,8 @@ struct WQSummary { if (istart == 0) { return Entry(0.0f, 0.0f, 0.0f, qvalue); } else { - return Entry(data[istart - 1].rmin_next(), - data[istart].rmax_prev(), + return Entry(data[istart - 1].RMinNext(), + data[istart].RMaxPrev(), 0.0f, qvalue); } } @@ -197,7 +197,7 @@ struct WQSummary { while (i < src.size - 1 && dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i; CHECK(i != src.size - 1); - if (dx2 < src.data[i].rmin_next() + src.data[i + 1].rmax_prev()) { + if (dx2 < src.data[i].RMinNext() + src.data[i + 1].RMaxPrev()) { if (i != lastidx) { data[size++] = src.data[i]; lastidx = i; } @@ -236,20 +236,20 @@ struct WQSummary { *dst = Entry(a->rmin + b->rmin, a->rmax + b->rmax, a->wmin + b->wmin, a->value); - aprev_rmin = a->rmin_next(); - bprev_rmin = b->rmin_next(); + aprev_rmin = a->RMinNext(); + bprev_rmin = b->RMinNext(); ++dst; ++a; ++b; } else if (a->value < b->value) { *dst = Entry(a->rmin + bprev_rmin, - a->rmax + b->rmax_prev(), + a->rmax + b->RMaxPrev(), a->wmin, a->value); - aprev_rmin = a->rmin_next(); + aprev_rmin = a->RMinNext(); ++dst; ++a; } else { *dst = Entry(b->rmin + aprev_rmin, - b->rmax + a->rmax_prev(), + b->rmax + a->RMaxPrev(), b->wmin, b->value); - bprev_rmin = b->rmin_next(); + bprev_rmin = b->RMinNext(); ++dst; ++b; } } @@ -307,7 +307,7 @@ struct WQSummary { data[i].rmax = prev_rmax; *err_maxgap = std::max(*err_maxgap, prev_rmax - data[i].rmax); } - RType rmin_next = data[i].rmin_next(); + RType rmin_next = data[i].RMinNext(); if (data[i].rmax < rmin_next) { data[i].rmax = rmin_next; *err_wgap = std::max(*err_wgap, data[i].rmax - rmin_next); @@ -334,13 +334,13 @@ struct WQSummary { template struct WXQSummary : public WQSummary { // redefine entry type - typedef typename WQSummary::Entry Entry; + using Entry = typename WQSummary::Entry; // constructor WXQSummary(Entry *data, size_t size) : WQSummary(data, size) {} // check if the block is large chunk inline static bool CheckLarge(const Entry &e, RType chunk) { - return e.rmin_next() > e.rmax_prev() + chunk; + return e.RMinNext() > e.RMaxPrev() + chunk; } // set prune inline void SetPrune(const WQSummary &src, size_t maxsize) { @@ -377,13 +377,13 @@ struct WXQSummary : public WQSummary { if (CheckLarge(src.data[i], chunk)) { if (bid != i - 1) { // accumulate the range of the rest points - mrange += src.data[i].rmax_prev() - src.data[bid].rmin_next(); + mrange += src.data[i].RMaxPrev() - src.data[bid].RMinNext(); } bid = i; ++nbig; } } if (bid != src.size - 2) { - mrange += src.data[src.size-1].rmax_prev() - src.data[bid].rmin_next(); + mrange += src.data[src.size-1].RMaxPrev() - src.data[bid].RMinNext(); } } // assert: there cannot be more than n big data points @@ -405,14 +405,14 @@ struct WXQSummary : public WQSummary { if (end == src.size - 1 || 
CheckLarge(src.data[end], chunk)) { if (bid != end - 1) { size_t i = bid; - RType maxdx2 = src.data[end].rmax_prev() * 2; + RType maxdx2 = src.data[end].RMaxPrev() * 2; for (; k < n; ++k) { RType dx2 = 2 * ((k * mrange) / n + begin); if (dx2 >= maxdx2) break; while (i < end && dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i; if (i == end) break; - if (dx2 < src.data[i].rmin_next() + src.data[i + 1].rmax_prev()) { + if (dx2 < src.data[i].RMinNext() + src.data[i + 1].RMaxPrev()) { if (i != lastidx) { this->data[this->size++] = src.data[i]; lastidx = i; } @@ -429,7 +429,7 @@ struct WXQSummary : public WQSummary { } bid = end; // shift base by the gap - begin += src.data[bid].rmin_next() - src.data[bid].rmax_prev(); + begin += src.data[bid].RMinNext() - src.data[bid].RMaxPrev(); } } } @@ -448,7 +448,7 @@ struct GKSummary { /*! \brief the value of data */ DType value; // constructor - Entry() {} + Entry() = default; // constructor Entry(RType rmin, RType rmax, DType value) : rmin(rmin), rmax(rmax), value(value) {} @@ -591,17 +591,17 @@ template class QuantileSketchTemplate { public: /*! \brief type of summary type */ - typedef TSummary Summary; + using Summary = TSummary; /*! \brief the entry type */ - typedef typename Summary::Entry Entry; + using Entry = typename Summary::Entry; /*! \brief same as summary, but use STL to backup the space */ struct SummaryContainer : public Summary { std::vector space; - SummaryContainer(const SummaryContainer &src) : Summary(NULL, src.size) { + SummaryContainer(const SummaryContainer &src) : Summary(nullptr, src.size) { this->space = src.space; this->data = dmlc::BeginPtr(this->space); } - SummaryContainer() : Summary(NULL, 0) { + SummaryContainer() : Summary(nullptr, 0) { } /*! \brief reserve space for summary */ inline void Reserve(size_t size) { @@ -775,7 +775,7 @@ class QuantileSketchTemplate { inline void InitLevel(size_t nlevel) { if (level.size() >= nlevel) return; data.resize(limit_size * nlevel); - level.resize(nlevel, Summary(NULL, 0)); + level.resize(nlevel, Summary(nullptr, 0)); for (size_t l = 0; l < level.size(); ++l) { level[l].data = dmlc::BeginPtr(data) + l * limit_size; } diff --git a/src/common/random.h b/src/common/random.h index 92f414108..bcfe2e904 100644 --- a/src/common/random.h +++ b/src/common/random.h @@ -15,7 +15,7 @@ namespace common { /*! * \brief Define mt19937 as default type Random Engine. */ -typedef std::mt19937 RandomEngine; +using RandomEngine = std::mt19937; #if XGBOOST_CUSTOMIZE_GLOBAL_PRNG /*! @@ -56,7 +56,7 @@ typedef CustomGlobalRandomEngine GlobalRandomEngine; /*! * \brief global random engine */ -typedef RandomEngine GlobalRandomEngine; +using GlobalRandomEngine = RandomEngine; #endif /*! diff --git a/src/common/row_set.h b/src/common/row_set.h index 921f4fbe8..01e27a39c 100644 --- a/src/common/row_set.h +++ b/src/common/row_set.h @@ -21,18 +21,18 @@ class RowSetCollection { * rows (instances) associated with a particular node in a decision * tree. 
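* Each Elem is a non-owning view: a [begin, end) span over the collection's
* shared row-index buffer, together with the id of the tree node that owns
* those rows (-1 while uninitialized).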
*/ struct Elem { - const size_t* begin; - const size_t* end; - int node_id; + const size_t* begin{nullptr}; + const size_t* end{nullptr}; + int node_id{-1}; // id of node associated with this instance set; -1 means uninitialized - Elem(void) - : begin(nullptr), end(nullptr), node_id(-1) {} + Elem() + = default; Elem(const size_t* begin, const size_t* end, int node_id) : begin(begin), end(end), node_id(node_id) {} - inline size_t size() const { + inline size_t Size() const { return end - begin; } }; @@ -42,11 +42,11 @@ class RowSetCollection { std::vector right; }; - inline std::vector::const_iterator begin() const { + inline std::vector::const_iterator begin() const { // NOLINT return elem_of_each_node_.begin(); } - inline std::vector::const_iterator end() const { + inline std::vector::const_iterator end() const { // NOLINT return elem_of_each_node_.end(); } @@ -88,7 +88,7 @@ class RowSetCollection { unsigned left_node_id, unsigned right_node_id) { const Elem e = elem_of_each_node_[node_id]; - const bst_omp_uint nthread = static_cast(row_split_tloc.size()); + const auto nthread = static_cast(row_split_tloc.size()); CHECK(e.begin != nullptr); size_t* all_begin = dmlc::BeginPtr(row_indices_); size_t* begin = all_begin + (e.begin - all_begin); diff --git a/src/common/timer.h b/src/common/timer.h index d1cd53ac7..32460ae56 100644 --- a/src/common/timer.h +++ b/src/common/timer.h @@ -12,10 +12,10 @@ namespace xgboost { namespace common { struct Timer { - typedef std::chrono::high_resolution_clock ClockT; - typedef std::chrono::high_resolution_clock::time_point TimePointT; - typedef std::chrono::high_resolution_clock::duration DurationT; - typedef std::chrono::duration SecondsT; + using ClockT = std::chrono::high_resolution_clock; + using TimePointT = std::chrono::high_resolution_clock::time_point; + using DurationT = std::chrono::high_resolution_clock::duration; + using SecondsT = std::chrono::duration; TimePointT start; DurationT elapsed; @@ -70,7 +70,7 @@ struct Monitor { if (debug_verbose) { #ifdef __CUDACC__ #include "device_helpers.cuh" - dh::synchronize_n_devices(dList.size(), dList); + dh::SynchronizeNDevices(dList.size(), dList); #endif } timer_map[name].Start(); @@ -80,7 +80,7 @@ struct Monitor { if (debug_verbose) { #ifdef __CUDACC__ #include "device_helpers.cuh" - dh::synchronize_n_devices(dList.size(), dList); + dh::SynchronizeNDevices(dList.size(), dList); #endif } timer_map[name].Stop(); diff --git a/src/data/data.cc b/src/data/data.cc index d7887a363..0281e3fa5 100644 --- a/src/data/data.cc +++ b/src/data/data.cc @@ -24,51 +24,51 @@ DMLC_REGISTRY_ENABLE(::xgboost::data::SparsePageFormatReg); namespace xgboost { // implementation of inline functions void MetaInfo::Clear() { - num_row = num_col = num_nonzero = 0; - labels.clear(); - root_index.clear(); - group_ptr.clear(); - weights.clear(); - base_margin.clear(); + num_row_ = num_col_ = num_nonzero_ = 0; + labels_.clear(); + root_index_.clear(); + group_ptr_.clear(); + weights_.clear(); + base_margin_.clear(); } void MetaInfo::SaveBinary(dmlc::Stream *fo) const { int32_t version = kVersion; fo->Write(&version, sizeof(version)); - fo->Write(&num_row, sizeof(num_row)); - fo->Write(&num_col, sizeof(num_col)); - fo->Write(&num_nonzero, sizeof(num_nonzero)); - fo->Write(labels); - fo->Write(group_ptr); - fo->Write(weights); - fo->Write(root_index); - fo->Write(base_margin); + fo->Write(&num_row_, sizeof(num_row_)); + fo->Write(&num_col_, sizeof(num_col_)); + fo->Write(&num_nonzero_, sizeof(num_nonzero_)); + fo->Write(labels_); + 
fo->Write(group_ptr_); + fo->Write(weights_); + fo->Write(root_index_); + fo->Write(base_margin_); } void MetaInfo::LoadBinary(dmlc::Stream *fi) { int version; CHECK(fi->Read(&version, sizeof(version)) == sizeof(version)) << "MetaInfo: invalid version"; CHECK_EQ(version, kVersion) << "MetaInfo: invalid format"; - CHECK(fi->Read(&num_row, sizeof(num_row)) == sizeof(num_row)) << "MetaInfo: invalid format"; - CHECK(fi->Read(&num_col, sizeof(num_col)) == sizeof(num_col)) << "MetaInfo: invalid format"; - CHECK(fi->Read(&num_nonzero, sizeof(num_nonzero)) == sizeof(num_nonzero)) + CHECK(fi->Read(&num_row_, sizeof(num_row_)) == sizeof(num_row_)) << "MetaInfo: invalid format"; + CHECK(fi->Read(&num_col_, sizeof(num_col_)) == sizeof(num_col_)) << "MetaInfo: invalid format"; + CHECK(fi->Read(&num_nonzero_, sizeof(num_nonzero_)) == sizeof(num_nonzero_)) << "MetaInfo: invalid format"; - CHECK(fi->Read(&labels)) << "MetaInfo: invalid format"; - CHECK(fi->Read(&group_ptr)) << "MetaInfo: invalid format"; - CHECK(fi->Read(&weights)) << "MetaInfo: invalid format"; - CHECK(fi->Read(&root_index)) << "MetaInfo: invalid format"; - CHECK(fi->Read(&base_margin)) << "MetaInfo: invalid format"; + CHECK(fi->Read(&labels_)) << "MetaInfo: invalid format"; + CHECK(fi->Read(&group_ptr_)) << "MetaInfo: invalid format"; + CHECK(fi->Read(&weights_)) << "MetaInfo: invalid format"; + CHECK(fi->Read(&root_index_)) << "MetaInfo: invalid format"; + CHECK(fi->Read(&base_margin_)) << "MetaInfo: invalid format"; } // try to load group information from file, if exists inline bool MetaTryLoadGroup(const std::string& fname, std::vector* group) { std::unique_ptr fi(dmlc::Stream::Create(fname.c_str(), "r", true)); - if (fi.get() == nullptr) return false; + if (fi == nullptr) return false; dmlc::istream is(fi.get()); group->clear(); group->push_back(0); - unsigned nline; + unsigned nline = 0; while (is >> nline) { group->push_back(group->back() + nline); } @@ -79,7 +79,7 @@ inline bool MetaTryLoadGroup(const std::string& fname, inline bool MetaTryLoadFloatInfo(const std::string& fname, std::vector* data) { std::unique_ptr fi(dmlc::Stream::Create(fname.c_str(), "r", true)); - if (fi.get() == nullptr) return false; + if (fi == nullptr) return false; dmlc::istream is(fi.get()); data->clear(); bst_float value; @@ -93,16 +93,16 @@ inline bool MetaTryLoadFloatInfo(const std::string& fname, #define DISPATCH_CONST_PTR(dtype, old_ptr, cast_ptr, proc) \ switch (dtype) { \ case kFloat32: { \ - const float* cast_ptr = reinterpret_cast(old_ptr); proc; break; \ + auto cast_ptr = reinterpret_cast(old_ptr); proc; break; \ } \ case kDouble: { \ - const double* cast_ptr = reinterpret_cast(old_ptr); proc; break; \ + auto cast_ptr = reinterpret_cast(old_ptr); proc; break; \ } \ case kUInt32: { \ - const uint32_t* cast_ptr = reinterpret_cast(old_ptr); proc; break; \ + auto cast_ptr = reinterpret_cast(old_ptr); proc; break; \ } \ case kUInt64: { \ - const uint64_t* cast_ptr = reinterpret_cast(old_ptr); proc; break; \ + auto cast_ptr = reinterpret_cast(old_ptr); proc; break; \ } \ default: LOG(FATAL) << "Unknown data type" << dtype; \ } \ @@ -110,28 +110,28 @@ inline bool MetaTryLoadFloatInfo(const std::string& fname, void MetaInfo::SetInfo(const char* key, const void* dptr, DataType dtype, size_t num) { if (!std::strcmp(key, "root_index")) { - root_index.resize(num); + root_index_.resize(num); DISPATCH_CONST_PTR(dtype, dptr, cast_dptr, - std::copy(cast_dptr, cast_dptr + num, root_index.begin())); + std::copy(cast_dptr, cast_dptr + num, root_index_.begin())); 
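// DISPATCH_CONST_PTR switches on the runtime DataType tag and binds cast_dptr
// to a pointer of the matching element type before executing the statement
// passed as `proc`; with `auto`, each case deduces the pointee type from its
// reinterpret_cast instead of spelling the type out twice.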
} else if (!std::strcmp(key, "label")) { - labels.resize(num); + labels_.resize(num); DISPATCH_CONST_PTR(dtype, dptr, cast_dptr, - std::copy(cast_dptr, cast_dptr + num, labels.begin())); + std::copy(cast_dptr, cast_dptr + num, labels_.begin())); } else if (!std::strcmp(key, "weight")) { - weights.resize(num); + weights_.resize(num); DISPATCH_CONST_PTR(dtype, dptr, cast_dptr, - std::copy(cast_dptr, cast_dptr + num, weights.begin())); + std::copy(cast_dptr, cast_dptr + num, weights_.begin())); } else if (!std::strcmp(key, "base_margin")) { - base_margin.resize(num); + base_margin_.resize(num); DISPATCH_CONST_PTR(dtype, dptr, cast_dptr, - std::copy(cast_dptr, cast_dptr + num, base_margin.begin())); + std::copy(cast_dptr, cast_dptr + num, base_margin_.begin())); } else if (!std::strcmp(key, "group")) { - group_ptr.resize(num + 1); + group_ptr_.resize(num + 1); DISPATCH_CONST_PTR(dtype, dptr, cast_dptr, - std::copy(cast_dptr, cast_dptr + num, group_ptr.begin() + 1)); - group_ptr[0] = 0; - for (size_t i = 1; i < group_ptr.size(); ++i) { - group_ptr[i] = group_ptr[i - 1] + group_ptr[i]; + std::copy(cast_dptr, cast_dptr + num, group_ptr_.begin() + 1)); + group_ptr_[0] = 0; + for (size_t i = 1; i < group_ptr_.size(); ++i) { + group_ptr_[i] = group_ptr_[i - 1] + group_ptr_[i]; } } } @@ -163,7 +163,9 @@ DMatrix* DMatrix::Load(const std::string& uri, << "-" << rabit::GetWorldSize() << cache_shards[i].substr(pos, cache_shards[i].length()); } - if (i + 1 != cache_shards.size()) os << ':'; + if (i + 1 != cache_shards.size()) { + os << ':'; + } } cache_file = os.str(); } @@ -187,7 +189,7 @@ DMatrix* DMatrix::Load(const std::string& uri, if (file_format == "auto" && npart == 1) { int magic; std::unique_ptr fi(dmlc::Stream::Create(fname.c_str(), "r", true)); - if (fi.get() != nullptr) { + if (fi != nullptr) { common::PeekableInStream is(fi.get()); if (is.PeekRead(&magic, sizeof(magic)) == sizeof(magic) && magic == data::SimpleCSRSource::kMagic) { @@ -195,8 +197,8 @@ DMatrix* DMatrix::Load(const std::string& uri, source->LoadBinary(&is); DMatrix* dmat = DMatrix::Create(std::move(source), cache_file); if (!silent) { - LOG(CONSOLE) << dmat->info().num_row << 'x' << dmat->info().num_col << " matrix with " - << dmat->info().num_nonzero << " entries loaded from " << uri; + LOG(CONSOLE) << dmat->Info().num_row_ << 'x' << dmat->Info().num_col_ << " matrix with " + << dmat->Info().num_nonzero_ << " entries loaded from " << uri; } return dmat; } @@ -207,26 +209,26 @@ DMatrix* DMatrix::Load(const std::string& uri, dmlc::Parser::Create(fname.c_str(), partid, npart, file_format.c_str())); DMatrix* dmat = DMatrix::Create(parser.get(), cache_file); if (!silent) { - LOG(CONSOLE) << dmat->info().num_row << 'x' << dmat->info().num_col << " matrix with " - << dmat->info().num_nonzero << " entries loaded from " << uri; + LOG(CONSOLE) << dmat->Info().num_row_ << 'x' << dmat->Info().num_col_ << " matrix with " + << dmat->Info().num_nonzero_ << " entries loaded from " << uri; } /* sync up number of features after matrix loaded. * partitioned data will fail the train/val validation check * since partitioned data not knowing the real number of features. */ - rabit::Allreduce(&dmat->info().num_col, 1); + rabit::Allreduce(&dmat->Info().num_col_, 1); // backward compatiblity code. 
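// When the data is not loaded in row-split mode, the optional side files
// <fname>.group, <fname>.base_margin and <fname>.weight are probed and, when
// present, read into the matching MetaInfo fields below.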
if (!load_row_split) { - MetaInfo& info = dmat->info(); - if (MetaTryLoadGroup(fname + ".group", &info.group_ptr) && !silent) { - LOG(CONSOLE) << info.group_ptr.size() - 1 + MetaInfo& info = dmat->Info(); + if (MetaTryLoadGroup(fname + ".group", &info.group_ptr_) && !silent) { + LOG(CONSOLE) << info.group_ptr_.size() - 1 << " groups are loaded from " << fname << ".group"; } - if (MetaTryLoadFloatInfo(fname + ".base_margin", &info.base_margin) && !silent) { - LOG(CONSOLE) << info.base_margin.size() + if (MetaTryLoadFloatInfo(fname + ".base_margin", &info.base_margin_) && !silent) { + LOG(CONSOLE) << info.base_margin_.size() << " base_margin are loaded from " << fname << ".base_margin"; } - if (MetaTryLoadFloatInfo(fname + ".weight", &info.weights) && !silent) { - LOG(CONSOLE) << info.weights.size() + if (MetaTryLoadFloatInfo(fname + ".weight", &info.weights_) && !silent) { + LOG(CONSOLE) << info.weights_.size() << " weights are loaded from " << fname << ".weight"; } } diff --git a/src/data/simple_csr_source.cc b/src/data/simple_csr_source.cc index 1a8bd419e..92456301b 100644 --- a/src/data/simple_csr_source.cc +++ b/src/data/simple_csr_source.cc @@ -18,7 +18,7 @@ void SimpleCSRSource::Clear() { void SimpleCSRSource::CopyFrom(DMatrix* src) { this->Clear(); - this->info = src->info(); + this->info = src->Info(); dmlc::DataIter* iter = src->RowIterator(); iter->BeforeFirst(); while (iter->Next()) { @@ -36,10 +36,10 @@ void SimpleCSRSource::CopyFrom(dmlc::Parser* parser) { while (parser->Next()) { const dmlc::RowBlock& batch = parser->Value(); if (batch.label != nullptr) { - info.labels.insert(info.labels.end(), batch.label, batch.label + batch.size); + info.labels_.insert(info.labels_.end(), batch.label, batch.label + batch.size); } if (batch.weight != nullptr) { - info.weights.insert(info.weights.end(), batch.weight, batch.weight + batch.size); + info.weights_.insert(info.weights_.end(), batch.weight, batch.weight + batch.size); } // Remove the assertion on batch.index, which can be null in the case that the data in this // batch is entirely sparse. Although it's true that this indicates a likely issue with the @@ -48,13 +48,13 @@ void SimpleCSRSource::CopyFrom(dmlc::Parser* parser) { // CHECK(batch.index != nullptr); // update information - this->info.num_row += batch.size; + this->info.num_row_ += batch.size; // copy the data over for (size_t i = batch.offset[0]; i < batch.offset[batch.size]; ++i) { uint32_t index = batch.index[i]; bst_float fvalue = batch.value == nullptr ? 1.0f : batch.value[i]; - row_data_.push_back(SparseBatch::Entry(index, fvalue)); - this->info.num_col = std::max(this->info.num_col, + row_data_.emplace_back(index, fvalue); + this->info.num_col_ = std::max(this->info.num_col_, static_cast(index + 1)); } size_t top = row_ptr_.size(); @@ -62,7 +62,7 @@ void SimpleCSRSource::CopyFrom(dmlc::Parser* parser) { row_ptr_.push_back(row_ptr_[top - 1] + batch.offset[i + 1] - batch.offset[0]); } } - this->info.num_nonzero = static_cast(row_data_.size()); + this->info.num_nonzero_ = static_cast(row_data_.size()); } void SimpleCSRSource::LoadBinary(dmlc::Stream* fi) { diff --git a/src/data/simple_csr_source.h b/src/data/simple_csr_source.h index 1e7adb0b2..a70a48a39 100644 --- a/src/data/simple_csr_source.h +++ b/src/data/simple_csr_source.h @@ -35,9 +35,9 @@ class SimpleCSRSource : public DataSource { std::vector row_data_; // functions /*! \brief default constructor */ - SimpleCSRSource() : row_ptr_(1, 0), at_first_(true) {} + SimpleCSRSource() : row_ptr_(1, 0) {} /*! 
\brief destructor */ - virtual ~SimpleCSRSource() {} + ~SimpleCSRSource() override = default; /*! \brief clear the data structure */ void Clear(); /*! @@ -72,7 +72,7 @@ class SimpleCSRSource : public DataSource { private: /*! \brief internal variable, used to support iterator interface */ - bool at_first_; + bool at_first_{true}; /*! \brief */ RowBatch batch_; }; diff --git a/src/data/simple_dmatrix.cc b/src/data/simple_dmatrix.cc index 42c836545..18b13bf2f 100644 --- a/src/data/simple_dmatrix.cc +++ b/src/data/simple_dmatrix.cc @@ -20,7 +20,7 @@ bool SimpleDMatrix::ColBatchIter::Next() { data_ptr_ += 1; SparsePage* pcol = cpages_[data_ptr_ - 1].get(); batch_.size = col_index_.size(); - col_data_.resize(col_index_.size(), SparseBatch::Inst(NULL, 0)); + col_data_.resize(col_index_.size(), SparseBatch::Inst(nullptr, 0)); for (size_t i = 0; i < col_data_.size(); ++i) { const bst_uint ridx = col_index_[i]; col_data_[i] = SparseBatch::Inst @@ -33,7 +33,7 @@ bool SimpleDMatrix::ColBatchIter::Next() { } dmlc::DataIter* SimpleDMatrix::ColIterator() { - size_t ncol = this->info().num_col; + size_t ncol = this->Info().num_col_; col_iter_.col_index_.resize(ncol); for (size_t i = 0; i < ncol; ++i) { col_iter_.col_index_[i] = static_cast(i); @@ -43,10 +43,10 @@ dmlc::DataIter* SimpleDMatrix::ColIterator() { } dmlc::DataIter* SimpleDMatrix::ColIterator(const std::vector&fset) { - size_t ncol = this->info().num_col; + size_t ncol = this->Info().num_col_; col_iter_.col_index_.resize(0); - for (size_t i = 0; i < fset.size(); ++i) { - if (fset[i] < ncol) col_iter_.col_index_.push_back(fset[i]); + for (auto fidx : fset) { + if (fidx < ncol) col_iter_.col_index_.push_back(fidx); } col_iter_.BeforeFirst(); return &col_iter_; @@ -56,9 +56,9 @@ void SimpleDMatrix::InitColAccess(const std::vector &enabled, float pkeep, size_t max_row_perbatch, bool sorted) { if (this->HaveColAccess(sorted)) return; - col_iter_.sorted = sorted; + col_iter_.sorted_ = sorted; col_iter_.cpages_.clear(); - if (info().num_row < max_row_perbatch) { + if (Info().num_row_ < max_row_perbatch) { std::unique_ptr page(new SparsePage()); this->MakeOneBatch(enabled, pkeep, page.get(), sorted); col_iter_.cpages_.push_back(std::move(page)); @@ -66,10 +66,10 @@ void SimpleDMatrix::InitColAccess(const std::vector &enabled, this->MakeManyBatch(enabled, pkeep, max_row_perbatch, sorted); } // setup col-size - col_size_.resize(info().num_col); + col_size_.resize(Info().num_col_); std::fill(col_size_.begin(), col_size_.end(), 0); - for (size_t i = 0; i < col_iter_.cpages_.size(); ++i) { - SparsePage *pcol = col_iter_.cpages_[i].get(); + for (auto & cpage : col_iter_.cpages_) { + SparsePage *pcol = cpage.get(); for (size_t j = 0; j < pcol->Size(); ++j) { col_size_[j] += pcol->offset[j + 1] - pcol->offset[j]; } @@ -80,14 +80,14 @@ void SimpleDMatrix::InitColAccess(const std::vector &enabled, void SimpleDMatrix::MakeOneBatch(const std::vector& enabled, float pkeep, SparsePage* pcol, bool sorted) { // clear rowset - buffered_rowset_.clear(); + buffered_rowset_.Clear(); // bit map const int nthread = omp_get_max_threads(); std::vector bmap; pcol->Clear(); common::ParallelGroupBuilder builder(&pcol->offset, &pcol->data); - builder.InitBudget(info().num_col, nthread); + builder.InitBudget(Info().num_col_, nthread); // start working dmlc::DataIter* iter = this->RowIterator(); iter->BeforeFirst(); @@ -99,9 +99,9 @@ void SimpleDMatrix::MakeOneBatch(const std::vector& enabled, float pkeep, long batch_size = static_cast(batch.size); // NOLINT(*) for (long i = 0; i 
< batch_size; ++i) { // NOLINT(*) - bst_uint ridx = static_cast(batch.base_rowid + i); + auto ridx = static_cast(batch.base_rowid + i); if (pkeep == 1.0f || coin_flip(rnd)) { - buffered_rowset_.push_back(ridx); + buffered_rowset_.PushBack(ridx); } else { bmap[i] = false; } @@ -109,7 +109,7 @@ void SimpleDMatrix::MakeOneBatch(const std::vector& enabled, float pkeep, #pragma omp parallel for schedule(static) for (long i = 0; i < batch_size; ++i) { // NOLINT(*) int tid = omp_get_thread_num(); - bst_uint ridx = static_cast(batch.base_rowid + i); + auto ridx = static_cast(batch.base_rowid + i); if (bmap[ridx]) { RowBatch::Inst inst = batch[i]; for (bst_uint j = 0; j < inst.length; ++j) { @@ -128,13 +128,13 @@ void SimpleDMatrix::MakeOneBatch(const std::vector& enabled, float pkeep, #pragma omp parallel for schedule(static) for (long i = 0; i < static_cast(batch.size); ++i) { // NOLINT(*) int tid = omp_get_thread_num(); - bst_uint ridx = static_cast(batch.base_rowid + i); + auto ridx = static_cast(batch.base_rowid + i); if (bmap[ridx]) { RowBatch::Inst inst = batch[i]; for (bst_uint j = 0; j < inst.length; ++j) { if (enabled[inst[j].index]) { builder.Push(inst[j].index, - SparseBatch::Entry((bst_uint)(batch.base_rowid+i), + SparseBatch::Entry(static_cast(batch.base_rowid+i), inst[j].fvalue), tid); } } @@ -142,11 +142,11 @@ void SimpleDMatrix::MakeOneBatch(const std::vector& enabled, float pkeep, } } - CHECK_EQ(pcol->Size(), info().num_col); + CHECK_EQ(pcol->Size(), Info().num_col_); if (sorted) { // sort columns - bst_omp_uint ncol = static_cast(pcol->Size()); + auto ncol = static_cast(pcol->Size()); #pragma omp parallel for schedule(dynamic, 1) num_threads(nthread) for (bst_omp_uint i = 0; i < ncol; ++i) { if (pcol->offset[i] < pcol->offset[i + 1]) { @@ -164,7 +164,7 @@ void SimpleDMatrix::MakeManyBatch(const std::vector& enabled, size_t btop = 0; std::bernoulli_distribution coin_flip(pkeep); auto& rnd = common::GlobalRandom(); - buffered_rowset_.clear(); + buffered_rowset_.Clear(); // internal temp cache SparsePage tmp; tmp.Clear(); // start working @@ -174,16 +174,16 @@ void SimpleDMatrix::MakeManyBatch(const std::vector& enabled, while (iter->Next()) { const RowBatch &batch = iter->Value(); for (size_t i = 0; i < batch.size; ++i) { - bst_uint ridx = static_cast(batch.base_rowid + i); + auto ridx = static_cast(batch.base_rowid + i); if (pkeep == 1.0f || coin_flip(rnd)) { - buffered_rowset_.push_back(ridx); + buffered_rowset_.PushBack(ridx); tmp.Push(batch[i]); } if (tmp.Size() >= max_row_perbatch) { std::unique_ptr page(new SparsePage()); this->MakeColPage(tmp.GetRowBatch(0), btop, enabled, page.get(), sorted); col_iter_.cpages_.push_back(std::move(page)); - btop = buffered_rowset_.size(); + btop = buffered_rowset_.Size(); tmp.Clear(); } } @@ -205,7 +205,7 @@ void SimpleDMatrix::MakeColPage(const RowBatch& batch, pcol->Clear(); common::ParallelGroupBuilder builder(&pcol->offset, &pcol->data); - builder.InitBudget(info().num_col, nthread); + builder.InitBudget(Info().num_col_, nthread); bst_omp_uint ndata = static_cast(batch.size); #pragma omp parallel for schedule(static) num_threads(nthread) for (bst_omp_uint i = 0; i < ndata; ++i) { @@ -231,10 +231,10 @@ void SimpleDMatrix::MakeColPage(const RowBatch& batch, tid); } } - CHECK_EQ(pcol->Size(), info().num_col); + CHECK_EQ(pcol->Size(), Info().num_col_); // sort columns if (sorted) { - bst_omp_uint ncol = static_cast(pcol->Size()); + auto ncol = static_cast(pcol->Size()); #pragma omp parallel for schedule(dynamic, 1) num_threads(nthread) for 
(bst_omp_uint i = 0; i < ncol; ++i) { if (pcol->offset[i] < pcol->offset[i + 1]) { diff --git a/src/data/simple_dmatrix.h b/src/data/simple_dmatrix.h index 58d60c444..20136e0b5 100644 --- a/src/data/simple_dmatrix.h +++ b/src/data/simple_dmatrix.h @@ -22,11 +22,11 @@ class SimpleDMatrix : public DMatrix { explicit SimpleDMatrix(std::unique_ptr&& source) : source_(std::move(source)) {} - MetaInfo& info() override { + MetaInfo& Info() override { return source_->info; } - const MetaInfo& info() const override { + const MetaInfo& Info() const override { return source_->info; } @@ -37,10 +37,10 @@ class SimpleDMatrix : public DMatrix { } bool HaveColAccess(bool sorted) const override { - return col_size_.size() != 0 && col_iter_.sorted == sorted; + return col_size_.size() != 0 && col_iter_.sorted_ == sorted; } - const RowSet& buffered_rowset() const override { + const RowSet& BufferedRowset() const override { return buffered_rowset_; } @@ -49,8 +49,8 @@ class SimpleDMatrix : public DMatrix { } float GetColDensity(size_t cidx) const override { - size_t nmiss = buffered_rowset_.size() - col_size_[cidx]; - return 1.0f - (static_cast(nmiss)) / buffered_rowset_.size(); + size_t nmiss = buffered_rowset_.Size() - col_size_[cidx]; + return 1.0f - (static_cast(nmiss)) / buffered_rowset_.Size(); } dmlc::DataIter* ColIterator() override; @@ -67,7 +67,7 @@ class SimpleDMatrix : public DMatrix { // in-memory column batch iterator. struct ColBatchIter: dmlc::DataIter { public: - ColBatchIter() : data_ptr_(0), sorted(false) {} + ColBatchIter() = default; void BeforeFirst() override { data_ptr_ = 0; } @@ -86,11 +86,11 @@ class SimpleDMatrix : public DMatrix { // column sparse pages std::vector > cpages_; // data pointer - size_t data_ptr_; + size_t data_ptr_{0}; // temporal space for batch ColBatch batch_; // Is column sorted? - bool sorted; + bool sorted_{false}; }; // source data pointer. diff --git a/src/data/sparse_batch_page.h b/src/data/sparse_batch_page.h index 8c111a840..1c64df7d0 100644 --- a/src/data/sparse_batch_page.h +++ b/src/data/sparse_batch_page.h @@ -51,11 +51,11 @@ class SparsePage { return offset.size() - 1; } /*! \return estimation of memory cost of this page */ - inline size_t MemCostBytes(void) const { + inline size_t MemCostBytes() const { return offset.size() * sizeof(size_t) + data.size() * sizeof(SparseBatch::Entry); } /*! \brief clear the page */ - inline void Clear(void) { + inline void Clear() { min_index = 0; offset.clear(); offset.push_back(0); @@ -92,7 +92,7 @@ class SparsePage { for (size_t i = batch.offset[0]; i < batch.offset[batch.size]; ++i) { uint32_t index = batch.index[i]; bst_float fvalue = batch.value == nullptr ? 1.0f : batch.value[i]; - data.push_back(SparseBatch::Entry(index, fvalue)); + data.emplace_back(index, fvalue); } CHECK_EQ(offset.back(), data.size()); } @@ -145,7 +145,7 @@ class SparsePage { class SparsePage::Format { public: /*! \brief virtual destructor */ - virtual ~Format() {} + virtual ~Format() = default; /*! * \brief Load all the segments into page, advance fi to end of the block. * \param page The data to read page into. 
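A minimal standalone sketch of the push_back-to-emplace_back change made in SparsePage::Push above; Entry here is a simplified stand-in for SparseBatch::Entry, not xgboost's exact definition:

    #include <cstdint>
    #include <vector>

    // Stand-in for SparseBatch::Entry: a (feature index, feature value) pair.
    struct Entry {
      uint32_t index;
      float fvalue;
      Entry(uint32_t index, float fvalue) : index(index), fvalue(fvalue) {}
    };

    int main() {
      std::vector<Entry> data;
      data.push_back(Entry(3, 0.5f));  // old style: build a temporary, then copy/move it in
      data.emplace_back(3, 0.5f);      // new style: construct the element in place
      return 0;
    }

emplace_back constructs the element directly in the vector's storage and avoids the temporary entirely, which matters in the hot loops that fill row_data_.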
diff --git a/src/data/sparse_page_dmatrix.cc b/src/data/sparse_page_dmatrix.cc index f1cc48e57..6c3376c62 100644 --- a/src/data/sparse_page_dmatrix.cc +++ b/src/data/sparse_page_dmatrix.cc @@ -94,9 +94,9 @@ void SparsePageDMatrix::ColPageIter::Init(const std::vector& index_set } dmlc::DataIter* SparsePageDMatrix::ColIterator() { - CHECK(col_iter_.get() != nullptr); + CHECK(col_iter_ != nullptr); std::vector col_index; - size_t ncol = this->info().num_col; + size_t ncol = this->Info().num_col_; for (size_t i = 0; i < ncol; ++i) { col_index.push_back(static_cast(i)); } @@ -106,12 +106,12 @@ dmlc::DataIter* SparsePageDMatrix::ColIterator() { dmlc::DataIter* SparsePageDMatrix:: ColIterator(const std::vector& fset) { - CHECK(col_iter_.get() != nullptr); + CHECK(col_iter_ != nullptr); std::vector col_index; - size_t ncol = this->info().num_col; - for (size_t i = 0; i < fset.size(); ++i) { - if (fset[i] < ncol) { - col_index.push_back(fset[i]); + size_t ncol = this->Info().num_col_; + for (auto fidx : fset) { + if (fidx < ncol) { + col_index.push_back(fidx); } } col_iter_->Init(col_index, false); @@ -126,7 +126,7 @@ bool SparsePageDMatrix::TryInitColData(bool sorted) { std::string col_meta_name = cache_shards[0] + ".col.meta"; std::unique_ptr fmeta( dmlc::Stream::Create(col_meta_name.c_str(), "r", true)); - if (fmeta.get() == nullptr) return false; + if (fmeta == nullptr) return false; CHECK(fmeta->Read(&buffered_rowset_)) << "invalid col.meta file"; CHECK(fmeta->Read(&col_size_)) << "invalid col.meta file"; } @@ -136,7 +136,7 @@ bool SparsePageDMatrix::TryInitColData(bool sorted) { std::string col_data_name = prefix + ".col.page"; std::unique_ptr fdata( dmlc::SeekStream::CreateForRead(col_data_name.c_str(), true)); - if (fdata.get() == nullptr) return false; + if (fdata == nullptr) return false; files.push_back(std::move(fdata)); } col_iter_.reset(new ColPageIter(std::move(files))); @@ -150,12 +150,12 @@ void SparsePageDMatrix::InitColAccess(const std::vector& enabled, size_t max_row_perbatch, bool sorted) { if (HaveColAccess(sorted)) return; if (TryInitColData(sorted)) return; - const MetaInfo& info = this->info(); + const MetaInfo& info = this->Info(); if (max_row_perbatch == std::numeric_limits::max()) { max_row_perbatch = kMaxRowPerBatch; } - buffered_rowset_.clear(); - col_size_.resize(info.num_col); + buffered_rowset_.Clear(); + col_size_.resize(info.num_col_); std::fill(col_size_.begin(), col_size_.end(), 0); dmlc::DataIter* iter = this->RowIterator(); std::bernoulli_distribution coin_flip(pkeep); @@ -173,7 +173,7 @@ void SparsePageDMatrix::InitColAccess(const std::vector& enabled, const int nthread = std::max(omp_get_max_threads(), std::max(omp_get_num_procs() / 2 - 1, 1)); common::ParallelGroupBuilder builder(&pcol->offset, &pcol->data); - builder.InitBudget(info.num_col, nthread); + builder.InitBudget(info.num_col_, nthread); bst_omp_uint ndata = static_cast(prow.Size()); #pragma omp parallel for schedule(static) num_threads(nthread) for (bst_omp_uint i = 0; i < ndata; ++i) { @@ -196,10 +196,10 @@ void SparsePageDMatrix::InitColAccess(const std::vector& enabled, tid); } } - CHECK_EQ(pcol->Size(), info.num_col); + CHECK_EQ(pcol->Size(), info.num_col_); // sort columns if (sorted) { - bst_omp_uint ncol = static_cast(pcol->Size()); + auto ncol = static_cast(pcol->Size()); #pragma omp parallel for schedule(dynamic, 1) num_threads(nthread) for (bst_omp_uint i = 0; i < ncol; ++i) { if (pcol->offset[i] < pcol->offset[i + 1]) { @@ -213,16 +213,16 @@ void SparsePageDMatrix::InitColAccess(const 
std::vector& enabled, auto make_next_col = [&] (SparsePage* dptr) { tmp.Clear(); - size_t btop = buffered_rowset_.size(); + size_t btop = buffered_rowset_.Size(); while (true) { if (batch_ptr != batch_top) { const RowBatch& batch = iter->Value(); CHECK_EQ(batch_top, batch.size); for (size_t i = batch_ptr; i < batch_top; ++i) { - bst_uint ridx = static_cast(batch.base_rowid + i); + auto ridx = static_cast(batch.base_rowid + i); if (pkeep == 1.0f || coin_flip(rnd)) { - buffered_rowset_.push_back(ridx); + buffered_rowset_.PushBack(ridx); tmp.Push(batch[i]); } @@ -263,7 +263,7 @@ void SparsePageDMatrix::InitColAccess(const std::vector& enabled, double tstart = dmlc::GetTime(); size_t bytes_write = 0; // print every 4 sec. - const double kStep = 4.0; + constexpr double kStep = 4.0; size_t tick_expected = kStep; while (make_next_col(page.get())) { diff --git a/src/data/sparse_page_dmatrix.h b/src/data/sparse_page_dmatrix.h index 597a223b9..7706f793c 100644 --- a/src/data/sparse_page_dmatrix.h +++ b/src/data/sparse_page_dmatrix.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -22,15 +23,15 @@ namespace data { class SparsePageDMatrix : public DMatrix { public: explicit SparsePageDMatrix(std::unique_ptr&& source, - const std::string& cache_info) - : source_(std::move(source)), cache_info_(cache_info) { + std::string cache_info) + : source_(std::move(source)), cache_info_(std::move(cache_info)) { } - MetaInfo& info() override { + MetaInfo& Info() override { return source_->info; } - const MetaInfo& info() const override { + const MetaInfo& Info() const override { return source_->info; } @@ -41,10 +42,10 @@ class SparsePageDMatrix : public DMatrix { } bool HaveColAccess(bool sorted) const override { - return col_iter_.get() != nullptr && col_iter_->sorted == sorted; + return col_iter_ != nullptr && col_iter_->sorted == sorted; } - const RowSet& buffered_rowset() const override { + const RowSet& BufferedRowset() const override { return buffered_rowset_; } @@ -53,8 +54,8 @@ class SparsePageDMatrix : public DMatrix { } float GetColDensity(size_t cidx) const override { - size_t nmiss = buffered_rowset_.size() - col_size_[cidx]; - return 1.0f - (static_cast(nmiss)) / buffered_rowset_.size(); + size_t nmiss = buffered_rowset_.Size() - col_size_[cidx]; + return 1.0f - (static_cast(nmiss)) / buffered_rowset_.Size(); } bool SingleColBlock() const override { @@ -79,7 +80,7 @@ class SparsePageDMatrix : public DMatrix { class ColPageIter : public dmlc::DataIter { public: explicit ColPageIter(std::vector >&& files); - virtual ~ColPageIter(); + ~ColPageIter() override; void BeforeFirst() override; const ColBatch &Value() const override { return out_; diff --git a/src/data/sparse_page_raw_format.cc b/src/data/sparse_page_raw_format.cc index 07c390cab..ecc62f14d 100644 --- a/src/data/sparse_page_raw_format.cc +++ b/src/data/sparse_page_raw_format.cc @@ -34,8 +34,7 @@ class SparsePageRawFormat : public SparsePage::Format { // setup the offset page->offset.clear(); page->offset.push_back(0); - for (size_t i = 0; i < sorted_index_set.size(); ++i) { - bst_uint fid = sorted_index_set[i]; + for (unsigned int fid : sorted_index_set) { CHECK_LT(fid + 1, disk_offset_.size()); size_t size = disk_offset_[fid + 1] - disk_offset_[fid]; page->offset.push_back(page->offset.back() + size); diff --git a/src/data/sparse_page_source.cc b/src/data/sparse_page_source.cc index 751c02824..64b78b2bf 100644 --- a/src/data/sparse_page_source.cc +++ b/src/data/sparse_page_source.cc @@ -89,12 +89,12 @@ 
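// CacheExist opens each cache shard's metadata file and .row.page file via
// dmlc::Stream::Create(..., "r", true); the trailing `true` asks for a null
// stream instead of an error when the file is missing, so the checks below
// can simply report false when any shard is absent.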
bool SparsePageSource::CacheExist(const std::string& cache_info) { { std::string name_info = cache_shards[0]; std::unique_ptr finfo(dmlc::Stream::Create(name_info.c_str(), "r", true)); - if (finfo.get() == nullptr) return false; + if (finfo == nullptr) return false; } for (const std::string& prefix : cache_shards) { std::string name_row = prefix + ".row.page"; std::unique_ptr frow(dmlc::Stream::Create(name_row.c_str(), "r", true)); - if (frow.get() == nullptr) return false; + if (frow == nullptr) return false; } return true; } @@ -119,22 +119,22 @@ void SparsePageSource::Create(dmlc::Parser* src, size_t bytes_write = 0; double tstart = dmlc::GetTime(); // print every 4 sec. - const double kStep = 4.0; + constexpr double kStep = 4.0; size_t tick_expected = static_cast(kStep); while (src->Next()) { const dmlc::RowBlock& batch = src->Value(); if (batch.label != nullptr) { - info.labels.insert(info.labels.end(), batch.label, batch.label + batch.size); + info.labels_.insert(info.labels_.end(), batch.label, batch.label + batch.size); } if (batch.weight != nullptr) { - info.weights.insert(info.weights.end(), batch.weight, batch.weight + batch.size); + info.weights_.insert(info.weights_.end(), batch.weight, batch.weight + batch.size); } - info.num_row += batch.size; - info.num_nonzero += batch.offset[batch.size] - batch.offset[0]; + info.num_row_ += batch.size; + info.num_nonzero_ += batch.offset[batch.size] - batch.offset[0]; for (size_t i = batch.offset[0]; i < batch.offset[batch.size]; ++i) { uint32_t index = batch.index[i]; - info.num_col = std::max(info.num_col, + info.num_col_ = std::max(info.num_col_, static_cast(index + 1)); } page->Push(batch); @@ -183,7 +183,7 @@ void SparsePageSource::Create(DMatrix* src, std::shared_ptr page; writer.Alloc(&page); page->Clear(); - MetaInfo info = src->info(); + MetaInfo info = src->Info(); size_t bytes_write = 0; double tstart = dmlc::GetTime(); dmlc::DataIter* iter = src->RowIterator(); diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index 59bf501fd..88f2c3ef6 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -33,7 +33,7 @@ class SparsePageSource : public DataSource { */ explicit SparsePageSource(const std::string& cache_prefix) noexcept(false); /*! 
\brief destructor */ - virtual ~SparsePageSource(); + ~SparsePageSource() override; // implement Next bool Next() override; // implement BeforeFirst diff --git a/src/data/sparse_page_writer.cc b/src/data/sparse_page_writer.cc index 939e7a7a0..f420ff2ce 100644 --- a/src/data/sparse_page_writer.cc +++ b/src/data/sparse_page_writer.cc @@ -34,7 +34,7 @@ SparsePage::Writer::Writer( fo->Write(format_shard); std::shared_ptr page; while (wqueue->Pop(&page)) { - if (page.get() == nullptr) break; + if (page == nullptr) break; fmt->Write(*page, fo.get()); qrecycle_.Push(std::move(page)); } @@ -61,7 +61,7 @@ void SparsePage::Writer::PushWrite(std::shared_ptr&& page) { } void SparsePage::Writer::Alloc(std::shared_ptr* out_page) { - CHECK(out_page->get() == nullptr); + CHECK(*out_page == nullptr); if (num_free_buffer_ != 0) { out_page->reset(new SparsePage()); --num_free_buffer_; diff --git a/src/gbm/gblinear.cc b/src/gbm/gblinear.cc index 6e14c2e6b..d1ea3a306 100644 --- a/src/gbm/gblinear.cc +++ b/src/gbm/gblinear.cc @@ -52,9 +52,9 @@ class GBLinear : public GradientBooster { explicit GBLinear(const std::vector > &cache, bst_float base_margin) : base_margin_(base_margin), - sum_instance_weight(0), - sum_weight_complete(false), - is_converged(false) { + sum_instance_weight_(0), + sum_weight_complete_(false), + is_converged_(false) { // Add matrices to the prediction cache for (auto &d : cache) { PredictionCacheEntry e; @@ -63,46 +63,46 @@ class GBLinear : public GradientBooster { } } void Configure(const std::vector >& cfg) override { - if (model.weight.size() == 0) { - model.param.InitAllowUnknown(cfg); + if (model_.weight.size() == 0) { + model_.param.InitAllowUnknown(cfg); } - param.InitAllowUnknown(cfg); - updater.reset(LinearUpdater::Create(param.updater)); - updater->Init(cfg); - monitor.Init("GBLinear ", param.debug_verbose); + param_.InitAllowUnknown(cfg); + updater_.reset(LinearUpdater::Create(param_.updater)); + updater_->Init(cfg); + monitor_.Init("GBLinear ", param_.debug_verbose); } void Load(dmlc::Stream* fi) override { - model.Load(fi); + model_.Load(fi); } void Save(dmlc::Stream* fo) const override { - model.Save(fo); + model_.Save(fo); } void DoBoost(DMatrix *p_fmat, - HostDeviceVector *in_gpair, + HostDeviceVector *in_gpair, ObjFunction* obj) override { - monitor.Start("DoBoost"); + monitor_.Start("DoBoost"); if (!p_fmat->HaveColAccess(false)) { - std::vector enabled(p_fmat->info().num_col, true); - p_fmat->InitColAccess(enabled, 1.0f, param.max_row_perbatch, false); + std::vector enabled(p_fmat->Info().num_col_, true); + p_fmat->InitColAccess(enabled, 1.0f, param_.max_row_perbatch, false); } - model.LazyInitModel(); + model_.LazyInitModel(); this->LazySumWeights(p_fmat); if (!this->CheckConvergence()) { - updater->Update(&in_gpair->data_h(), p_fmat, &model, sum_instance_weight); + updater_->Update(&in_gpair->HostVector(), p_fmat, &model_, sum_instance_weight_); } this->UpdatePredictionCache(); - monitor.Stop("DoBoost"); + monitor_.Stop("DoBoost"); } void PredictBatch(DMatrix *p_fmat, HostDeviceVector *out_preds, unsigned ntree_limit) override { - monitor.Start("PredictBatch"); + monitor_.Start("PredictBatch"); CHECK_EQ(ntree_limit, 0U) << "GBLinear::Predict ntrees is only valid for gbtree predictor"; @@ -110,19 +110,19 @@ class GBLinear : public GradientBooster { auto it = cache_.find(p_fmat); if (it != cache_.end() && it->second.predictions.size() != 0) { std::vector &y = it->second.predictions; - out_preds->resize(y.size()); - std::copy(y.begin(), y.end(), 
out_preds->data_h().begin()); + out_preds->Resize(y.size()); + std::copy(y.begin(), y.end(), out_preds->HostVector().begin()); } else { - this->PredictBatchInternal(p_fmat, &out_preds->data_h()); + this->PredictBatchInternal(p_fmat, &out_preds->HostVector()); } - monitor.Stop("PredictBatch"); + monitor_.Stop("PredictBatch"); } // add base margin void PredictInstance(const SparseBatch::Inst &inst, std::vector *out_preds, unsigned ntree_limit, unsigned root_index) override { - const int ngroup = model.param.num_output_group; + const int ngroup = model_.param.num_output_group; for (int gid = 0; gid < ngroup; ++gid) { this->Pred(inst, dmlc::BeginPtr(*out_preds), gid, base_margin_); } @@ -138,15 +138,15 @@ class GBLinear : public GradientBooster { std::vector* out_contribs, unsigned ntree_limit, bool approximate, int condition = 0, unsigned condition_feature = 0) override { - model.LazyInitModel(); + model_.LazyInitModel(); CHECK_EQ(ntree_limit, 0U) << "GBLinear::PredictContribution: ntrees is only valid for gbtree predictor"; - const std::vector& base_margin = p_fmat->info().base_margin; - const int ngroup = model.param.num_output_group; - const size_t ncolumns = model.param.num_feature + 1; + const std::vector& base_margin = p_fmat->Info().base_margin_; + const int ngroup = model_.param.num_output_group; + const size_t ncolumns = model_.param.num_feature + 1; // allocate space for (#features + bias) times #groups times #rows std::vector& contribs = *out_contribs; - contribs.resize(p_fmat->info().num_row * ncolumns * ngroup); + contribs.resize(p_fmat->Info().num_row_ * ncolumns * ngroup); // make sure contributions is zeroed, we could be reusing a previously allocated one std::fill(contribs.begin(), contribs.end(), 0); // start collecting the contributions @@ -155,21 +155,21 @@ class GBLinear : public GradientBooster { while (iter->Next()) { const RowBatch& batch = iter->Value(); // parallel over local batch - const bst_omp_uint nsize = static_cast(batch.size); + const auto nsize = static_cast(batch.size); #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < nsize; ++i) { const RowBatch::Inst &inst = batch[i]; - size_t row_idx = static_cast(batch.base_rowid + i); + auto row_idx = static_cast(batch.base_rowid + i); // loop over output groups for (int gid = 0; gid < ngroup; ++gid) { bst_float *p_contribs = &contribs[(row_idx * ngroup + gid) * ncolumns]; // calculate linear terms' contributions for (bst_uint c = 0; c < inst.length; ++c) { - if (inst[c].index >= model.param.num_feature) continue; - p_contribs[inst[c].index] = inst[c].fvalue * model[inst[c].index][gid]; + if (inst[c].index >= model_.param.num_feature) continue; + p_contribs[inst[c].index] = inst[c].fvalue * model_[inst[c].index][gid]; } // add base margin to BIAS - p_contribs[ncolumns - 1] = model.bias()[gid] + + p_contribs[ncolumns - 1] = model_.bias()[gid] + ((base_margin.size() != 0) ? 
base_margin[row_idx * ngroup + gid] : base_margin_); } } @@ -182,34 +182,34 @@ class GBLinear : public GradientBooster { std::vector& contribs = *out_contribs; // linear models have no interaction effects - const size_t nelements = model.param.num_feature*model.param.num_feature; - contribs.resize(p_fmat->info().num_row * nelements * model.param.num_output_group); + const size_t nelements = model_.param.num_feature*model_.param.num_feature; + contribs.resize(p_fmat->Info().num_row_ * nelements * model_.param.num_output_group); std::fill(contribs.begin(), contribs.end(), 0); } std::vector DumpModel(const FeatureMap& fmap, bool with_stats, std::string format) const override { - return model.DumpModel(fmap, with_stats, format); + return model_.DumpModel(fmap, with_stats, format); } protected: void PredictBatchInternal(DMatrix *p_fmat, std::vector *out_preds) { - monitor.Start("PredictBatchInternal"); - model.LazyInitModel(); + monitor_.Start("PredictBatchInternal"); + model_.LazyInitModel(); std::vector &preds = *out_preds; - const std::vector& base_margin = p_fmat->info().base_margin; + const std::vector& base_margin = p_fmat->Info().base_margin_; // start collecting the prediction dmlc::DataIter *iter = p_fmat->RowIterator(); - const int ngroup = model.param.num_output_group; - preds.resize(p_fmat->info().num_row * ngroup); + const int ngroup = model_.param.num_output_group; + preds.resize(p_fmat->Info().num_row_ * ngroup); while (iter->Next()) { const RowBatch &batch = iter->Value(); // output convention: nrow * k, where nrow is number of rows // k is number of group // parallel over local batch - const omp_ulong nsize = static_cast(batch.size); + const auto nsize = static_cast(batch.size); #pragma omp parallel for schedule(static) for (omp_ulong i = 0; i < nsize; ++i) { const size_t ridx = batch.base_rowid + i; @@ -221,14 +221,14 @@ class GBLinear : public GradientBooster { } } } - monitor.Stop("PredictBatchInternal"); + monitor_.Stop("PredictBatchInternal"); } void UpdatePredictionCache() { // update cache entry for (auto &kv : cache_) { PredictionCacheEntry &e = kv.second; if (e.predictions.size() == 0) { - size_t n = model.param.num_output_group * e.data->info().num_row; + size_t n = model_.param.num_output_group * e.data->Info().num_row_; e.predictions.resize(n); } this->PredictBatchInternal(e.data.get(), &e.predictions); @@ -236,53 +236,53 @@ class GBLinear : public GradientBooster { } bool CheckConvergence() { - if (param.tolerance == 0.0f) return false; - if (is_converged) return true; - if (previous_model.weight.size() != model.weight.size()) { - previous_model = model; + if (param_.tolerance == 0.0f) return false; + if (is_converged_) return true; + if (previous_model_.weight.size() != model_.weight.size()) { + previous_model_ = model_; return false; } float largest_dw = 0.0; - for (size_t i = 0; i < model.weight.size(); i++) { + for (size_t i = 0; i < model_.weight.size(); i++) { largest_dw = std::max( - largest_dw, std::abs(model.weight[i] - previous_model.weight[i])); + largest_dw, std::abs(model_.weight[i] - previous_model_.weight[i])); } - previous_model = model; + previous_model_ = model_; - is_converged = largest_dw <= param.tolerance; - return is_converged; + is_converged_ = largest_dw <= param_.tolerance; + return is_converged_; } void LazySumWeights(DMatrix *p_fmat) { - if (!sum_weight_complete) { - auto &info = p_fmat->info(); - for (size_t i = 0; i < info.num_row; i++) { - sum_instance_weight += info.GetWeight(i); + if (!sum_weight_complete_) { + auto &info = 
p_fmat->Info(); + for (size_t i = 0; i < info.num_row_; i++) { + sum_instance_weight_ += info.GetWeight(i); } - sum_weight_complete = true; + sum_weight_complete_ = true; } } inline void Pred(const RowBatch::Inst &inst, bst_float *preds, int gid, bst_float base) { - bst_float psum = model.bias()[gid] + base; + bst_float psum = model_.bias()[gid] + base; for (bst_uint i = 0; i < inst.length; ++i) { - if (inst[i].index >= model.param.num_feature) continue; - psum += inst[i].fvalue * model[inst[i].index][gid]; + if (inst[i].index >= model_.param.num_feature) continue; + psum += inst[i].fvalue * model_[inst[i].index][gid]; } preds[gid] = psum; } // base margin score bst_float base_margin_; // model field - GBLinearModel model; - GBLinearModel previous_model; - GBLinearTrainParam param; - std::unique_ptr updater; - double sum_instance_weight; - bool sum_weight_complete; - common::Monitor monitor; - bool is_converged; + GBLinearModel model_; + GBLinearModel previous_model_; + GBLinearTrainParam param_; + std::unique_ptr updater_; + double sum_instance_weight_; + bool sum_weight_complete_; + common::Monitor monitor_; + bool is_converged_; /** * \struct PredictionCacheEntry diff --git a/src/gbm/gblinear_model.h b/src/gbm/gblinear_model.h index 10e4ffe0c..dbaa22a54 100644 --- a/src/gbm/gblinear_model.h +++ b/src/gbm/gblinear_model.h @@ -40,7 +40,7 @@ class GBLinearModel { // weight for each feature; the bias is the last one std::vector weight; // initialize the model parameter - inline void LazyInitModel(void) { + inline void LazyInitModel() { if (!weight.empty()) return; // bias is the last weight weight.resize((param.num_feature + 1) * param.num_output_group); diff --git a/src/gbm/gbtree.cc b/src/gbm/gbtree.cc index 3ccf5782a..9572358fc 100644 --- a/src/gbm/gbtree.cc +++ b/src/gbm/gbtree.cc @@ -143,32 +143,32 @@ class GBTree : public GradientBooster { } void Configure(const std::vector >& cfg) override { - this->cfg = cfg; + this->cfg_ = cfg; model_.Configure(cfg); // initialize the updaters only when needed.
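The bulk of the gblinear hunks above are mechanical renames that give private and protected data members a trailing underscore, matching the naming convention applied throughout this patch. A minimal sketch of the convention on a toy class (illustrative only, not patch code):

```cpp
#include <string>
#include <utility>

class Booster {
 public:
  explicit Booster(std::string name) : name_(std::move(name)) {}
  // Members carry a trailing underscore, so parameters and locals
  // (like `name` above) are never confused with member state.
  const std::string& Name() const { return name_; }

 private:
  std::string name_;
  double sum_instance_weight_ = 0.0;  // mirrors sum_instance_weight_ above
};
```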
- std::string updater_seq = tparam.updater_seq; - tparam.InitAllowUnknown(cfg); - if (updater_seq != tparam.updater_seq) updaters.clear(); - for (const auto& up : updaters) { + std::string updater_seq = tparam_.updater_seq; + tparam_.InitAllowUnknown(cfg); + if (updater_seq != tparam_.updater_seq) updaters_.clear(); + for (const auto& up : updaters_) { up->Init(cfg); } // for the 'update' process_type, move trees into trees_to_update - if (tparam.process_type == kUpdate) { + if (tparam_.process_type == kUpdate) { model_.InitTreesToUpdate(); } // configure predictor - predictor = std::unique_ptr(Predictor::Create(tparam.predictor)); - predictor->Init(cfg, cache_); - monitor.Init("GBTree", tparam.debug_verbose); + predictor_ = std::unique_ptr(Predictor::Create(tparam_.predictor)); + predictor_->Init(cfg, cache_); + monitor_.Init("GBTree", tparam_.debug_verbose); } void Load(dmlc::Stream* fi) override { model_.Load(fi); - this->cfg.clear(); - this->cfg.push_back(std::make_pair(std::string("num_feature"), - common::ToString(model_.param.num_feature))); + this->cfg_.clear(); + this->cfg_.emplace_back(std::string("num_feature"), + common::ToString(model_.param.num_feature)); } void Save(dmlc::Stream* fo) const override { @@ -177,29 +177,29 @@ class GBTree : public GradientBooster { bool AllowLazyCheckPoint() const override { return model_.param.num_output_group == 1 || - tparam.updater_seq.find("distcol") != std::string::npos; + tparam_.updater_seq.find("distcol") != std::string::npos; } void DoBoost(DMatrix* p_fmat, - HostDeviceVector* in_gpair, + HostDeviceVector* in_gpair, ObjFunction* obj) override { std::vector > > new_trees; const int ngroup = model_.param.num_output_group; - monitor.Start("BoostNewTrees"); + monitor_.Start("BoostNewTrees"); if (ngroup == 1) { std::vector > ret; BoostNewTrees(in_gpair, p_fmat, 0, &ret); new_trees.push_back(std::move(ret)); } else { - CHECK_EQ(in_gpair->size() % ngroup, 0U) + CHECK_EQ(in_gpair->Size() % ngroup, 0U) << "must have exactly ngroup*nrow gpairs"; // TODO(canonizer): perform this on GPU if HostDeviceVector has device set. 
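The `push_back(std::make_pair(...))` to `emplace_back(...)` change in `Load` above constructs the pair directly in the vector's storage instead of building a temporary and moving it in. A standalone illustration (hypothetical values, not patch code):

```cpp
#include <string>
#include <utility>
#include <vector>

int main() {
  std::vector<std::pair<std::string, std::string>> cfg;

  // Before: a temporary pair is created, then moved into the vector.
  cfg.push_back(std::make_pair(std::string("num_feature"), std::string("127")));

  // After: the pair is constructed in place from the arguments.
  cfg.emplace_back("num_feature", "127");
  return 0;
}
```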
- HostDeviceVector tmp(in_gpair->size() / ngroup, - bst_gpair(), in_gpair->device()); - std::vector& gpair_h = in_gpair->data_h(); - bst_omp_uint nsize = static_cast(tmp.size()); + HostDeviceVector tmp(in_gpair->Size() / ngroup, + GradientPair(), in_gpair->DeviceIdx()); + std::vector& gpair_h = in_gpair->HostVector(); + auto nsize = static_cast(tmp.Size()); for (int gid = 0; gid < ngroup; ++gid) { - std::vector& tmp_h = tmp.data_h(); + std::vector& tmp_h = tmp.HostVector(); #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < nsize; ++i) { tmp_h[i] = gpair_h[i * ngroup + gid]; @@ -209,43 +209,43 @@ class GBTree : public GradientBooster { new_trees.push_back(std::move(ret)); } } - monitor.Stop("BoostNewTrees"); - monitor.Start("CommitModel"); + monitor_.Stop("BoostNewTrees"); + monitor_.Start("CommitModel"); this->CommitModel(std::move(new_trees)); - monitor.Stop("CommitModel"); + monitor_.Stop("CommitModel"); } void PredictBatch(DMatrix* p_fmat, HostDeviceVector* out_preds, unsigned ntree_limit) override { - predictor->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit); + predictor_->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit); } void PredictInstance(const SparseBatch::Inst& inst, std::vector* out_preds, unsigned ntree_limit, unsigned root_index) override { - predictor->PredictInstance(inst, out_preds, model_, + predictor_->PredictInstance(inst, out_preds, model_, ntree_limit, root_index); } void PredictLeaf(DMatrix* p_fmat, std::vector* out_preds, unsigned ntree_limit) override { - predictor->PredictLeaf(p_fmat, out_preds, model_, ntree_limit); + predictor_->PredictLeaf(p_fmat, out_preds, model_, ntree_limit); } void PredictContribution(DMatrix* p_fmat, std::vector* out_contribs, unsigned ntree_limit, bool approximate, int condition, unsigned condition_feature) override { - predictor->PredictContribution(p_fmat, out_contribs, model_, ntree_limit, approximate); + predictor_->PredictContribution(p_fmat, out_contribs, model_, ntree_limit, approximate); } void PredictInteractionContributions(DMatrix* p_fmat, std::vector* out_contribs, unsigned ntree_limit, bool approximate) override { - predictor->PredictInteractionContributions(p_fmat, out_contribs, model_, + predictor_->PredictInteractionContributions(p_fmat, out_contribs, model_, ntree_limit, approximate); } @@ -258,18 +258,18 @@ class GBTree : public GradientBooster { protected: // initialize updater before using them inline void InitUpdater() { - if (updaters.size() != 0) return; - std::string tval = tparam.updater_seq; + if (updaters_.size() != 0) return; + std::string tval = tparam_.updater_seq; std::vector ups = common::Split(tval, ','); for (const std::string& pstr : ups) { std::unique_ptr up(TreeUpdater::Create(pstr.c_str())); - up->Init(this->cfg); - updaters.push_back(std::move(up)); + up->Init(this->cfg_); + updaters_.push_back(std::move(up)); } } // do group specific group - inline void BoostNewTrees(HostDeviceVector* gpair, + inline void BoostNewTrees(HostDeviceVector* gpair, DMatrix *p_fmat, int bst_group, std::vector >* ret) { @@ -277,26 +277,27 @@ class GBTree : public GradientBooster { std::vector new_trees; ret->clear(); // create the trees - for (int i = 0; i < tparam.num_parallel_tree; ++i) { - if (tparam.process_type == kDefault) { + for (int i = 0; i < tparam_.num_parallel_tree; ++i) { + if (tparam_.process_type == kDefault) { // create new tree std::unique_ptr ptr(new RegTree()); - ptr->param.InitAllowUnknown(this->cfg); + ptr->param.InitAllowUnknown(this->cfg_); ptr->InitModel(); 
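The multi-class path in `DoBoost` above keeps gradient pairs interleaved by group (`nrow * ngroup`, all groups of a row adjacent) and copies out one group at a time with a stride. A simplified sketch of that slicing, using plain floats in place of `GradientPair` (assumed layout, not patch code):

```cpp
#include <cstddef>
#include <vector>

// Extract the entries of one output group from an interleaved buffer
// laid out as [row0_g0, row0_g1, ..., row1_g0, row1_g1, ...].
std::vector<float> SliceGroup(const std::vector<float>& gpair,
                              int ngroup, int gid) {
  std::vector<float> out(gpair.size() / ngroup);
  for (std::size_t i = 0; i < out.size(); ++i) {
    out[i] = gpair[i * ngroup + gid];  // same index as tmp_h[i] = gpair_h[i * ngroup + gid]
  }
  return out;
}
```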
new_trees.push_back(ptr.get()); ret->push_back(std::move(ptr)); - } else if (tparam.process_type == kUpdate) { + } else if (tparam_.process_type == kUpdate) { CHECK_LT(model_.trees.size(), model_.trees_to_update.size()); // move an existing tree from trees_to_update auto t = std::move(model_.trees_to_update[model_.trees.size() + - bst_group * tparam.num_parallel_tree + i]); + bst_group * tparam_.num_parallel_tree + i]); new_trees.push_back(t.get()); ret->push_back(std::move(t)); } } // update the trees - for (auto& up : updaters) + for (auto& up : updaters_) { up->Update(gpair, p_fmat, new_trees); +} } // commit new trees all at once @@ -307,22 +308,22 @@ class GBTree : public GradientBooster { num_new_trees += new_trees[gid].size(); model_.CommitModel(std::move(new_trees[gid]), gid); } - predictor->UpdatePredictionCache(model_, &updaters, num_new_trees); + predictor_->UpdatePredictionCache(model_, &updaters_, num_new_trees); } // --- data structure --- GBTreeModel model_; // training parameter - GBTreeTrainParam tparam; + GBTreeTrainParam tparam_; // ----training fields---- // configurations for tree - std::vector > cfg; + std::vector > cfg_; // the updaters that can be applied to each of tree - std::vector> updaters; + std::vector> updaters_; // Cached matrices std::vector> cache_; - std::unique_ptr predictor; - common::Monitor monitor; + std::unique_ptr predictor_; + common::Monitor monitor_; }; // dart @@ -333,22 +334,22 @@ class Dart : public GBTree { void Configure(const std::vector >& cfg) override { GBTree::Configure(cfg); if (model_.trees.size() == 0) { - dparam.InitAllowUnknown(cfg); + dparam_.InitAllowUnknown(cfg); } } void Load(dmlc::Stream* fi) override { GBTree::Load(fi); - weight_drop.resize(model_.param.num_trees); + weight_drop_.resize(model_.param.num_trees); if (model_.param.num_trees != 0) { - fi->Read(&weight_drop); + fi->Read(&weight_drop_); } } void Save(dmlc::Stream* fo) const override { GBTree::Save(fo); - if (weight_drop.size() != 0) { - fo->Write(weight_drop); + if (weight_drop_.size() != 0) { + fo->Write(weight_drop_); } } @@ -357,7 +358,7 @@ class Dart : public GBTree { HostDeviceVector* out_preds, unsigned ntree_limit) override { DropTrees(ntree_limit); - PredLoopInternal(p_fmat, &out_preds->data_h(), 0, ntree_limit, true); + PredLoopInternal(p_fmat, &out_preds->HostVector(), 0, ntree_limit, true); } void PredictInstance(const SparseBatch::Inst& inst, @@ -365,9 +366,9 @@ class Dart : public GBTree { unsigned ntree_limit, unsigned root_index) override { DropTrees(1); - if (thread_temp.size() == 0) { - thread_temp.resize(1, RegTree::FVec()); - thread_temp[0].Init(model_.param.num_feature); + if (thread_temp_.size() == 0) { + thread_temp_.resize(1, RegTree::FVec()); + thread_temp_[0].Init(model_.param.num_feature); } out_preds->resize(model_.param.num_output_group); ntree_limit *= model_.param.num_output_group; @@ -378,7 +379,7 @@ class Dart : public GBTree { for (int gid = 0; gid < model_.param.num_output_group; ++gid) { (*out_preds)[gid] = PredValue(inst, gid, root_index, - &thread_temp[0], 0, ntree_limit) + model_.base_margin; + &thread_temp_[0], 0, ntree_limit) + model_.base_margin; } } @@ -400,8 +401,8 @@ class Dart : public GBTree { } if (init_out_preds) { - size_t n = num_group * p_fmat->info().num_row; - const std::vector& base_margin = p_fmat->info().base_margin; + size_t n = num_group * p_fmat->Info().num_row_; + const std::vector& base_margin = p_fmat->Info().base_margin_; out_preds->resize(n); if (base_margin.size() != 0) { CHECK_EQ(out_preds->size(), 
n); @@ -427,37 +428,37 @@ class Dart : public GBTree { int num_group, unsigned tree_begin, unsigned tree_end) { - const MetaInfo& info = p_fmat->info(); + const MetaInfo& info = p_fmat->Info(); const int nthread = omp_get_max_threads(); CHECK_EQ(num_group, model_.param.num_output_group); InitThreadTemp(nthread); std::vector& preds = *out_preds; CHECK_EQ(model_.param.size_leaf_vector, 0) << "size_leaf_vector is enforced to 0 so far"; - CHECK_EQ(preds.size(), p_fmat->info().num_row * num_group); + CHECK_EQ(preds.size(), p_fmat->Info().num_row_ * num_group); // start collecting the prediction dmlc::DataIter* iter = p_fmat->RowIterator(); - Derived* self = static_cast(this); + auto* self = static_cast(this); iter->BeforeFirst(); while (iter->Next()) { const RowBatch &batch = iter->Value(); // parallel over local batch - const int K = 8; - const bst_omp_uint nsize = static_cast(batch.size); - const bst_omp_uint rest = nsize % K; + constexpr int kUnroll = 8; + const auto nsize = static_cast(batch.size); + const bst_omp_uint rest = nsize % kUnroll; #pragma omp parallel for schedule(static) - for (bst_omp_uint i = 0; i < nsize - rest; i += K) { + for (bst_omp_uint i = 0; i < nsize - rest; i += kUnroll) { const int tid = omp_get_thread_num(); - RegTree::FVec& feats = thread_temp[tid]; - int64_t ridx[K]; - RowBatch::Inst inst[K]; - for (int k = 0; k < K; ++k) { + RegTree::FVec& feats = thread_temp_[tid]; + int64_t ridx[kUnroll]; + RowBatch::Inst inst[kUnroll]; + for (int k = 0; k < kUnroll; ++k) { ridx[k] = static_cast(batch.base_rowid + i + k); } - for (int k = 0; k < K; ++k) { + for (int k = 0; k < kUnroll; ++k) { inst[k] = batch[i + k]; } - for (int k = 0; k < K; ++k) { + for (int k = 0; k < kUnroll; ++k) { for (int gid = 0; gid < num_group; ++gid) { const size_t offset = ridx[k] * num_group + gid; preds[offset] += @@ -467,8 +468,8 @@ class Dart : public GBTree { } } for (bst_omp_uint i = nsize - rest; i < nsize; ++i) { - RegTree::FVec& feats = thread_temp[0]; - const int64_t ridx = static_cast(batch.base_rowid + i); + RegTree::FVec& feats = thread_temp_[0]; + const auto ridx = static_cast(batch.base_rowid + i); const RowBatch::Inst inst = batch[i]; for (int gid = 0; gid < num_group; ++gid) { const size_t offset = ridx * num_group + gid; @@ -489,9 +490,9 @@ class Dart : public GBTree { model_.CommitModel(std::move(new_trees[gid]), gid); } size_t num_drop = NormalizeTrees(num_new_trees); - if (dparam.silent != 1) { + if (dparam_.silent != 1) { LOG(INFO) << "drop " << num_drop << " trees, " - << "weight = " << weight_drop.back(); + << "weight = " << weight_drop_.back(); } } @@ -506,10 +507,10 @@ class Dart : public GBTree { p_feats->Fill(inst); for (size_t i = tree_begin; i < tree_end; ++i) { if (model_.tree_info[i] == bst_group) { - bool drop = (std::binary_search(idx_drop.begin(), idx_drop.end(), i)); + bool drop = (std::binary_search(idx_drop_.begin(), idx_drop_.end(), i)); if (!drop) { int tid = model_.trees[i]->GetLeafIndex(*p_feats, root_index); - psum += weight_drop[i] * (*model_.trees[i])[tid].leaf_value(); + psum += weight_drop_[i] * (*model_.trees[i])[tid].LeafValue(); } } } @@ -519,45 +520,45 @@ class Dart : public GBTree { // select which trees to drop inline void DropTrees(unsigned ntree_limit_drop) { - idx_drop.clear(); + idx_drop_.clear(); if (ntree_limit_drop > 0) return; std::uniform_real_distribution<> runif(0.0, 1.0); auto& rnd = common::GlobalRandom(); bool skip = false; - if (dparam.skip_drop > 0.0) skip = (runif(rnd) < dparam.skip_drop); + if (dparam_.skip_drop > 0.0) skip = 
(runif(rnd) < dparam_.skip_drop); // sample some trees to drop if (!skip) { - if (dparam.sample_type == 1) { + if (dparam_.sample_type == 1) { bst_float sum_weight = 0.0; - for (size_t i = 0; i < weight_drop.size(); ++i) { - sum_weight += weight_drop[i]; + for (auto elem : weight_drop_) { + sum_weight += elem; } - for (size_t i = 0; i < weight_drop.size(); ++i) { - if (runif(rnd) < dparam.rate_drop * weight_drop.size() * weight_drop[i] / sum_weight) { - idx_drop.push_back(i); + for (size_t i = 0; i < weight_drop_.size(); ++i) { + if (runif(rnd) < dparam_.rate_drop * weight_drop_.size() * weight_drop_[i] / sum_weight) { + idx_drop_.push_back(i); } } - if (dparam.one_drop && idx_drop.empty() && !weight_drop.empty()) { + if (dparam_.one_drop && idx_drop_.empty() && !weight_drop_.empty()) { // the expression below is an ugly but MSVC2013-friendly equivalent of // size_t i = std::discrete_distribution(weight_drop.begin(), // weight_drop.end())(rnd); size_t i = std::discrete_distribution( - weight_drop.size(), 0., static_cast(weight_drop.size()), + weight_drop_.size(), 0., static_cast(weight_drop_.size()), [this](double x) -> double { - return weight_drop[static_cast(x)]; + return weight_drop_[static_cast(x)]; })(rnd); - idx_drop.push_back(i); + idx_drop_.push_back(i); } } else { - for (size_t i = 0; i < weight_drop.size(); ++i) { - if (runif(rnd) < dparam.rate_drop) { - idx_drop.push_back(i); + for (size_t i = 0; i < weight_drop_.size(); ++i) { + if (runif(rnd) < dparam_.rate_drop) { + idx_drop_.push_back(i); } } - if (dparam.one_drop && idx_drop.empty() && !weight_drop.empty()) { - size_t i = std::uniform_int_distribution(0, weight_drop.size() - 1)(rnd); - idx_drop.push_back(i); + if (dparam_.one_drop && idx_drop_.empty() && !weight_drop_.empty()) { + size_t i = std::uniform_int_distribution(0, weight_drop_.size() - 1)(rnd); + idx_drop_.push_back(i); } } } @@ -565,58 +566,58 @@ class Dart : public GBTree { // set normalization factors inline size_t NormalizeTrees(size_t size_new_trees) { - float lr = 1.0 * dparam.learning_rate / size_new_trees; - size_t num_drop = idx_drop.size(); + float lr = 1.0 * dparam_.learning_rate / size_new_trees; + size_t num_drop = idx_drop_.size(); if (num_drop == 0) { for (size_t i = 0; i < size_new_trees; ++i) { - weight_drop.push_back(1.0); + weight_drop_.push_back(1.0); } } else { - if (dparam.normalize_type == 1) { + if (dparam_.normalize_type == 1) { // normalize_type 1 float factor = 1.0 / (1.0 + lr); - for (size_t i = 0; i < idx_drop.size(); ++i) { - weight_drop[idx_drop[i]] *= factor; + for (auto i : idx_drop_) { + weight_drop_[i] *= factor; } for (size_t i = 0; i < size_new_trees; ++i) { - weight_drop.push_back(factor); + weight_drop_.push_back(factor); } } else { // normalize_type 0 float factor = 1.0 * num_drop / (num_drop + lr); - for (size_t i = 0; i < idx_drop.size(); ++i) { - weight_drop[idx_drop[i]] *= factor; + for (auto i : idx_drop_) { + weight_drop_[i] *= factor; } for (size_t i = 0; i < size_new_trees; ++i) { - weight_drop.push_back(1.0 / (num_drop + lr)); + weight_drop_.push_back(1.0 / (num_drop + lr)); } } } // reset - idx_drop.clear(); + idx_drop_.clear(); return num_drop; } // init thread buffers inline void InitThreadTemp(int nthread) { - int prev_thread_temp_size = thread_temp.size(); + int prev_thread_temp_size = thread_temp_.size(); if (prev_thread_temp_size < nthread) { - thread_temp.resize(nthread, RegTree::FVec()); + thread_temp_.resize(nthread, RegTree::FVec()); for (int i = prev_thread_temp_size; i < nthread; ++i) { - 
thread_temp[i].Init(model_.param.num_feature); + thread_temp_[i].Init(model_.param.num_feature); } } } // --- data structure --- // training parameter - DartTrainParam dparam; + DartTrainParam dparam_; /*! \brief prediction buffer */ - std::vector weight_drop; + std::vector weight_drop_; // indexes of dropped trees - std::vector idx_drop; + std::vector idx_drop_; // temporal storage for per thread - std::vector thread_temp; + std::vector thread_temp_; }; // register the objective functions @@ -627,7 +628,7 @@ DMLC_REGISTER_PARAMETER(DartTrainParam); XGBOOST_REGISTER_GBM(GBTree, "gbtree") .describe("Tree booster, gradient boosted trees.") .set_body([](const std::vector >& cached_mats, bst_float base_margin) { - GBTree* p = new GBTree(base_margin); + auto* p = new GBTree(base_margin); p->InitCache(cached_mats); return p; }); diff --git a/src/gbm/gbtree_model.h b/src/gbm/gbtree_model.h index 8beb48e0f..f89a0afed 100644 --- a/src/gbm/gbtree_model.h +++ b/src/gbm/gbtree_model.h @@ -70,8 +70,8 @@ struct GBTreeModel { void InitTreesToUpdate() { if (trees_to_update.size() == 0u) { - for (size_t i = 0; i < trees.size(); ++i) { - trees_to_update.push_back(std::move(trees[i])); + for (auto & tree : trees) { + trees_to_update.push_back(std::move(tree)); } trees.clear(); param.num_trees = 0; @@ -100,8 +100,8 @@ struct GBTreeModel { void Save(dmlc::Stream* fo) const { CHECK_EQ(param.num_trees, static_cast(trees.size())); fo->Write(¶m, sizeof(param)); - for (size_t i = 0; i < trees.size(); ++i) { - trees[i]->Save(fo); + for (const auto & tree : trees) { + tree->Save(fo); } if (tree_info.size() != 0) { fo->Write(dmlc::BeginPtr(tree_info), sizeof(int) * tree_info.size()); @@ -111,15 +111,15 @@ struct GBTreeModel { std::vector DumpModel(const FeatureMap& fmap, bool with_stats, std::string format) const { std::vector dump; - for (size_t i = 0; i < trees.size(); i++) { - dump.push_back(trees[i]->DumpModel(fmap, with_stats, format)); + for (const auto & tree : trees) { + dump.push_back(tree->DumpModel(fmap, with_stats, format)); } return dump; } void CommitModel(std::vector >&& new_trees, int bst_group) { - for (size_t i = 0; i < new_trees.size(); ++i) { - trees.push_back(std::move(new_trees[i])); + for (auto & new_tree : new_trees) { + trees.push_back(std::move(new_tree)); tree_info.push_back(bst_group); } param.num_trees += static_cast(new_trees.size()); diff --git a/src/learner.cc b/src/learner.cc index 883c7a8e5..8177d2bfc 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -141,8 +141,8 @@ DMLC_REGISTER_PARAMETER(LearnerTrainParam); */ class LearnerImpl : public Learner { public: - explicit LearnerImpl(const std::vector >& cache) - : cache_(cache) { + explicit LearnerImpl(std::vector > cache) + : cache_(std::move(cache)) { // boosted tree name_obj_ = "reg:linear"; name_gbm_ = "gbtree"; @@ -155,25 +155,25 @@ class LearnerImpl : public Learner { } void ConfigureUpdaters() { - if (tparam.tree_method == 0 || tparam.tree_method == 1 || - tparam.tree_method == 2) { + if (tparam_.tree_method == 0 || tparam_.tree_method == 1 || + tparam_.tree_method == 2) { if (cfg_.count("updater") == 0) { - if (tparam.dsplit == 1) { + if (tparam_.dsplit == 1) { cfg_["updater"] = "distcol"; - } else if (tparam.dsplit == 2) { + } else if (tparam_.dsplit == 2) { cfg_["updater"] = "grow_histmaker,prune"; } - if (tparam.prob_buffer_row != 1.0f) { + if (tparam_.prob_buffer_row != 1.0f) { cfg_["updater"] = "grow_histmaker,refresh,prune"; } } - } else if (tparam.tree_method == 3) { + } else if (tparam_.tree_method == 3) { /* 
histogram-based algorithm */ LOG(CONSOLE) << "Tree method is selected to be \'hist\', which uses a " "single updater " << "grow_fast_histmaker."; cfg_["updater"] = "grow_fast_histmaker"; - } else if (tparam.tree_method == 4) { + } else if (tparam_.tree_method == 4) { this->AssertGPUSupport(); if (cfg_.count("updater") == 0) { cfg_["updater"] = "grow_gpu,prune"; @@ -181,7 +181,7 @@ class LearnerImpl : public Learner { if (cfg_.count("predictor") == 0) { cfg_["predictor"] = "gpu_predictor"; } - } else if (tparam.tree_method == 5) { + } else if (tparam_.tree_method == 5) { this->AssertGPUSupport(); if (cfg_.count("updater") == 0) { cfg_["updater"] = "grow_gpu_hist"; @@ -195,8 +195,8 @@ void Configure( const std::vector >& args) override { // add to configurations - tparam.InitAllowUnknown(args); - monitor.Init("Learner", tparam.debug_verbose); + tparam_.InitAllowUnknown(args); + monitor_.Init("Learner", tparam_.debug_verbose); cfg_.clear(); for (const auto& kv : args) { if (kv.first == "eval_metric") { @@ -206,20 +206,20 @@ }; if (std::all_of(metrics_.begin(), metrics_.end(), dup_check)) { metrics_.emplace_back(Metric::Create(kv.second)); - mparam.contain_eval_metrics = 1; + mparam_.contain_eval_metrics = 1; } } else { cfg_[kv.first] = kv.second; } } - if (tparam.nthread != 0) { - omp_set_num_threads(tparam.nthread); + if (tparam_.nthread != 0) { + omp_set_num_threads(tparam_.nthread); } // add additional parameters // These are constraints that need to be satisfied. - if (tparam.dsplit == 0 && rabit::IsDistributed()) { - tparam.dsplit = 2; + if (tparam_.dsplit == 0 && rabit::IsDistributed()) { + tparam_.dsplit = 2; } if (cfg_.count("num_class") != 0) { @@ -244,21 +244,21 @@ } if (!this->ModelInitialized()) { - mparam.InitAllowUnknown(args); + mparam_.InitAllowUnknown(args); name_obj_ = cfg_["objective"]; name_gbm_ = cfg_["booster"]; // set seed only before the model is initialized - common::GlobalRandom().seed(tparam.seed); + common::GlobalRandom().seed(tparam_.seed); } // set number of features correctly. - cfg_["num_feature"] = common::ToString(mparam.num_feature); - cfg_["num_class"] = common::ToString(mparam.num_class); + cfg_["num_feature"] = common::ToString(mparam_.num_feature); + cfg_["num_class"] = common::ToString(mparam_.num_class); - if (gbm_.get() != nullptr) { + if (gbm_ != nullptr) { gbm_->Configure(cfg_.begin(), cfg_.end()); } - if (obj_.get() != nullptr) { + if (obj_ != nullptr) { obj_->Configure(cfg_.begin(), cfg_.end()); } } @@ -281,7 +281,7 @@ class LearnerImpl : public Learner { // use the peekable reader.
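The `ConfigureUpdaters` logic above only fills in `updater` when the user has not set it explicitly, dispatching on the numeric `tree_method`. The decision table reduces to roughly this sketch (the helper itself is hypothetical; the strings come from the code above, and the `prob_buffer_row` case is placed first because it overrides the dsplit choices in the original):

```cpp
#include <string>

// Condensed view of the tree_method -> updater-sequence dispatch.
std::string DefaultUpdaterSeq(int tree_method, int dsplit,
                              float prob_buffer_row) {
  if (tree_method == 3) return "grow_fast_histmaker";  // 'hist'
  if (tree_method == 4) return "grow_gpu,prune";       // GPU exact
  if (tree_method == 5) return "grow_gpu_hist";        // GPU hist
  // auto/approx/exact family (0, 1, 2):
  if (prob_buffer_row != 1.0f) return "grow_histmaker,refresh,prune";
  if (dsplit == 1) return "distcol";
  if (dsplit == 2) return "grow_histmaker,prune";
  return "";  // leave 'updater' unset
}
```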
fi = &fp; // read parameter - CHECK_EQ(fi->Read(&mparam, sizeof(mparam)), sizeof(mparam)) + CHECK_EQ(fi->Read(&mparam_, sizeof(mparam_)), sizeof(mparam_)) << "BoostLearner: wrong model format"; { // backward compatibility code for compatible with old model type @@ -303,9 +303,9 @@ class LearnerImpl : public Learner { CHECK(fi->Read(&name_gbm_)) << "BoostLearner: wrong model format"; // duplicated code with LazyInitModel obj_.reset(ObjFunction::Create(name_obj_)); - gbm_.reset(GradientBooster::Create(name_gbm_, cache_, mparam.base_score)); + gbm_.reset(GradientBooster::Create(name_gbm_, cache_, mparam_.base_score)); gbm_->Load(fi); - if (mparam.contain_extra_attrs != 0) { + if (mparam_.contain_extra_attrs != 0) { std::vector > attr; fi->Read(&attr); attributes_ = @@ -316,35 +316,35 @@ class LearnerImpl : public Learner { fi->Read(&max_delta_step); cfg_["max_delta_step"] = max_delta_step; } - if (mparam.contain_eval_metrics != 0) { + if (mparam_.contain_eval_metrics != 0) { std::vector metr; fi->Read(&metr); for (auto name : metr) { metrics_.emplace_back(Metric::Create(name)); } } - cfg_["num_class"] = common::ToString(mparam.num_class); - cfg_["num_feature"] = common::ToString(mparam.num_feature); + cfg_["num_class"] = common::ToString(mparam_.num_class); + cfg_["num_feature"] = common::ToString(mparam_.num_feature); obj_->Configure(cfg_.begin(), cfg_.end()); } // rabit save model to rabit checkpoint void Save(dmlc::Stream* fo) const override { - fo->Write(&mparam, sizeof(LearnerModelParam)); + fo->Write(&mparam_, sizeof(LearnerModelParam)); fo->Write(name_obj_); fo->Write(name_gbm_); gbm_->Save(fo); - if (mparam.contain_extra_attrs != 0) { + if (mparam_.contain_extra_attrs != 0) { std::vector > attr( attributes_.begin(), attributes_.end()); fo->Write(attr); } if (name_obj_ == "count:poisson") { - std::map::const_iterator it = + auto it = cfg_.find("max_delta_step"); if (it != cfg_.end()) fo->Write(it->second); } - if (mparam.contain_eval_metrics != 0) { + if (mparam_.contain_eval_metrics != 0) { std::vector metr; for (auto& ev : metrics_) { metr.emplace_back(ev->Name()); @@ -354,37 +354,37 @@ class LearnerImpl : public Learner { } void UpdateOneIter(int iter, DMatrix* train) override { - monitor.Start("UpdateOneIter"); + monitor_.Start("UpdateOneIter"); CHECK(ModelInitialized()) << "Always call InitModel or LoadModel before update"; - if (tparam.seed_per_iteration || rabit::IsDistributed()) { - common::GlobalRandom().seed(tparam.seed * kRandSeedMagic + iter); + if (tparam_.seed_per_iteration || rabit::IsDistributed()) { + common::GlobalRandom().seed(tparam_.seed * kRandSeedMagic + iter); } this->LazyInitDMatrix(train); - monitor.Start("PredictRaw"); + monitor_.Start("PredictRaw"); this->PredictRaw(train, &preds_); - monitor.Stop("PredictRaw"); - monitor.Start("GetGradient"); - obj_->GetGradient(&preds_, train->info(), iter, &gpair_); - monitor.Stop("GetGradient"); + monitor_.Stop("PredictRaw"); + monitor_.Start("GetGradient"); + obj_->GetGradient(&preds_, train->Info(), iter, &gpair_); + monitor_.Stop("GetGradient"); gbm_->DoBoost(train, &gpair_, obj_.get()); - monitor.Stop("UpdateOneIter"); + monitor_.Stop("UpdateOneIter"); } void BoostOneIter(int iter, DMatrix* train, - HostDeviceVector* in_gpair) override { - monitor.Start("BoostOneIter"); - if (tparam.seed_per_iteration || rabit::IsDistributed()) { - common::GlobalRandom().seed(tparam.seed * kRandSeedMagic + iter); + HostDeviceVector* in_gpair) override { + monitor_.Start("BoostOneIter"); + if (tparam_.seed_per_iteration || 
rabit::IsDistributed()) { + common::GlobalRandom().seed(tparam_.seed * kRandSeedMagic + iter); } this->LazyInitDMatrix(train); gbm_->DoBoost(train, in_gpair); - monitor.Stop("BoostOneIter"); + monitor_.Stop("BoostOneIter"); } std::string EvalOneIter(int iter, const std::vector& data_sets, const std::vector& data_names) override { - monitor.Start("EvalOneIter"); + monitor_.Start("EvalOneIter"); std::ostringstream os; os << '[' << iter << ']' << std::setiosflags(std::ios::fixed); if (metrics_.size() == 0) { @@ -395,17 +395,17 @@ obj_->EvalTransform(&preds_); for (auto& ev : metrics_) { os << '\t' << data_names[i] << '-' << ev->Name() << ':' - << ev->Eval(preds_.data_h(), data_sets[i]->info(), tparam.dsplit == 2); + << ev->Eval(preds_.HostVector(), data_sets[i]->Info(), tparam_.dsplit == 2); } } - monitor.Stop("EvalOneIter"); + monitor_.Stop("EvalOneIter"); return os.str(); } void SetAttr(const std::string& key, const std::string& value) override { attributes_[key] = value; - mparam.contain_extra_attrs = 1; + mparam_.contain_extra_attrs = 1; } bool GetAttr(const std::string& key, std::string* out) const override { @@ -438,7 +438,7 @@ this->PredictRaw(data, &preds_); obj_->EvalTransform(&preds_); return std::make_pair(metric, - ev->Eval(preds_.data_h(), data->info(), tparam.dsplit == 2)); + ev->Eval(preds_.HostVector(), data->Info(), tparam_.dsplit == 2)); } void Predict(DMatrix* data, bool output_margin, @@ -446,12 +446,12 @@ bool pred_leaf, bool pred_contribs, bool approx_contribs, bool pred_interactions) const override { if (pred_contribs) { - gbm_->PredictContribution(data, &out_preds->data_h(), ntree_limit, approx_contribs); + gbm_->PredictContribution(data, &out_preds->HostVector(), ntree_limit, approx_contribs); } else if (pred_interactions) { - gbm_->PredictInteractionContributions(data, &out_preds->data_h(), ntree_limit, + gbm_->PredictInteractionContributions(data, &out_preds->HostVector(), ntree_limit, approx_contribs); } else if (pred_leaf) { - gbm_->PredictLeaf(data, &out_preds->data_h(), ntree_limit); + gbm_->PredictLeaf(data, &out_preds->HostVector(), ntree_limit); } else { this->PredictRaw(data, out_preds, ntree_limit); if (!output_margin) { @@ -464,21 +464,21 @@ // check if p_train is ready to be used by training. // if not, initialize the column access.
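The `UpdateOneIter` sequence above is the core boosting step: predict with the current model, turn predictions and labels into gradient pairs via the objective, then let the booster fit new trees against those gradients. In outline (a sketch with stand-in types, not the real interfaces):

```cpp
#include <vector>

// GradPair stands in for GradientPair; Gbm/ObjFn abbreviate
// GradientBooster and ObjFunction. Sketch only.
struct GradPair { float grad = 0.f, hess = 0.f; };

template <typename Gbm, typename ObjFn, typename Matrix>
void OneBoostingRound(int iter, Gbm& gbm, ObjFn& obj, Matrix* train,
                      std::vector<float>* preds,
                      std::vector<GradPair>* gpair) {
  gbm.PredictBatch(train, preds);               // raw margin predictions
  obj.GetGradient(*preds, train, iter, gpair);  // per-row grad/hess stats
  gbm.DoBoost(train, gpair);                    // fit new trees to gpair
}
```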
inline void LazyInitDMatrix(DMatrix* p_train) { - if (tparam.tree_method == 3 || tparam.tree_method == 4 || - tparam.tree_method == 5 || name_gbm_ == "gblinear") { + if (tparam_.tree_method == 3 || tparam_.tree_method == 4 || + tparam_.tree_method == 5 || name_gbm_ == "gblinear") { return; } - monitor.Start("LazyInitDMatrix"); + monitor_.Start("LazyInitDMatrix"); if (!p_train->HaveColAccess(true)) { - int ncol = static_cast(p_train->info().num_col); + auto ncol = static_cast(p_train->Info().num_col_); std::vector enabled(ncol, true); // set max row per batch to a limited value // in distributed mode, use a safe choice otherwise - size_t max_row_perbatch = tparam.max_row_perbatch; - const size_t safe_max_row = static_cast(32ul << 10ul); + size_t max_row_perbatch = tparam_.max_row_perbatch; + const auto safe_max_row = static_cast(32ul << 10ul); - if (tparam.tree_method == 0 && p_train->info().num_row >= (4UL << 20UL)) { + if (tparam_.tree_method == 0 && p_train->Info().num_row_ >= (4UL << 20UL)) { LOG(CONSOLE) << "Tree method is automatically selected to be \'approx\'" << " for faster speed." @@ -487,57 +487,57 @@ max_row_perbatch = std::min(max_row_perbatch, safe_max_row); } - if (tparam.tree_method == 1) { + if (tparam_.tree_method == 1) { LOG(CONSOLE) << "Tree method is selected to be \'approx\'"; max_row_perbatch = std::min(max_row_perbatch, safe_max_row); } - if (tparam.test_flag == "block" || tparam.dsplit == 2) { + if (tparam_.test_flag == "block" || tparam_.dsplit == 2) { max_row_perbatch = std::min(max_row_perbatch, safe_max_row); } // initialize column access - p_train->InitColAccess(enabled, tparam.prob_buffer_row, max_row_perbatch, true); + p_train->InitColAccess(enabled, tparam_.prob_buffer_row, max_row_perbatch, true); } if (!p_train->SingleColBlock() && cfg_.count("updater") == 0) { - if (tparam.tree_method == 2) { + if (tparam_.tree_method == 2) { LOG(CONSOLE) << "tree method is set to be 'exact'," << " but currently we are only able to proceed with " "approximate algorithm"; } cfg_["updater"] = "grow_histmaker,prune"; - if (gbm_.get() != nullptr) { + if (gbm_ != nullptr) { gbm_->Configure(cfg_.begin(), cfg_.end()); } } - monitor.Stop("LazyInitDMatrix"); + monitor_.Stop("LazyInitDMatrix"); } // return whether model is already initialized. - inline bool ModelInitialized() const { return gbm_.get() != nullptr; } + inline bool ModelInitialized() const { return gbm_ != nullptr; } // lazily initialize the model if it hasn't yet been initialized.
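`LazyInitDMatrix` and the `ModelInitialized` predicate above use the same idiom: an expensive setup runs at most once, guarded by an observable "already done" check rather than a separate flag. A minimal sketch of the idiom, with a hypothetical `ColumnIndex` standing in for the sorted column layout:

```cpp
#include <memory>

struct ColumnIndex {};  // placeholder for an expensive-to-build structure

class Matrix {
 public:
  bool HaveColAccess() const { return col_index_ != nullptr; }
  void EnsureColAccess() {
    if (HaveColAccess()) return;          // cheap fast path on repeat calls
    col_index_.reset(new ColumnIndex());  // expensive build, done once
  }

 private:
  std::unique_ptr<ColumnIndex> col_index_;
};
```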
inline void LazyInitModel() { if (this->ModelInitialized()) return; // estimate feature bound unsigned num_feature = 0; - for (size_t i = 0; i < cache_.size(); ++i) { - CHECK(cache_[i] != nullptr); + for (auto & matrix : cache_) { + CHECK(matrix != nullptr); num_feature = std::max(num_feature, - static_cast(cache_[i]->info().num_col)); + static_cast(matrix->Info().num_col_)); } // run allreduce on num_feature to find the maximum value rabit::Allreduce(&num_feature, 1); - if (num_feature > mparam.num_feature) { - mparam.num_feature = num_feature; + if (num_feature > mparam_.num_feature) { + mparam_.num_feature = num_feature; } // setup - cfg_["num_feature"] = common::ToString(mparam.num_feature); - CHECK(obj_.get() == nullptr && gbm_.get() == nullptr); + cfg_["num_feature"] = common::ToString(mparam_.num_feature); + CHECK(obj_ == nullptr && gbm_ == nullptr); obj_.reset(ObjFunction::Create(name_obj_)); obj_->Configure(cfg_.begin(), cfg_.end()); // reset the base score - mparam.base_score = obj_->ProbToMargin(mparam.base_score); - gbm_.reset(GradientBooster::Create(name_gbm_, cache_, mparam.base_score)); + mparam_.base_score = obj_->ProbToMargin(mparam_.base_score); + gbm_.reset(GradientBooster::Create(name_gbm_, cache_, mparam_.base_score)); gbm_->Configure(cfg_.begin(), cfg_.end()); } /*! @@ -549,15 +549,15 @@ class LearnerImpl : public Learner { */ inline void PredictRaw(DMatrix* data, HostDeviceVector* out_preds, unsigned ntree_limit = 0) const { - CHECK(gbm_.get() != nullptr) + CHECK(gbm_ != nullptr) << "Predict must happen after Load or InitModel"; gbm_->PredictBatch(data, out_preds, ntree_limit); } // model parameter - LearnerModelParam mparam; + LearnerModelParam mparam_; // training parameter - LearnerTrainParam tparam; + LearnerTrainParam tparam_; // configurations std::map cfg_; // attributes @@ -569,7 +569,7 @@ class LearnerImpl : public Learner { // temporal storages for prediction HostDeviceVector preds_; // gradient pairs - HostDeviceVector gpair_; + HostDeviceVector gpair_; private: /*! \brief random number transformation seed. */ @@ -577,7 +577,7 @@ class LearnerImpl : public Learner { // internal cached dmatrix std::vector > cache_; - common::Monitor monitor; + common::Monitor monitor_; }; Learner* Learner::Create( diff --git a/src/linear/coordinate_common.h b/src/linear/coordinate_common.h index 141bb68a1..841ebf80d 100644 --- a/src/linear/coordinate_common.h +++ b/src/linear/coordinate_common.h @@ -62,14 +62,14 @@ inline double CoordinateDeltaBias(double sum_grad, double sum_hess) { * \return The gradient and diagonal Hessian entry for a given feature. */ inline std::pair GetGradient(int group_idx, int num_group, int fidx, - const std::vector &gpair, + const std::vector &gpair, DMatrix *p_fmat) { double sum_grad = 0.0, sum_hess = 0.0; dmlc::DataIter *iter = p_fmat->ColIterator({static_cast(fidx)}); while (iter->Next()) { const ColBatch &batch = iter->Value(); ColBatch::Inst col = batch[0]; - const bst_omp_uint ndata = static_cast(col.length); + const auto ndata = static_cast(col.length); for (bst_omp_uint j = 0; j < ndata; ++j) { const bst_float v = col[j].fvalue; auto &p = gpair[col[j].index * num_group + group_idx]; @@ -93,14 +93,14 @@ inline std::pair GetGradient(int group_idx, int num_group, int f * \return The gradient and diagonal Hessian entry for a given feature. 
*/ inline std::pair GetGradientParallel(int group_idx, int num_group, int fidx, - const std::vector &gpair, + const std::vector &gpair, DMatrix *p_fmat) { double sum_grad = 0.0, sum_hess = 0.0; dmlc::DataIter *iter = p_fmat->ColIterator({static_cast(fidx)}); while (iter->Next()) { const ColBatch &batch = iter->Value(); ColBatch::Inst col = batch[0]; - const bst_omp_uint ndata = static_cast(col.length); + const auto ndata = static_cast(col.length); #pragma omp parallel for schedule(static) reduction(+ : sum_grad, sum_hess) for (bst_omp_uint j = 0; j < ndata; ++j) { const bst_float v = col[j].fvalue; @@ -124,11 +124,11 @@ inline std::pair GetGradientParallel(int group_idx, int num_grou * \return The gradient and diagonal Hessian entry for the bias. */ inline std::pair GetBiasGradientParallel(int group_idx, int num_group, - const std::vector &gpair, + const std::vector &gpair, DMatrix *p_fmat) { - const RowSet &rowset = p_fmat->buffered_rowset(); + const RowSet &rowset = p_fmat->BufferedRowset(); double sum_grad = 0.0, sum_hess = 0.0; - const bst_omp_uint ndata = static_cast(rowset.size()); + const auto ndata = static_cast(rowset.Size()); #pragma omp parallel for schedule(static) reduction(+ : sum_grad, sum_hess) for (bst_omp_uint i = 0; i < ndata; ++i) { auto &p = gpair[rowset[i] * num_group + group_idx]; @@ -151,7 +151,7 @@ inline std::pair GetBiasGradientParallel(int group_idx, int num_ * \param p_fmat The input feature matrix. */ inline void UpdateResidualParallel(int fidx, int group_idx, int num_group, - float dw, std::vector *in_gpair, + float dw, std::vector *in_gpair, DMatrix *p_fmat) { if (dw == 0.0f) return; dmlc::DataIter *iter = p_fmat->ColIterator({static_cast(fidx)}); @@ -159,12 +159,12 @@ inline void UpdateResidualParallel(int fidx, int group_idx, int num_group, const ColBatch &batch = iter->Value(); ColBatch::Inst col = batch[0]; // update grad value - const bst_omp_uint num_row = static_cast(col.length); + const auto num_row = static_cast(col.length); #pragma omp parallel for schedule(static) for (bst_omp_uint j = 0; j < num_row; ++j) { - bst_gpair &p = (*in_gpair)[col[j].index * num_group + group_idx]; + GradientPair &p = (*in_gpair)[col[j].index * num_group + group_idx]; if (p.GetHess() < 0.0f) continue; - p += bst_gpair(p.GetHess() * col[j].fvalue * dw, 0); + p += GradientPair(p.GetHess() * col[j].fvalue * dw, 0); } } } @@ -179,16 +179,16 @@ inline void UpdateResidualParallel(int fidx, int group_idx, int num_group, * \param p_fmat The input feature matrix. */ inline void UpdateBiasResidualParallel(int group_idx, int num_group, float dbias, - std::vector *in_gpair, + std::vector *in_gpair, DMatrix *p_fmat) { if (dbias == 0.0f) return; - const RowSet &rowset = p_fmat->buffered_rowset(); - const bst_omp_uint ndata = static_cast(p_fmat->info().num_row); + const RowSet &rowset = p_fmat->BufferedRowset(); + const auto ndata = static_cast(p_fmat->Info().num_row_); #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < ndata; ++i) { - bst_gpair &g = (*in_gpair)[rowset[i] * num_group + group_idx]; + GradientPair &g = (*in_gpair)[rowset[i] * num_group + group_idx]; if (g.GetHess() < 0.0f) continue; - g += bst_gpair(g.GetHess() * dbias, 0); + g += GradientPair(g.GetHess() * dbias, 0); } } @@ -201,7 +201,7 @@ class FeatureSelector { /*! \brief factory method */ static FeatureSelector *Create(int choice); /*! 
\brief virtual destructor */ - virtual ~FeatureSelector() {} + virtual ~FeatureSelector() = default; /** * \brief Setting up the selector state prior to looping through features. * @@ -213,7 +213,7 @@ class FeatureSelector { * \param param A parameter with algorithm-dependent use. */ virtual void Setup(const gbm::GBLinearModel &model, - const std::vector &gpair, + const std::vector &gpair, DMatrix *p_fmat, float alpha, float lambda, int param) {} /** @@ -232,7 +232,7 @@ class FeatureSelector { virtual int NextFeature(int iteration, const gbm::GBLinearModel &model, int group_idx, - const std::vector &gpair, + const std::vector &gpair, DMatrix *p_fmat, float alpha, float lambda) = 0; }; @@ -242,7 +242,7 @@ class FeatureSelector { class CyclicFeatureSelector : public FeatureSelector { public: int NextFeature(int iteration, const gbm::GBLinearModel &model, - int group_idx, const std::vector &gpair, + int group_idx, const std::vector &gpair, DMatrix *p_fmat, float alpha, float lambda) override { return iteration % model.param.num_feature; } @@ -255,23 +255,23 @@ class CyclicFeatureSelector : public FeatureSelector { class ShuffleFeatureSelector : public FeatureSelector { public: void Setup(const gbm::GBLinearModel &model, - const std::vector &gpair, + const std::vector &gpair, DMatrix *p_fmat, float alpha, float lambda, int param) override { - if (feat_index.size() == 0) { - feat_index.resize(model.param.num_feature); - std::iota(feat_index.begin(), feat_index.end(), 0); + if (feat_index_.size() == 0) { + feat_index_.resize(model.param.num_feature); + std::iota(feat_index_.begin(), feat_index_.end(), 0); } - std::shuffle(feat_index.begin(), feat_index.end(), common::GlobalRandom()); + std::shuffle(feat_index_.begin(), feat_index_.end(), common::GlobalRandom()); } int NextFeature(int iteration, const gbm::GBLinearModel &model, - int group_idx, const std::vector &gpair, + int group_idx, const std::vector &gpair, DMatrix *p_fmat, float alpha, float lambda) override { - return feat_index[iteration % model.param.num_feature]; + return feat_index_[iteration % model.param.num_feature]; } protected: - std::vector feat_index; + std::vector feat_index_; }; /** @@ -281,7 +281,7 @@ class ShuffleFeatureSelector : public FeatureSelector { class RandomFeatureSelector : public FeatureSelector { public: int NextFeature(int iteration, const gbm::GBLinearModel &model, - int group_idx, const std::vector &gpair, + int group_idx, const std::vector &gpair, DMatrix *p_fmat, float alpha, float lambda) override { return common::GlobalRandom()() % model.param.num_feature; } @@ -299,32 +299,32 @@ class RandomFeatureSelector : public FeatureSelector { class GreedyFeatureSelector : public FeatureSelector { public: void Setup(const gbm::GBLinearModel &model, - const std::vector &gpair, + const std::vector &gpair, DMatrix *p_fmat, float alpha, float lambda, int param) override { - top_k = static_cast(param); + top_k_ = static_cast(param); const bst_uint ngroup = model.param.num_output_group; - if (param <= 0) top_k = std::numeric_limits::max(); - if (counter.size() == 0) { - counter.resize(ngroup); - gpair_sums.resize(model.param.num_feature * ngroup); + if (param <= 0) top_k_ = std::numeric_limits::max(); + if (counter_.size() == 0) { + counter_.resize(ngroup); + gpair_sums_.resize(model.param.num_feature * ngroup); } for (bst_uint gid = 0u; gid < ngroup; ++gid) { - counter[gid] = 0u; + counter_[gid] = 0u; } } int NextFeature(int iteration, const gbm::GBLinearModel &model, - int group_idx, const std::vector &gpair, + int 
group_idx, const std::vector &gpair, DMatrix *p_fmat, float alpha, float lambda) override { // k-th selected feature for a group - auto k = counter[group_idx]++; + auto k = counter_[group_idx]++; // stop after either reaching top-K or going through all the features in a group - if (k >= top_k || counter[group_idx] == model.param.num_feature) return -1; + if (k >= top_k_ || counter_[group_idx] == model.param.num_feature) return -1; const int ngroup = model.param.num_output_group; const bst_omp_uint nfeat = model.param.num_feature; // Calculate univariate gradient sums - std::fill(gpair_sums.begin(), gpair_sums.end(), std::make_pair(0., 0.)); + std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.)); dmlc::DataIter *iter = p_fmat->ColIterator(); while (iter->Next()) { const ColBatch &batch = iter->Value(); @@ -332,7 +332,7 @@ class GreedyFeatureSelector : public FeatureSelector { for (bst_omp_uint i = 0; i < nfeat; ++i) { const ColBatch::Inst col = batch[i]; const bst_uint ndata = col.length; - auto &sums = gpair_sums[group_idx * nfeat + i]; + auto &sums = gpair_sums_[group_idx * nfeat + i]; for (bst_uint j = 0u; j < ndata; ++j) { const bst_float v = col[j].fvalue; auto &p = gpair[col[j].index * ngroup + group_idx]; @@ -346,7 +346,7 @@ class GreedyFeatureSelector : public FeatureSelector { int best_fidx = 0; double best_weight_update = 0.0f; for (bst_omp_uint fidx = 0; fidx < nfeat; ++fidx) { - auto &s = gpair_sums[group_idx * nfeat + fidx]; + auto &s = gpair_sums_[group_idx * nfeat + fidx]; float dw = std::abs(static_cast( CoordinateDelta(s.first, s.second, model[fidx][group_idx], alpha, lambda))); if (dw > best_weight_update) { @@ -358,9 +358,9 @@ class GreedyFeatureSelector : public FeatureSelector { } protected: - bst_uint top_k; - std::vector counter; - std::vector> gpair_sums; + bst_uint top_k_; + std::vector counter_; + std::vector> gpair_sums_; }; /** @@ -377,21 +377,21 @@ class GreedyFeatureSelector : public FeatureSelector { class ThriftyFeatureSelector : public FeatureSelector { public: void Setup(const gbm::GBLinearModel &model, - const std::vector &gpair, + const std::vector &gpair, DMatrix *p_fmat, float alpha, float lambda, int param) override { - top_k = static_cast(param); - if (param <= 0) top_k = std::numeric_limits::max(); + top_k_ = static_cast(param); + if (param <= 0) top_k_ = std::numeric_limits::max(); const bst_uint ngroup = model.param.num_output_group; const bst_omp_uint nfeat = model.param.num_feature; - if (deltaw.size() == 0) { - deltaw.resize(nfeat * ngroup); - sorted_idx.resize(nfeat * ngroup); - counter.resize(ngroup); - gpair_sums.resize(nfeat * ngroup); + if (deltaw_.size() == 0) { + deltaw_.resize(nfeat * ngroup); + sorted_idx_.resize(nfeat * ngroup); + counter_.resize(ngroup); + gpair_sums_.resize(nfeat * ngroup); } // Calculate univariate gradient sums - std::fill(gpair_sums.begin(), gpair_sums.end(), std::make_pair(0., 0.)); + std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.)); dmlc::DataIter *iter = p_fmat->ColIterator(); while (iter->Next()) { const ColBatch &batch = iter->Value(); @@ -401,7 +401,7 @@ class ThriftyFeatureSelector : public FeatureSelector { const ColBatch::Inst col = batch[i]; const bst_uint ndata = col.length; for (bst_uint gid = 0u; gid < ngroup; ++gid) { - auto &sums = gpair_sums[gid * nfeat + i]; + auto &sums = gpair_sums_[gid * nfeat + i]; for (bst_uint j = 0u; j < ndata; ++j) { const bst_float v = col[j].fvalue; auto &p = gpair[col[j].index * ngroup + gid]; @@ -413,45 +413,45 @@ class 
ThriftyFeatureSelector : public FeatureSelector { } } // rank by descending weight magnitude within the groups - std::fill(deltaw.begin(), deltaw.end(), 0.f); - std::iota(sorted_idx.begin(), sorted_idx.end(), 0); - bst_float *pdeltaw = &deltaw[0]; + std::fill(deltaw_.begin(), deltaw_.end(), 0.f); + std::iota(sorted_idx_.begin(), sorted_idx_.end(), 0); + bst_float *pdeltaw = &deltaw_[0]; for (bst_uint gid = 0u; gid < ngroup; ++gid) { // Calculate univariate weight changes for (bst_omp_uint i = 0; i < nfeat; ++i) { auto ii = gid * nfeat + i; - auto &s = gpair_sums[ii]; - deltaw[ii] = static_cast(CoordinateDelta( + auto &s = gpair_sums_[ii]; + deltaw_[ii] = static_cast(CoordinateDelta( s.first, s.second, model[i][gid], alpha, lambda)); } // sort in descending order of deltaw abs values - auto start = sorted_idx.begin() + gid * nfeat; + auto start = sorted_idx_.begin() + gid * nfeat; std::sort(start, start + nfeat, [pdeltaw](size_t i, size_t j) { return std::abs(*(pdeltaw + i)) > std::abs(*(pdeltaw + j)); }); - counter[gid] = 0u; + counter_[gid] = 0u; } } int NextFeature(int iteration, const gbm::GBLinearModel &model, - int group_idx, const std::vector &gpair, + int group_idx, const std::vector &gpair, DMatrix *p_fmat, float alpha, float lambda) override { // k-th selected feature for a group - auto k = counter[group_idx]++; + auto k = counter_[group_idx]++; // stop after either reaching top-N or going through all the features in a group - if (k >= top_k || counter[group_idx] == model.param.num_feature) return -1; + if (k >= top_k_ || counter_[group_idx] == model.param.num_feature) return -1; // note that sorted_idx stores the "long" indices const size_t grp_offset = group_idx * model.param.num_feature; - return static_cast(sorted_idx[grp_offset + k] - grp_offset); + return static_cast(sorted_idx_[grp_offset + k] - grp_offset); } protected: - bst_uint top_k; - std::vector deltaw; - std::vector sorted_idx; - std::vector counter; - std::vector> gpair_sums; + bst_uint top_k_; + std::vector deltaw_; + std::vector sorted_idx_; + std::vector counter_; + std::vector> gpair_sums_; }; /** diff --git a/src/linear/updater_coordinate.cc b/src/linear/updater_coordinate.cc index 4caf37ca0..8de2b6d97 100644 --- a/src/linear/updater_coordinate.cc +++ b/src/linear/updater_coordinate.cc @@ -85,7 +85,7 @@ class CoordinateUpdater : public LinearUpdater { monitor.Init("CoordinateUpdater", param.debug_verbose); } - void Update(std::vector *in_gpair, DMatrix *p_fmat, + void Update(std::vector *in_gpair, DMatrix *p_fmat, gbm::GBLinearModel *model, double sum_instance_weight) override { param.DenormalizePenalties(sum_instance_weight); const int ngroup = model->param.num_output_group; @@ -111,7 +111,7 @@ class CoordinateUpdater : public LinearUpdater { } } - inline void UpdateFeature(int fidx, int group_idx, std::vector *in_gpair, + inline void UpdateFeature(int fidx, int group_idx, std::vector *in_gpair, DMatrix *p_fmat, gbm::GBLinearModel *model) { const int ngroup = model->param.num_output_group; bst_float &w = (*model)[fidx][group_idx]; diff --git a/src/linear/updater_shotgun.cc b/src/linear/updater_shotgun.cc index a15f22bba..4cd52d36e 100644 --- a/src/linear/updater_shotgun.cc +++ b/src/linear/updater_shotgun.cc @@ -58,59 +58,60 @@ class ShotgunUpdater : public LinearUpdater { public: // set training parameter void Init(const std::vector > &args) override { - param.InitAllowUnknown(args); - selector.reset(FeatureSelector::Create(param.feature_selector)); + param_.InitAllowUnknown(args); + 
selector_.reset(FeatureSelector::Create(param_.feature_selector)); } - void Update(std::vector *in_gpair, DMatrix *p_fmat, + void Update(std::vector *in_gpair, DMatrix *p_fmat, gbm::GBLinearModel *model, double sum_instance_weight) override { - param.DenormalizePenalties(sum_instance_weight); - std::vector &gpair = *in_gpair; + param_.DenormalizePenalties(sum_instance_weight); + std::vector &gpair = *in_gpair; const int ngroup = model->param.num_output_group; // update bias for (int gid = 0; gid < ngroup; ++gid) { auto grad = GetBiasGradientParallel(gid, ngroup, *in_gpair, p_fmat); - auto dbias = static_cast(param.learning_rate * + auto dbias = static_cast(param_.learning_rate * CoordinateDeltaBias(grad.first, grad.second)); model->bias()[gid] += dbias; UpdateBiasResidualParallel(gid, ngroup, dbias, in_gpair, p_fmat); } // lock-free parallel updates of weights - selector->Setup(*model, *in_gpair, p_fmat, param.reg_alpha_denorm, param.reg_lambda_denorm, 0); + selector_->Setup(*model, *in_gpair, p_fmat, param_.reg_alpha_denorm, + param_.reg_lambda_denorm, 0); dmlc::DataIter *iter = p_fmat->ColIterator(); while (iter->Next()) { const ColBatch &batch = iter->Value(); - const bst_omp_uint nfeat = static_cast(batch.size); + const auto nfeat = static_cast(batch.size); #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < nfeat; ++i) { - int ii = selector->NextFeature(i, *model, 0, *in_gpair, p_fmat, - param.reg_alpha_denorm, param.reg_lambda_denorm); + int ii = selector_->NextFeature(i, *model, 0, *in_gpair, p_fmat, + param_.reg_alpha_denorm, param_.reg_lambda_denorm); if (ii < 0) continue; const bst_uint fid = batch.col_index[ii]; ColBatch::Inst col = batch[ii]; for (int gid = 0; gid < ngroup; ++gid) { double sum_grad = 0.0, sum_hess = 0.0; for (bst_uint j = 0; j < col.length; ++j) { - bst_gpair &p = gpair[col[j].index * ngroup + gid]; + GradientPair &p = gpair[col[j].index * ngroup + gid]; if (p.GetHess() < 0.0f) continue; const bst_float v = col[j].fvalue; sum_grad += p.GetGrad() * v; sum_hess += p.GetHess() * v * v; } bst_float &w = (*model)[fid][gid]; - bst_float dw = static_cast( - param.learning_rate * - CoordinateDelta(sum_grad, sum_hess, w, param.reg_alpha_denorm, - param.reg_lambda_denorm)); + auto dw = static_cast( + param_.learning_rate * + CoordinateDelta(sum_grad, sum_hess, w, param_.reg_alpha_denorm, + param_.reg_lambda_denorm)); if (dw == 0.f) continue; w += dw; // update grad values for (bst_uint j = 0; j < col.length; ++j) { - bst_gpair &p = gpair[col[j].index * ngroup + gid]; + GradientPair &p = gpair[col[j].index * ngroup + gid]; if (p.GetHess() < 0.0f) continue; - p += bst_gpair(p.GetHess() * col[j].fvalue * dw, 0); + p += GradientPair(p.GetHess() * col[j].fvalue * dw, 0); } } } @@ -119,9 +120,9 @@ class ShotgunUpdater : public LinearUpdater { protected: // training parameters - ShotgunTrainParam param; + ShotgunTrainParam param_; - std::unique_ptr selector; + std::unique_ptr selector_; }; DMLC_REGISTER_PARAMETER(ShotgunTrainParam); diff --git a/src/metric/elementwise_metric.cc b/src/metric/elementwise_metric.cc index e3e2eb005..06d8df61e 100644 --- a/src/metric/elementwise_metric.cc +++ b/src/metric/elementwise_metric.cc @@ -24,16 +24,16 @@ struct EvalEWiseBase : public Metric { bst_float Eval(const std::vector& preds, const MetaInfo& info, bool distributed) const override { - CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty"; - CHECK_EQ(preds.size(), info.labels.size()) + CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty"; + 
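// Editor's sketch: the bias step in ShotgunUpdater::Update is a plain Newton
// step, since the intercept carries no L1/L2 penalty, and after moving the
// bias by dbias each row's first-order gradient must be shifted by
// hess * dbias so later coordinate steps see fresh residuals. Both helper
// names below are ours; CoordinateDeltaBias and UpdateBiasResidualParallel
// are assumed to have this standard form.
#include <utility>
#include <vector>

inline double CoordinateDeltaBiasSketch(double sum_grad, double sum_hess) {
  return sum_hess < 1e-5 ? 0.0 : -sum_grad / sum_hess;  // unpenalized Newton step
}

inline void ApplyBiasResidualSketch(
    std::vector<std::pair<float, float>>* gpair /* (grad, hess) per row */,
    float dbias) {
  for (auto& p : *gpair) {
    p.first += p.second * dbias;  // grad += hess * dbias for every row
  }
}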
CHECK_EQ(preds.size(), info.labels_.size()) << "label and prediction size not match, " << "hint: use merror or mlogloss for multi-class classification"; - const omp_ulong ndata = static_cast(info.labels.size()); + const auto ndata = static_cast(info.labels_.size()); double sum = 0.0, wsum = 0.0; #pragma omp parallel for reduction(+: sum, wsum) schedule(static) for (omp_ulong i = 0; i < ndata; ++i) { const bst_float wt = info.GetWeight(i); - sum += static_cast(this)->EvalRow(info.labels[i], preds[i]) * wt; + sum += static_cast(this)->EvalRow(info.labels_[i], preds[i]) * wt; wsum += wt; } double dat[2]; dat[0] = sum, dat[1] = wsum; diff --git a/src/metric/multiclass_metric.cc b/src/metric/multiclass_metric.cc index 191813720..312dc76b5 100644 --- a/src/metric/multiclass_metric.cc +++ b/src/metric/multiclass_metric.cc @@ -23,23 +23,23 @@ struct EvalMClassBase : public Metric { bst_float Eval(const std::vector &preds, const MetaInfo &info, bool distributed) const override { - CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty"; - CHECK(preds.size() % info.labels.size() == 0) + CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty"; + CHECK(preds.size() % info.labels_.size() == 0) << "label and prediction size not match"; - const size_t nclass = preds.size() / info.labels.size(); + const size_t nclass = preds.size() / info.labels_.size(); CHECK_GE(nclass, 1U) << "mlogloss and merror are only used for multi-class classification," << " use logloss for binary classification"; - const bst_omp_uint ndata = static_cast(info.labels.size()); + const auto ndata = static_cast(info.labels_.size()); double sum = 0.0, wsum = 0.0; int label_error = 0; #pragma omp parallel for reduction(+: sum, wsum) schedule(static) for (bst_omp_uint i = 0; i < ndata; ++i) { const bst_float wt = info.GetWeight(i); - int label = static_cast(info.labels[i]); + auto label = static_cast(info.labels_[i]); if (label >= 0 && label < static_cast(nclass)) { sum += Derived::EvalRow(label, - dmlc::BeginPtr(preds) + i * nclass, + preds.data() + i * nclass, nclass) * wt; wsum += wt; } else { @@ -99,7 +99,7 @@ struct EvalMultiLogLoss : public EvalMClassBase { const bst_float *pred, size_t nclass) { const bst_float eps = 1e-16f; - size_t k = static_cast(label); + auto k = static_cast(label); if (pred[k] > eps) { return -std::log(pred[k]); } else { diff --git a/src/metric/rank_metric.cc b/src/metric/rank_metric.cc index 216169ca1..f4c2a5300 100644 --- a/src/metric/rank_metric.cc +++ b/src/metric/rank_metric.cc @@ -19,7 +19,7 @@ DMLC_REGISTRY_FILE_TAG(rank_metric); struct EvalAMS : public Metric { public: explicit EvalAMS(const char* param) { - CHECK(param != nullptr) + CHECK(param != nullptr) // NOLINT << "AMS must be in format ams@k"; ratio_ = atof(param); std::ostringstream os; @@ -32,7 +32,7 @@ struct EvalAMS : public Metric { CHECK(!distributed) << "metric AMS do not support distributed evaluation"; using namespace std; // NOLINT(*) - const bst_omp_uint ndata = static_cast(info.labels.size()); + const auto ndata = static_cast(info.labels_.size()); std::vector > rec(ndata); #pragma omp parallel for schedule(static) @@ -40,7 +40,7 @@ struct EvalAMS : public Metric { rec[i] = std::make_pair(preds[i], i); } std::sort(rec.begin(), rec.end(), common::CmpFirst); - unsigned ntop = static_cast(ratio_ * ndata); + auto ntop = static_cast(ratio_ * ndata); if (ntop == 0) ntop = ndata; const double br = 10.0; unsigned thresindex = 0; @@ -48,7 +48,7 @@ struct EvalAMS : public Metric { for (unsigned i = 0; i < static_cast(ndata-1) 
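// Editor's sketch: EvalEWiseBase above uses CRTP — Eval runs the weighted
// reduction once, and static_cast<const Derived*>(this)->EvalRow dispatches
// to the concrete metric without virtual calls. An illustrative derived
// metric in that style (a sketch, not the exact xgboost RMSE code):
#include <cmath>

struct EvalRmseSketch {
  inline static float EvalRow(float label, float pred) {
    const float diff = label - pred;
    return diff * diff;                  // per-row squared error
  }
  inline static float GetFinal(float sum, float wsum) {
    return std::sqrt(sum / wsum);        // weighted root mean square at the end
  }
};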
&& i < ntop; ++i) { const unsigned ridx = rec[i].second; const bst_float wt = info.GetWeight(ridx); - if (info.labels[ridx] > 0.5f) { + if (info.labels_[ridx] > 0.5f) { s_tp += wt; } else { b_fp += wt; @@ -84,16 +84,16 @@ struct EvalAuc : public Metric { bst_float Eval(const std::vector &preds, const MetaInfo &info, bool distributed) const override { - CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty"; - CHECK_EQ(preds.size(), info.labels.size()) + CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty"; + CHECK_EQ(preds.size(), info.labels_.size()) << "label size predict size not match"; std::vector tgptr(2, 0); - tgptr[1] = static_cast(info.labels.size()); + tgptr[1] = static_cast(info.labels_.size()); - const std::vector &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr; - CHECK_EQ(gptr.back(), info.labels.size()) + const std::vector &gptr = info.group_ptr_.size() == 0 ? tgptr : info.group_ptr_; + CHECK_EQ(gptr.back(), info.labels_.size()) << "EvalAuc: group structure must match number of prediction"; - const bst_omp_uint ngroup = static_cast(gptr.size() - 1); + const auto ngroup = static_cast(gptr.size() - 1); // sum statistics bst_float sum_auc = 0.0f; int auc_error = 0; @@ -102,7 +102,7 @@ struct EvalAuc : public Metric { for (bst_omp_uint k = 0; k < ngroup; ++k) { rec.clear(); for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) { - rec.push_back(std::make_pair(preds[j], j)); + rec.emplace_back(preds[j], j); } XGBOOST_PARALLEL_SORT(rec.begin(), rec.end(), common::CmpFirst); // calculate AUC @@ -110,7 +110,7 @@ struct EvalAuc : public Metric { double sum_npos = 0.0, sum_nneg = 0.0, buf_pos = 0.0, buf_neg = 0.0; for (size_t j = 0; j < rec.size(); ++j) { const bst_float wt = info.GetWeight(rec[j].second); - const bst_float ctr = info.labels[rec[j].second]; + const bst_float ctr = info.labels_[rec[j].second]; // keep bucketing predictions in same bucket if (j != 0 && rec[j].first != rec[j - 1].first) { sum_pospair += buf_neg * (sum_npos + buf_pos *0.5); @@ -156,16 +156,16 @@ struct EvalRankList : public Metric { bst_float Eval(const std::vector &preds, const MetaInfo &info, bool distributed) const override { - CHECK_EQ(preds.size(), info.labels.size()) + CHECK_EQ(preds.size(), info.labels_.size()) << "label size predict size not match"; // quick consistency when group is not available std::vector tgptr(2, 0); tgptr[1] = static_cast(preds.size()); - const std::vector &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr; + const std::vector &gptr = info.group_ptr_.size() == 0 ? 
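// Editor's sketch: the EvalAuc loop above accumulates, per group, the number
// of correctly ordered (positive, negative) pairs, bucketing tied predictions
// so ties count as half-pairs. A simplified sanity-check version of the same
// statistic, omitting the tie handling (helper name is ours):
#include <algorithm>
#include <utility>
#include <vector>

inline double AucSketch(std::vector<std::pair<float, int>> rec /* (pred, label) */) {
  std::sort(rec.begin(), rec.end(),
            [](const std::pair<float, int>& a, const std::pair<float, int>& b) {
              return a.first > b.first;  // descending prediction
            });
  double npos = 0.0, nneg = 0.0, good = 0.0;
  for (const auto& e : rec) {
    if (e.second > 0) {
      npos += 1.0;
    } else {
      nneg += 1.0;
      good += npos;  // every positive seen so far outranks this negative
    }
  }
  return good / (npos * nneg);  // caller must guarantee npos > 0 && nneg > 0
}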
tgptr : info.group_ptr_; CHECK_NE(gptr.size(), 0U) << "must specify group when constructing rank file"; CHECK_EQ(gptr.back(), preds.size()) << "EvalRanklist: group structure must match number of prediction"; - const bst_omp_uint ngroup = static_cast(gptr.size() - 1); + const auto ngroup = static_cast(gptr.size() - 1); // sum statistics double sum_metric = 0.0f; #pragma omp parallel reduction(+:sum_metric) @@ -176,7 +176,7 @@ struct EvalRankList : public Metric { for (bst_omp_uint k = 0; k < ngroup; ++k) { rec.clear(); for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) { - rec.push_back(std::make_pair(preds[j], static_cast(info.labels[j]))); + rec.emplace_back(preds[j], static_cast(info.labels_[j])); } sum_metric += this->EvalMetric(rec); } @@ -230,7 +230,7 @@ struct EvalPrecision : public EvalRankList{ explicit EvalPrecision(const char *name) : EvalRankList("pre", name) {} protected: - virtual bst_float EvalMetric(std::vector< std::pair > &rec) const { + bst_float EvalMetric(std::vector< std::pair > &rec) const override { // calculate Precision std::sort(rec.begin(), rec.end(), common::CmpFirst); unsigned nhit = 0; @@ -279,7 +279,7 @@ struct EvalMAP : public EvalRankList { explicit EvalMAP(const char *name) : EvalRankList("map", name) {} protected: - virtual bst_float EvalMetric(std::vector< std::pair > &rec) const { + bst_float EvalMetric(std::vector< std::pair > &rec) const override { std::sort(rec.begin(), rec.end(), common::CmpFirst); unsigned nhits = 0; double sumap = 0.0; @@ -307,14 +307,14 @@ struct EvalMAP : public EvalRankList { /*! \brief Cox: Partial likelihood of the Cox proportional hazards model */ struct EvalCox : public Metric { public: - EvalCox() {} + EvalCox() = default; bst_float Eval(const std::vector &preds, const MetaInfo &info, bool distributed) const override { CHECK(!distributed) << "Cox metric does not support distributed evaluation"; using namespace std; // NOLINT(*) - const bst_omp_uint ndata = static_cast(info.labels.size()); + const auto ndata = static_cast(info.labels_.size()); const std::vector &label_order = info.LabelAbsSort(); // pre-compute a sum for the denominator @@ -328,7 +328,7 @@ struct EvalCox : public Metric { bst_omp_uint num_events = 0; for (bst_omp_uint i = 0; i < ndata; ++i) { const size_t ind = label_order[i]; - const auto label = info.labels[ind]; + const auto label = info.labels_[ind]; if (label > 0) { out -= log(preds[ind]) - log(exp_p_sum); ++num_events; @@ -336,7 +336,7 @@ struct EvalCox : public Metric { // only update the denominator after we move forward in time (labels are sorted) accumulated_sum += preds[ind]; - if (i == ndata - 1 || std::abs(label) < std::abs(info.labels[label_order[i + 1]])) { + if (i == ndata - 1 || std::abs(label) < std::abs(info.labels_[label_order[i + 1]])) { exp_p_sum -= accumulated_sum; accumulated_sum = 0; } @@ -358,16 +358,16 @@ struct EvalAucPR : public Metric { bst_float Eval(const std::vector &preds, const MetaInfo &info, bool distributed) const override { - CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty"; - CHECK_EQ(preds.size(), info.labels.size()) + CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty"; + CHECK_EQ(preds.size(), info.labels_.size()) << "label size predict size not match"; std::vector tgptr(2, 0); - tgptr[1] = static_cast(info.labels.size()); + tgptr[1] = static_cast(info.labels_.size()); const std::vector &gptr = - info.group_ptr.size() == 0 ? tgptr : info.group_ptr; - CHECK_EQ(gptr.back(), info.labels.size()) + info.group_ptr_.size() == 0 ? 
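// Editor's sketch: EvalPrecision above computes precision@k per group — sort
// by prediction, count relevant labels in the top k, divide by k. A compact
// restatement (function name is ours):
#include <algorithm>
#include <utility>
#include <vector>

inline float PrecisionAtK(std::vector<std::pair<float, unsigned>> rec,
                          unsigned topn) {
  std::sort(rec.begin(), rec.end(),
            [](const std::pair<float, unsigned>& a,
               const std::pair<float, unsigned>& b) {
              return a.first > b.first;  // descending prediction
            });
  unsigned nhit = 0;
  for (size_t i = 0; i < rec.size() && i < topn; ++i) {
    nhit += static_cast<unsigned>(rec[i].second != 0);  // relevance label > 0
  }
  return static_cast<float>(nhit) / topn;
}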
tgptr : info.group_ptr_; + CHECK_EQ(gptr.back(), info.labels_.size()) << "EvalAucPR: group structure must match number of prediction"; - const bst_omp_uint ngroup = static_cast(gptr.size() - 1); + const auto ngroup = static_cast(gptr.size() - 1); // sum statistics double auc = 0.0; int auc_error = 0, auc_gt_one = 0; @@ -378,9 +378,9 @@ struct EvalAucPR : public Metric { double total_neg = 0.0; rec.clear(); for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) { - total_pos += info.GetWeight(j) * info.labels[j]; - total_neg += info.GetWeight(j) * (1.0f - info.labels[j]); - rec.push_back(std::make_pair(preds[j], j)); + total_pos += info.GetWeight(j) * info.labels_[j]; + total_neg += info.GetWeight(j) * (1.0f - info.labels_[j]); + rec.emplace_back(preds[j], j); } XGBOOST_PARALLEL_SORT(rec.begin(), rec.end(), common::CmpFirst); // we need pos > 0 && neg > 0 @@ -390,11 +390,10 @@ struct EvalAucPR : public Metric { // calculate AUC double tp = 0.0, prevtp = 0.0, fp = 0.0, prevfp = 0.0, h = 0.0, a = 0.0, b = 0.0; for (size_t j = 0; j < rec.size(); ++j) { - tp += info.GetWeight(rec[j].second) * info.labels[rec[j].second]; - fp += info.GetWeight(rec[j].second) * (1.0f - info.labels[rec[j].second]); + tp += info.GetWeight(rec[j].second) * info.labels_[rec[j].second]; + fp += info.GetWeight(rec[j].second) * (1.0f - info.labels_[rec[j].second]); if ((j < rec.size() - 1 && rec[j].first != rec[j + 1].first) || j == rec.size() - 1) { if (tp == prevtp) { - h = 1.0; a = 1.0; b = 0.0; } else { diff --git a/src/objective/multiclass_obj.cc b/src/objective/multiclass_obj.cc index 9dcb85686..9341682e1 100644 --- a/src/objective/multiclass_obj.cc +++ b/src/objective/multiclass_obj.cc @@ -38,15 +38,15 @@ class SoftmaxMultiClassObj : public ObjFunction { void GetGradient(HostDeviceVector* preds, const MetaInfo& info, int iter, - HostDeviceVector* out_gpair) override { - CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty"; - CHECK(preds->size() == (static_cast(param_.num_class) * info.labels.size())) + HostDeviceVector* out_gpair) override { + CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty"; + CHECK(preds->Size() == (static_cast(param_.num_class) * info.labels_.size())) << "SoftmaxMultiClassObj: label size and pred size does not match"; - std::vector& preds_h = preds->data_h(); - out_gpair->resize(preds_h.size()); - std::vector& gpair = out_gpair->data_h(); + std::vector& preds_h = preds->HostVector(); + out_gpair->Resize(preds_h.size()); + std::vector& gpair = out_gpair->HostVector(); const int nclass = param_.num_class; - const omp_ulong ndata = static_cast(preds_h.size() / nclass); + const auto ndata = static_cast(preds_h.size() / nclass); int label_error = 0; #pragma omp parallel @@ -58,7 +58,7 @@ class SoftmaxMultiClassObj : public ObjFunction { rec[k] = preds_h[i * nclass + k]; } common::Softmax(&rec); - int label = static_cast(info.labels[i]); + auto label = static_cast(info.labels_[i]); if (label < 0 || label >= nclass) { label_error = label; label = 0; } @@ -67,9 +67,9 @@ class SoftmaxMultiClassObj : public ObjFunction { bst_float p = rec[k]; const bst_float h = 2.0f * p * (1.0f - p) * wt; if (label == k) { - gpair[i * nclass + k] = bst_gpair((p - 1.0f) * wt, h); + gpair[i * nclass + k] = GradientPair((p - 1.0f) * wt, h); } else { - gpair[i * nclass + k] = bst_gpair(p* wt, h); + gpair[i * nclass + k] = GradientPair(p* wt, h); } } } @@ -91,10 +91,10 @@ class SoftmaxMultiClassObj : public ObjFunction { private: inline void Transform(HostDeviceVector *io_preds, bool prob) { - 
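// Editor's sketch: the softmax gradient in SoftmaxMultiClassObj above is, per
// row with true class k*: grad_k = p_k - [k == k*], hess_k = 2 p_k (1 - p_k),
// both scaled by the row weight. A self-contained restatement (the helper
// name is ours; xgboost's common::Softmax does the stable exponentiation):
#include <algorithm>
#include <cmath>
#include <vector>

inline void SoftmaxGradSketch(const std::vector<float>& margin /* non-empty */,
                              int label, float wt, std::vector<float>* grad,
                              std::vector<float>* hess) {
  std::vector<float> p(margin);
  const float wmax = *std::max_element(p.begin(), p.end());
  float wsum = 0.0f;
  for (float& v : p) { v = std::exp(v - wmax); wsum += v; }  // stable softmax
  for (float& v : p) { v /= wsum; }
  grad->resize(p.size());
  hess->resize(p.size());
  for (size_t k = 0; k < p.size(); ++k) {
    const bool is_label = static_cast<int>(k) == label;
    (*grad)[k] = (is_label ? p[k] - 1.0f : p[k]) * wt;
    (*hess)[k] = 2.0f * p[k] * (1.0f - p[k]) * wt;
  }
}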
std::vector &preds = io_preds->data_h(); + std::vector &preds = io_preds->HostVector(); std::vector tmp; const int nclass = param_.num_class; - const omp_ulong ndata = static_cast(preds.size() / nclass); + const auto ndata = static_cast(preds.size() / nclass); if (!prob) tmp.resize(ndata); #pragma omp parallel diff --git a/src/objective/rank_obj.cc b/src/objective/rank_obj.cc index 93559e135..65a01d759 100644 --- a/src/objective/rank_obj.cc +++ b/src/objective/rank_obj.cc @@ -40,17 +40,17 @@ class LambdaRankObj : public ObjFunction { void GetGradient(HostDeviceVector* preds, const MetaInfo& info, int iter, - HostDeviceVector* out_gpair) override { - CHECK_EQ(preds->size(), info.labels.size()) << "label size predict size not match"; - auto& preds_h = preds->data_h(); - out_gpair->resize(preds_h.size()); - std::vector& gpair = out_gpair->data_h(); + HostDeviceVector* out_gpair) override { + CHECK_EQ(preds->Size(), info.labels_.size()) << "label size predict size not match"; + auto& preds_h = preds->HostVector(); + out_gpair->Resize(preds_h.size()); + std::vector& gpair = out_gpair->HostVector(); // quick consistency when group is not available - std::vector tgptr(2, 0); tgptr[1] = static_cast(info.labels.size()); - const std::vector &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr; - CHECK(gptr.size() != 0 && gptr.back() == info.labels.size()) + std::vector tgptr(2, 0); tgptr[1] = static_cast(info.labels_.size()); + const std::vector &gptr = info.group_ptr_.size() == 0 ? tgptr : info.group_ptr_; + CHECK(gptr.size() != 0 && gptr.back() == info.labels_.size()) << "group structure not consistent with #rows"; - const bst_omp_uint ngroup = static_cast(gptr.size() - 1); + const auto ngroup = static_cast(gptr.size() - 1); #pragma omp parallel { // parall construct, declare random number generator here, so that each @@ -64,8 +64,8 @@ class LambdaRankObj : public ObjFunction { for (bst_omp_uint k = 0; k < ngroup; ++k) { lst.clear(); pairs.clear(); for (unsigned j = gptr[k]; j < gptr[k+1]; ++j) { - lst.push_back(ListEntry(preds_h[j], info.labels[j], j)); - gpair[j] = bst_gpair(0.0f, 0.0f); + lst.emplace_back(preds_h[j], info.labels_[j], j); + gpair[j] = GradientPair(0.0f, 0.0f); } std::sort(lst.begin(), lst.end(), ListEntry::CmpPred); rec.resize(lst.size()); @@ -85,9 +85,9 @@ class LambdaRankObj : public ObjFunction { for (unsigned pid = i; pid < j; ++pid) { unsigned ridx = std::uniform_int_distribution(0, nleft + nright - 1)(rnd); if (ridx < nleft) { - pairs.push_back(LambdaPair(rec[ridx].second, rec[pid].second)); + pairs.emplace_back(rec[ridx].second, rec[pid].second); } else { - pairs.push_back(LambdaPair(rec[pid].second, rec[ridx+j-i].second)); + pairs.emplace_back(rec[pid].second, rec[ridx+j-i].second); } } } @@ -101,22 +101,22 @@ class LambdaRankObj : public ObjFunction { if (param_.fix_list_weight != 0.0f) { scale *= param_.fix_list_weight / (gptr[k + 1] - gptr[k]); } - for (size_t i = 0; i < pairs.size(); ++i) { - const ListEntry &pos = lst[pairs[i].pos_index]; - const ListEntry &neg = lst[pairs[i].neg_index]; - const bst_float w = pairs[i].weight * scale; + for (auto & pair : pairs) { + const ListEntry &pos = lst[pair.pos_index]; + const ListEntry &neg = lst[pair.neg_index]; + const bst_float w = pair.weight * scale; const float eps = 1e-16f; bst_float p = common::Sigmoid(pos.pred - neg.pred); bst_float g = p - 1.0f; bst_float h = std::max(p * (1.0f - p), eps); // accumulate gradient and hessian in both pid, and nid - gpair[pos.rindex] += bst_gpair(g * w, 2.0f*w*h); - 
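// Editor's sketch: the pair loop above applies the RankNet-style update — for
// a sampled (pos, neg) pair with weight w, p = sigmoid(s_pos - s_neg),
// g = p - 1, h = max(p(1-p), eps), and both rows receive hessian 2*w*h with
// gradients +g*w and -g*w respectively. As a standalone function (name ours):
#include <algorithm>
#include <cmath>

inline void PairGradSketch(float s_pos, float s_neg, float w,
                           float* g_pos, float* g_neg, float* h_both) {
  const float eps = 1e-16f;
  const float p = 1.0f / (1.0f + std::exp(-(s_pos - s_neg)));
  const float g = p - 1.0f;                     // zero when pos clearly wins
  const float h = std::max(p * (1.0f - p), eps);
  *g_pos = g * w;
  *g_neg = -g * w;
  *h_both = 2.0f * w * h;
}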
gpair[neg.rindex] += bst_gpair(-g * w, 2.0f*w*h); + gpair[pos.rindex] += GradientPair(g * w, 2.0f*w*h); + gpair[neg.rindex] += GradientPair(-g * w, 2.0f*w*h); } } } } - const char* DefaultEvalMetric(void) const override { + const char* DefaultEvalMetric() const override { return "map"; } @@ -177,7 +177,7 @@ class LambdaRankObjNDCG : public LambdaRankObj { void GetLambdaWeight(const std::vector &sorted_list, std::vector *io_pairs) override { std::vector &pairs = *io_pairs; - float IDCG; + float IDCG; // NOLINT { std::vector labels(sorted_list.size()); for (size_t i = 0; i < sorted_list.size(); ++i) { @@ -187,32 +187,32 @@ class LambdaRankObjNDCG : public LambdaRankObj { IDCG = CalcDCG(labels); } if (IDCG == 0.0) { - for (size_t i = 0; i < pairs.size(); ++i) { - pairs[i].weight = 0.0f; + for (auto & pair : pairs) { + pair.weight = 0.0f; } } else { IDCG = 1.0f / IDCG; - for (size_t i = 0; i < pairs.size(); ++i) { - unsigned pos_idx = pairs[i].pos_index; - unsigned neg_idx = pairs[i].neg_index; + for (auto & pair : pairs) { + unsigned pos_idx = pair.pos_index; + unsigned neg_idx = pair.neg_index; float pos_loginv = 1.0f / std::log2(pos_idx + 2.0f); float neg_loginv = 1.0f / std::log2(neg_idx + 2.0f); - int pos_label = static_cast(sorted_list[pos_idx].label); - int neg_label = static_cast(sorted_list[neg_idx].label); + auto pos_label = static_cast(sorted_list[pos_idx].label); + auto neg_label = static_cast(sorted_list[neg_idx].label); bst_float original = ((1 << pos_label) - 1) * pos_loginv + ((1 << neg_label) - 1) * neg_loginv; float changed = ((1 << neg_label) - 1) * pos_loginv + ((1 << pos_label) - 1) * neg_loginv; bst_float delta = (original - changed) * IDCG; if (delta < 0.0f) delta = - delta; - pairs[i].weight = delta; + pair.weight = delta; } } } inline static bst_float CalcDCG(const std::vector &labels) { double sumdcg = 0.0; for (size_t i = 0; i < labels.size(); ++i) { - const unsigned rel = static_cast(labels[i]); + const auto rel = static_cast(labels[i]); if (rel != 0) { sumdcg += ((1 << rel) - 1) / std::log2(static_cast(i + 2)); } @@ -238,7 +238,7 @@ class LambdaRankObjMAP : public LambdaRankObj { float ap_acc_add; /* \brief the accumulated positive instance count */ float hits; - MAPStats(void) {} + MAPStats() = default; MAPStats(float ap_acc, float ap_acc_miss, float ap_acc_add, float hits) : ap_acc(ap_acc), ap_acc_miss(ap_acc_miss), ap_acc_add(ap_acc_add), hits(hits) {} }; @@ -300,10 +300,10 @@ class LambdaRankObjMAP : public LambdaRankObj { std::vector &pairs = *io_pairs; std::vector map_stats; GetMAPStats(sorted_list, &map_stats); - for (size_t i = 0; i < pairs.size(); ++i) { - pairs[i].weight = - GetLambdaMAP(sorted_list, pairs[i].pos_index, - pairs[i].neg_index, &map_stats); + for (auto & pair : pairs) { + pair.weight = + GetLambdaMAP(sorted_list, pair.pos_index, + pair.neg_index, &map_stats); } } }; diff --git a/src/objective/regression_obj.cc b/src/objective/regression_obj.cc index b1f75c221..75af216a1 100644 --- a/src/objective/regression_obj.cc +++ b/src/objective/regression_obj.cc @@ -32,26 +32,26 @@ struct RegLossParam : public dmlc::Parameter { template class RegLossObj : public ObjFunction { public: - RegLossObj() : labels_checked(false) {} + RegLossObj() = default; void Configure( const std::vector > &args) override { param_.InitAllowUnknown(args); } void GetGradient(HostDeviceVector *preds, const MetaInfo &info, - int iter, HostDeviceVector *out_gpair) override { - CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty"; - CHECK_EQ(preds->size(), 
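// Editor's note: in GetLambdaWeight above, "original - changed" factors as
// ((2^lp - 1) - (2^ln - 1)) * (1/log2(p+2) - 1/log2(n+2)), i.e. the DCG gain
// difference times the discount difference; scaled by 1/IDCG this is the
// delta-NDCG of swapping the pair. A compact equivalent (name ours):
#include <cmath>

inline float DeltaNDCGSketch(int pos_label, int neg_label, unsigned pos_idx,
                             unsigned neg_idx, float inv_idcg) {
  const float pos_loginv = 1.0f / std::log2(pos_idx + 2.0f);
  const float neg_loginv = 1.0f / std::log2(neg_idx + 2.0f);
  const float gain_diff =
      static_cast<float>((1 << pos_label) - (1 << neg_label));
  return std::fabs(gain_diff * (pos_loginv - neg_loginv)) * inv_idcg;
}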
info.labels.size()) + int iter, HostDeviceVector *out_gpair) override { + CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty"; + CHECK_EQ(preds->Size(), info.labels_.size()) << "labels are not correctly provided" - << "preds.size=" << preds->size() - << ", label.size=" << info.labels.size(); - auto& preds_h = preds->data_h(); + << "preds.size=" << preds->Size() + << ", label.size=" << info.labels_.size(); + auto& preds_h = preds->HostVector(); - this->LazyCheckLabels(info.labels); - out_gpair->resize(preds_h.size()); - auto& gpair = out_gpair->data_h(); - const omp_ulong n = static_cast(preds_h.size()); - auto gpair_ptr = out_gpair->ptr_h(); + this->LazyCheckLabels(info.labels_); + out_gpair->Resize(preds_h.size()); + auto& gpair = out_gpair->HostVector(); + const auto n = static_cast(preds_h.size()); + auto gpair_ptr = out_gpair->HostPointer(); avx::Float8 scale(param_.scale_pos_weight); const omp_ulong remainder = n % 8; @@ -59,10 +59,10 @@ class RegLossObj : public ObjFunction { // Use a maximum of 8 threads #pragma omp parallel for schedule(static) num_threads(std::min(8, nthread)) for (omp_ulong i = 0; i < n - remainder; i += 8) { - avx::Float8 y(&info.labels[i]); + avx::Float8 y(&info.labels_[i]); avx::Float8 p = Loss::PredTransform(avx::Float8(&preds_h[i])); - avx::Float8 w = info.weights.empty() ? avx::Float8(1.0f) - : avx::Float8(&info.weights[i]); + avx::Float8 w = info.weights_.empty() ? avx::Float8(1.0f) + : avx::Float8(&info.weights_[i]); // Adjust weight w += y * (scale * w - w); avx::Float8 grad = Loss::FirstOrderGradient(p, y); @@ -70,11 +70,11 @@ class RegLossObj : public ObjFunction { avx::StoreGpair(gpair_ptr + i, grad * w, hess * w); } for (omp_ulong i = n - remainder; i < n; ++i) { - auto y = info.labels[i]; + auto y = info.labels_[i]; bst_float p = Loss::PredTransform(preds_h[i]); bst_float w = info.GetWeight(i); w += y * ((param_.scale_pos_weight * w) - w); - gpair[i] = bst_gpair(Loss::FirstOrderGradient(p, y) * w, + gpair[i] = GradientPair(Loss::FirstOrderGradient(p, y) * w, Loss::SecondOrderGradient(p, y) * w); } @@ -85,8 +85,8 @@ class RegLossObj : public ObjFunction { return Loss::DefaultEvalMetric(); } void PredTransform(HostDeviceVector *io_preds) override { - std::vector &preds = io_preds->data_h(); - const bst_omp_uint ndata = static_cast(preds.size()); + std::vector &preds = io_preds->HostVector(); + const auto ndata = static_cast(preds.size()); #pragma omp parallel for schedule(static) for (bst_omp_uint j = 0; j < ndata; ++j) { preds[j] = Loss::PredTransform(preds[j]); @@ -98,14 +98,14 @@ class RegLossObj : public ObjFunction { protected: void LazyCheckLabels(const std::vector &labels) { - if (labels_checked) return; + if (labels_checked_) return; for (auto &y : labels) { CHECK(Loss::CheckLabel(y)) << Loss::LabelErrorMsg(); } - labels_checked = true; + labels_checked_ = true; } RegLossParam param_; - bool labels_checked; + bool labels_checked_{false}; }; // register the objective functions @@ -148,12 +148,12 @@ class PoissonRegression : public ObjFunction { void GetGradient(HostDeviceVector *preds, const MetaInfo &info, int iter, - HostDeviceVector *out_gpair) override { - CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty"; - CHECK_EQ(preds->size(), info.labels.size()) << "labels are not correctly provided"; - auto& preds_h = preds->data_h(); - out_gpair->resize(preds->size()); - auto& gpair = out_gpair->data_h(); + HostDeviceVector *out_gpair) override { + CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty"; + 
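// Editor's sketch: in the remainder loop above, w += y * (scale*w - w)
// multiplies the row weight by scale_pos_weight exactly when y == 1. The
// Loss policy plugged into RegLossObj lives in regression_loss.h; for the
// logistic case it is assumed to have this familiar shape (struct name ours):
#include <algorithm>
#include <cmath>

struct LogisticLossSketch {
  static float PredTransform(float x) {            // margin -> probability
    return 1.0f / (1.0f + std::exp(-x));
  }
  static float FirstOrderGradient(float p, float y) { return p - y; }
  static float SecondOrderGradient(float p, float /*y*/) {
    const float eps = 1e-16f;
    return std::max(p * (1.0f - p), eps);          // guard against zero hessian
  }
};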
CHECK_EQ(preds->Size(), info.labels_.size()) << "labels are not correctly provided"; + auto& preds_h = preds->HostVector(); + out_gpair->Resize(preds->Size()); + auto& gpair = out_gpair->HostVector(); // check if label in range bool label_correct = true; // start calculating gradient @@ -162,9 +162,9 @@ class PoissonRegression : public ObjFunction { for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*) bst_float p = preds_h[i]; bst_float w = info.GetWeight(i); - bst_float y = info.labels[i]; + bst_float y = info.labels_[i]; if (y >= 0.0f) { - gpair[i] = bst_gpair((std::exp(p) - y) * w, + gpair[i] = GradientPair((std::exp(p) - y) * w, std::exp(p + param_.max_delta_step) * w); } else { label_correct = false; @@ -173,7 +173,7 @@ class PoissonRegression : public ObjFunction { CHECK(label_correct) << "PoissonRegression: label must be nonnegative"; } void PredTransform(HostDeviceVector *io_preds) override { - std::vector &preds = io_preds->data_h(); + std::vector &preds = io_preds->HostVector(); const long ndata = static_cast(preds.size()); // NOLINT(*) #pragma omp parallel for schedule(static) for (long j = 0; j < ndata; ++j) { // NOLINT(*) @@ -186,7 +186,7 @@ class PoissonRegression : public ObjFunction { bst_float ProbToMargin(bst_float base_score) const override { return std::log(base_score); } - const char* DefaultEvalMetric(void) const override { + const char* DefaultEvalMetric() const override { return "poisson-nloglik"; } @@ -209,12 +209,12 @@ class CoxRegression : public ObjFunction { void GetGradient(HostDeviceVector *preds, const MetaInfo &info, int iter, - HostDeviceVector *out_gpair) override { - CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty"; - CHECK_EQ(preds->size(), info.labels.size()) << "labels are not correctly provided"; - auto& preds_h = preds->data_h(); - out_gpair->resize(preds_h.size()); - auto& gpair = out_gpair->data_h(); + HostDeviceVector *out_gpair) override { + CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty"; + CHECK_EQ(preds->Size(), info.labels_.size()) << "labels are not correctly provided"; + auto& preds_h = preds->HostVector(); + out_gpair->Resize(preds_h.size()); + auto& gpair = out_gpair->HostVector(); const std::vector &label_order = info.LabelAbsSort(); const omp_ulong ndata = static_cast(preds_h.size()); // NOLINT(*) @@ -236,7 +236,7 @@ class CoxRegression : public ObjFunction { const double p = preds_h[ind]; const double exp_p = std::exp(p); const double w = info.GetWeight(ind); - const double y = info.labels[ind]; + const double y = info.labels_[ind]; const double abs_y = std::abs(y); // only update the denominator after we move forward in time (labels are sorted) @@ -257,14 +257,14 @@ class CoxRegression : public ObjFunction { const double grad = exp_p*r_k - static_cast(y > 0); const double hess = exp_p*r_k - exp_p*exp_p * s_k; - gpair.at(ind) = bst_gpair(grad * w, hess * w); + gpair.at(ind) = GradientPair(grad * w, hess * w); last_abs_y = abs_y; last_exp_p = exp_p; } } void PredTransform(HostDeviceVector *io_preds) override { - std::vector &preds = io_preds->data_h(); + std::vector &preds = io_preds->HostVector(); const long ndata = static_cast(preds.size()); // NOLINT(*) #pragma omp parallel for schedule(static) for (long j = 0; j < ndata; ++j) { // NOLINT(*) @@ -277,7 +277,7 @@ class CoxRegression : public ObjFunction { bst_float ProbToMargin(bst_float base_score) const override { return std::log(base_score); } - const char* DefaultEvalMetric(void) const override { + const char* DefaultEvalMetric() const override { 
return "cox-nloglik"; } }; @@ -297,12 +297,12 @@ class GammaRegression : public ObjFunction { void GetGradient(HostDeviceVector *preds, const MetaInfo &info, int iter, - HostDeviceVector *out_gpair) override { - CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty"; - CHECK_EQ(preds->size(), info.labels.size()) << "labels are not correctly provided"; - auto& preds_h = preds->data_h(); - out_gpair->resize(preds_h.size()); - auto& gpair = out_gpair->data_h(); + HostDeviceVector *out_gpair) override { + CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty"; + CHECK_EQ(preds->Size(), info.labels_.size()) << "labels are not correctly provided"; + auto& preds_h = preds->HostVector(); + out_gpair->Resize(preds_h.size()); + auto& gpair = out_gpair->HostVector(); // check if label in range bool label_correct = true; // start calculating gradient @@ -311,9 +311,9 @@ class GammaRegression : public ObjFunction { for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*) bst_float p = preds_h[i]; bst_float w = info.GetWeight(i); - bst_float y = info.labels[i]; + bst_float y = info.labels_[i]; if (y >= 0.0f) { - gpair[i] = bst_gpair((1 - y / std::exp(p)) * w, y / std::exp(p) * w); + gpair[i] = GradientPair((1 - y / std::exp(p)) * w, y / std::exp(p) * w); } else { label_correct = false; } @@ -321,7 +321,7 @@ class GammaRegression : public ObjFunction { CHECK(label_correct) << "GammaRegression: label must be positive"; } void PredTransform(HostDeviceVector *io_preds) override { - std::vector &preds = io_preds->data_h(); + std::vector &preds = io_preds->HostVector(); const long ndata = static_cast(preds.size()); // NOLINT(*) #pragma omp parallel for schedule(static) for (long j = 0; j < ndata; ++j) { // NOLINT(*) @@ -334,7 +334,7 @@ class GammaRegression : public ObjFunction { bst_float ProbToMargin(bst_float base_score) const override { return std::log(base_score); } - const char* DefaultEvalMetric(void) const override { + const char* DefaultEvalMetric() const override { return "gamma-nloglik"; } }; @@ -364,27 +364,27 @@ class TweedieRegression : public ObjFunction { void GetGradient(HostDeviceVector *preds, const MetaInfo &info, int iter, - HostDeviceVector *out_gpair) override { - CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty"; - CHECK_EQ(preds->size(), info.labels.size()) << "labels are not correctly provided"; - auto& preds_h = preds->data_h(); - out_gpair->resize(preds->size()); - auto& gpair = out_gpair->data_h(); + HostDeviceVector *out_gpair) override { + CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty"; + CHECK_EQ(preds->Size(), info.labels_.size()) << "labels are not correctly provided"; + auto& preds_h = preds->HostVector(); + out_gpair->Resize(preds->Size()); + auto& gpair = out_gpair->HostVector(); // check if label in range bool label_correct = true; // start calculating gradient - const omp_ulong ndata = static_cast(preds->size()); // NOLINT(*) + const omp_ulong ndata = static_cast(preds->Size()); // NOLINT(*) #pragma omp parallel for schedule(static) for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*) bst_float p = preds_h[i]; bst_float w = info.GetWeight(i); - bst_float y = info.labels[i]; + bst_float y = info.labels_[i]; float rho = param_.tweedie_variance_power; if (y >= 0.0f) { bst_float grad = -y * std::exp((1 - rho) * p) + std::exp((2 - rho) * p); bst_float hess = -y * (1 - rho) * \ std::exp((1 - rho) * p) + (2 - rho) * std::exp((2 - rho) * p); - gpair[i] = bst_gpair(grad * w, hess * w); + gpair[i] = GradientPair(grad * w, hess * w); } 
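// Editor's note: the Gamma gradient above follows from the log link. Up to
// constants, the per-row negative log-likelihood is y*exp(-p) + p, so
// d/dp = 1 - y*exp(-p) and d^2/dp^2 = y*exp(-p), matching
// GradientPair((1 - y / exp(p)) * w, y / exp(p) * w). As a sketch (name ours):
#include <cmath>
#include <utility>

inline std::pair<float, float> GammaGradSketch(float p, float y, float w) {
  const float y_over_mu = y * std::exp(-p);        // y / exp(p)
  return {(1.0f - y_over_mu) * w, y_over_mu * w};  // (grad, hess)
}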
else { label_correct = false; } @@ -392,14 +392,14 @@ class TweedieRegression : public ObjFunction { CHECK(label_correct) << "TweedieRegression: label must be nonnegative"; } void PredTransform(HostDeviceVector *io_preds) override { - std::vector &preds = io_preds->data_h(); + std::vector &preds = io_preds->HostVector(); const long ndata = static_cast(preds.size()); // NOLINT(*) #pragma omp parallel for schedule(static) for (long j = 0; j < ndata; ++j) { // NOLINT(*) preds[j] = std::exp(preds[j]); } } - const char* DefaultEvalMetric(void) const override { + const char* DefaultEvalMetric() const override { std::ostringstream os; os << "tweedie-nloglik@" << param_.tweedie_variance_power; std::string metric = os.str(); diff --git a/src/objective/regression_obj_gpu.cu b/src/objective/regression_obj_gpu.cu index 45270e316..7369d4ec2 100644 --- a/src/objective/regression_obj_gpu.cu +++ b/src/objective/regression_obj_gpu.cu @@ -16,11 +16,12 @@ #include "../common/host_device_vector.h" #include "./regression_loss.h" -using namespace dh; namespace xgboost { namespace obj { +using dh::DVec; + DMLC_REGISTRY_FILE_TAG(regression_obj_gpu); struct GPURegLossParam : public dmlc::Parameter { @@ -43,7 +44,7 @@ struct GPURegLossParam : public dmlc::Parameter { // GPU kernel for gradient computation template __global__ void get_gradient_k -(bst_gpair *__restrict__ out_gpair, unsigned int *__restrict__ label_correct, +(GradientPair *__restrict__ out_gpair, unsigned int *__restrict__ label_correct, const float * __restrict__ preds, const float * __restrict__ labels, const float * __restrict__ weights, int n, float scale_pos_weight) { int i = threadIdx.x + blockIdx.x * blockDim.x; @@ -56,7 +57,7 @@ __global__ void get_gradient_k w *= scale_pos_weight; if (!Loss::CheckLabel(label)) atomicAnd(label_correct, 0); - out_gpair[i] = bst_gpair + out_gpair[i] = GradientPair (Loss::FirstOrderGradient(p, label) * w, Loss::SecondOrderGradient(p, label) * w); } @@ -75,40 +76,40 @@ class GPURegLossObj : public ObjFunction { protected: // manages device data struct DeviceData { - dvec labels, weights; - dvec label_correct; + DVec labels, weights; + DVec label_correct; // allocate everything on device - DeviceData(bulk_allocator* ba, int device_idx, size_t n) { - ba->allocate(device_idx, false, + DeviceData(dh::BulkAllocator* ba, int device_idx, size_t n) { + ba->Allocate(device_idx, false, &labels, n, &weights, n, &label_correct, 1); } - size_t size() const { return labels.size(); } + size_t Size() const { return labels.Size(); } }; bool copied_; - std::unique_ptr> ba_; + std::unique_ptr> ba_; std::unique_ptr data_; HostDeviceVector preds_d_; - HostDeviceVector out_gpair_d_; + HostDeviceVector out_gpair_d_; // allocate device data for n elements, do nothing if enough memory is allocated already void LazyResize(int n) { - if (data_.get() != nullptr && data_->size() >= n) + if (data_.get() != nullptr && data_->Size() >= n) return; copied_ = false; // free the old data and allocate the new data - ba_.reset(new bulk_allocator()); + ba_.reset(new dh::BulkAllocator()); data_.reset(new DeviceData(ba_.get(), 0, n)); - preds_d_.resize(n, 0.0f, param_.gpu_id); - out_gpair_d_.resize(n, bst_gpair(), param_.gpu_id); + preds_d_.Resize(n, 0.0f, param_.gpu_id); + out_gpair_d_.Resize(n, GradientPair(), param_.gpu_id); } public: - GPURegLossObj() : copied_(false), preds_d_(0, -1), out_gpair_d_(0, -1) {} + GPURegLossObj() : copied_(false), preds_d_(0, -1), out_gpair_d_({}, -1) {} void Configure(const std::vector >& args) override { 
param_.InitAllowUnknown(args); @@ -118,32 +119,32 @@ class GPURegLossObj : public ObjFunction { void GetGradient(HostDeviceVector* preds, const MetaInfo &info, int iter, - HostDeviceVector* out_gpair) override { - CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty"; - CHECK_EQ(preds->size(), info.labels.size()) + HostDeviceVector* out_gpair) override { + CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty"; + CHECK_EQ(preds->Size(), info.labels_.size()) << "labels are not correctly provided" - << "preds.size=" << preds->size() << ", label.size=" << info.labels.size(); - size_t ndata = preds->size(); - out_gpair->resize(ndata, bst_gpair(), param_.gpu_id); + << "preds.size=" << preds->Size() << ", label.size=" << info.labels_.size(); + size_t ndata = preds->Size(); + out_gpair->Resize(ndata, GradientPair(), param_.gpu_id); LazyResize(ndata); - GetGradientDevice(preds->ptr_d(param_.gpu_id), info, iter, - out_gpair->ptr_d(param_.gpu_id), ndata); + GetGradientDevice(preds->DevicePointer(param_.gpu_id), info, iter, + out_gpair->DevicePointer(param_.gpu_id), ndata); } private: void GetGradientDevice(float* preds, const MetaInfo &info, int iter, - bst_gpair* out_gpair, size_t n) { - safe_cuda(cudaSetDevice(param_.gpu_id)); + GradientPair* out_gpair, size_t n) { + dh::safe_cuda(cudaSetDevice(param_.gpu_id)); DeviceData& d = *data_; - d.label_correct.fill(1); + d.label_correct.Fill(1); // only copy the labels and weights once, similar to how the data is copied if (!copied_) { - thrust::copy(info.labels.begin(), info.labels.begin() + n, + thrust::copy(info.labels_.begin(), info.labels_.begin() + n, d.labels.tbegin()); - if (info.weights.size() > 0) { - thrust::copy(info.weights.begin(), info.weights.begin() + n, + if (info.weights_.size() > 0) { + thrust::copy(info.weights_.begin(), info.weights_.begin() + n, d.weights.tbegin()); } copied_ = true; @@ -151,11 +152,11 @@ class GPURegLossObj : public ObjFunction { // run the kernel const int block = 256; - get_gradient_k<<>> - (out_gpair, d.label_correct.data(), preds, - d.labels.data(), info.weights.size() > 0 ? d.weights.data() : nullptr, + get_gradient_k<<>> + (out_gpair, d.label_correct.Data(), preds, + d.labels.Data(), info.weights_.size() > 0 ? 
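// Editor's note: the launch configuration for get_gradient_k above follows
// the one-thread-per-row pattern — the grid is rounded up so all n rows are
// covered, with the extra threads assumed to be masked off by a bounds check
// inside the kernel. The arithmetic behind dh::DivRoundUp (helper name below
// is ours):
inline int DivRoundUpSketch(int n, int block) {
  return (n + block - 1) / block;
}
// e.g. n = 1000 rows with block = 256 gives a grid of 4 blocks (1024
// threads); the 24 surplus threads do no work.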
d.weights.Data() : nullptr, n, param_.scale_pos_weight); - safe_cuda(cudaGetLastError()); + dh::safe_cuda(cudaGetLastError()); // copy output data from the GPU unsigned int label_correct_h; @@ -173,15 +174,15 @@ class GPURegLossObj : public ObjFunction { } void PredTransform(HostDeviceVector *io_preds) override { - PredTransformDevice(io_preds->ptr_d(param_.gpu_id), io_preds->size()); + PredTransformDevice(io_preds->DevicePointer(param_.gpu_id), io_preds->Size()); } void PredTransformDevice(float* preds, size_t n) { - safe_cuda(cudaSetDevice(param_.gpu_id)); + dh::safe_cuda(cudaSetDevice(param_.gpu_id)); const int block = 256; - pred_transform_k<<>>(preds, n); - safe_cuda(cudaGetLastError()); - safe_cuda(cudaDeviceSynchronize()); + pred_transform_k<<>>(preds, n); + dh::safe_cuda(cudaGetLastError()); + dh::safe_cuda(cudaDeviceSynchronize()); } diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc index 9c956b5d9..2fcb97bdd 100644 --- a/src/predictor/cpu_predictor.cc +++ b/src/predictor/cpu_predictor.cc @@ -24,7 +24,7 @@ class CPUPredictor : public Predictor { for (size_t i = tree_begin; i < tree_end; ++i) { if (tree_info[i] == bst_group) { int tid = trees[i]->GetLeafIndex(*p_feats, root_index); - psum += (*trees[i])[tid].leaf_value(); + psum += (*trees[i])[tid].LeafValue(); } } p_feats->Drop(inst); @@ -45,35 +45,35 @@ class CPUPredictor : public Predictor { std::vector* out_preds, const gbm::GBTreeModel& model, int num_group, unsigned tree_begin, unsigned tree_end) { - const MetaInfo& info = p_fmat->info(); + const MetaInfo& info = p_fmat->Info(); const int nthread = omp_get_max_threads(); InitThreadTemp(nthread, model.param.num_feature); std::vector& preds = *out_preds; CHECK_EQ(model.param.size_leaf_vector, 0) << "size_leaf_vector is enforced to 0 so far"; - CHECK_EQ(preds.size(), p_fmat->info().num_row * num_group); + CHECK_EQ(preds.size(), p_fmat->Info().num_row_ * num_group); // start collecting the prediction dmlc::DataIter* iter = p_fmat->RowIterator(); iter->BeforeFirst(); while (iter->Next()) { const RowBatch& batch = iter->Value(); // parallel over local batch - const int K = 8; - const bst_omp_uint nsize = static_cast(batch.size); - const bst_omp_uint rest = nsize % K; + constexpr int kUnroll = 8; + const auto nsize = static_cast(batch.size); + const bst_omp_uint rest = nsize % kUnroll; #pragma omp parallel for schedule(static) - for (bst_omp_uint i = 0; i < nsize - rest; i += K) { + for (bst_omp_uint i = 0; i < nsize - rest; i += kUnroll) { const int tid = omp_get_thread_num(); RegTree::FVec& feats = thread_temp[tid]; - int64_t ridx[K]; - RowBatch::Inst inst[K]; - for (int k = 0; k < K; ++k) { + int64_t ridx[kUnroll]; + RowBatch::Inst inst[kUnroll]; + for (int k = 0; k < kUnroll; ++k) { ridx[k] = static_cast(batch.base_rowid + i + k); } - for (int k = 0; k < K; ++k) { + for (int k = 0; k < kUnroll; ++k) { inst[k] = batch[i + k]; } - for (int k = 0; k < K; ++k) { + for (int k = 0; k < kUnroll; ++k) { for (int gid = 0; gid < num_group; ++gid) { const size_t offset = ridx[k] * num_group + gid; preds[offset] += this->PredValue( @@ -84,7 +84,7 @@ class CPUPredictor : public Predictor { } for (bst_omp_uint i = nsize - rest; i < nsize; ++i) { RegTree::FVec& feats = thread_temp[0]; - const int64_t ridx = static_cast(batch.base_rowid + i); + const auto ridx = static_cast(batch.base_rowid + i); const RowBatch::Inst inst = batch[i]; for (int gid = 0; gid < num_group; ++gid) { const size_t offset = ridx * num_group + gid; @@ -113,10 +113,10 @@ class CPUPredictor : 
public Predictor { auto it = cache_.find(dmat); if (it != cache_.end()) { HostDeviceVector& y = it->second.predictions; - if (y.size() != 0) { - out_preds->resize(y.size()); - std::copy(y.data_h().begin(), y.data_h().end(), - out_preds->data_h().begin()); + if (y.Size() != 0) { + out_preds->Resize(y.Size()); + std::copy(y.HostVector().begin(), y.HostVector().end(), + out_preds->HostVector().begin()); return true; } } @@ -127,12 +127,12 @@ class CPUPredictor : public Predictor { void InitOutPredictions(const MetaInfo& info, HostDeviceVector* out_preds, const gbm::GBTreeModel& model) const { - size_t n = model.param.num_output_group * info.num_row; - const std::vector& base_margin = info.base_margin; - out_preds->resize(n); - std::vector& out_preds_h = out_preds->data_h(); + size_t n = model.param.num_output_group * info.num_row_; + const std::vector& base_margin = info.base_margin_; + out_preds->Resize(n); + std::vector& out_preds_h = out_preds->HostVector(); if (base_margin.size() != 0) { - CHECK_EQ(out_preds->size(), n); + CHECK_EQ(out_preds->Size(), n); std::copy(base_margin.begin(), base_margin.end(), out_preds_h.begin()); } else { std::fill(out_preds_h.begin(), out_preds_h.end(), model.base_margin); @@ -147,14 +147,14 @@ class CPUPredictor : public Predictor { return; } - this->InitOutPredictions(dmat->info(), out_preds, model); + this->InitOutPredictions(dmat->Info(), out_preds, model); ntree_limit *= model.param.num_output_group; if (ntree_limit == 0 || ntree_limit > model.trees.size()) { ntree_limit = static_cast(model.trees.size()); } - this->PredLoopInternal(dmat, &out_preds->data_h(), model, + this->PredLoopInternal(dmat, &out_preds->HostVector(), model, tree_begin, ntree_limit); } @@ -167,9 +167,9 @@ class CPUPredictor : public Predictor { for (auto& kv : cache_) { PredictionCacheEntry& e = kv.second; - if (e.predictions.size() == 0) { - InitOutPredictions(e.data->info(), &(e.predictions), model); - PredLoopInternal(e.data.get(), &(e.predictions.data_h()), model, 0, + if (e.predictions.Size() == 0) { + InitOutPredictions(e.data->Info(), &(e.predictions), model); + PredLoopInternal(e.data.get(), &(e.predictions.HostVector()), model, 0, model.trees.size()); } else if (model.param.num_output_group == 1 && updaters->size() > 0 && num_new_trees == 1 && @@ -177,7 +177,7 @@ class CPUPredictor : public Predictor { &(e.predictions))) { {} // do nothing } else { - PredLoopInternal(e.data.get(), &(e.predictions.data_h()), model, old_ntree, + PredLoopInternal(e.data.get(), &(e.predictions.HostVector()), model, old_ntree, model.trees.size()); } } @@ -209,25 +209,25 @@ class CPUPredictor : public Predictor { const gbm::GBTreeModel& model, unsigned ntree_limit) override { const int nthread = omp_get_max_threads(); InitThreadTemp(nthread, model.param.num_feature); - const MetaInfo& info = p_fmat->info(); + const MetaInfo& info = p_fmat->Info(); // number of valid trees ntree_limit *= model.param.num_output_group; if (ntree_limit == 0 || ntree_limit > model.trees.size()) { ntree_limit = static_cast(model.trees.size()); } std::vector& preds = *out_preds; - preds.resize(info.num_row * ntree_limit); + preds.resize(info.num_row_ * ntree_limit); // start collecting the prediction dmlc::DataIter* iter = p_fmat->RowIterator(); iter->BeforeFirst(); while (iter->Next()) { const RowBatch& batch = iter->Value(); // parallel over local batch - const bst_omp_uint nsize = static_cast(batch.size); + const auto nsize = static_cast(batch.size); #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i 
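// Editor's sketch: PredValue above relies on RegTree::GetLeafIndex — descend
// from the root, take the recorded default branch when the feature is
// missing, and read the leaf value on arrival. The node layout below is
// illustrative only, not the RegTree API:
#include <vector>

struct NodeSketch {
  int left, right, fidx;
  float split_cond, leaf_value;
  bool is_leaf, default_left;
};

inline float PredValueSketch(const std::vector<NodeSketch>& tree,
                             const float* fvalue, const bool* is_missing) {
  int nid = 0;  // root
  while (!tree[nid].is_leaf) {
    const NodeSketch& n = tree[nid];
    if (is_missing[n.fidx]) {
      nid = n.default_left ? n.left : n.right;   // learned default direction
    } else {
      nid = fvalue[n.fidx] < n.split_cond ? n.left : n.right;
    }
  }
  return tree[nid].leaf_value;
}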
< nsize; ++i) { const int tid = omp_get_thread_num(); - size_t ridx = static_cast(batch.base_rowid + i); + auto ridx = static_cast(batch.base_rowid + i); RegTree::FVec& feats = thread_temp[tid]; feats.Fill(batch[i]); for (unsigned j = 0; j < ntree_limit; ++j) { @@ -246,7 +246,7 @@ class CPUPredictor : public Predictor { unsigned condition_feature) override { const int nthread = omp_get_max_threads(); InitThreadTemp(nthread, model.param.num_feature); - const MetaInfo& info = p_fmat->info(); + const MetaInfo& info = p_fmat->Info(); // number of valid trees ntree_limit *= model.param.num_output_group; if (ntree_limit == 0 || ntree_limit > model.trees.size()) { @@ -256,7 +256,7 @@ class CPUPredictor : public Predictor { size_t ncolumns = model.param.num_feature + 1; // allocate space for (number of features + bias) times the number of rows std::vector& contribs = *out_contribs; - contribs.resize(info.num_row * ncolumns * model.param.num_output_group); + contribs.resize(info.num_row_ * ncolumns * model.param.num_output_group); // make sure contributions is zeroed, we could be reusing a previously // allocated one std::fill(contribs.begin(), contribs.end(), 0); @@ -267,15 +267,15 @@ class CPUPredictor : public Predictor { } // start collecting the contributions dmlc::DataIter* iter = p_fmat->RowIterator(); - const std::vector& base_margin = info.base_margin; + const std::vector& base_margin = info.base_margin_; iter->BeforeFirst(); while (iter->Next()) { const RowBatch& batch = iter->Value(); // parallel over local batch - const bst_omp_uint nsize = static_cast(batch.size); + const auto nsize = static_cast(batch.size); #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < nsize; ++i) { - size_t row_idx = static_cast(batch.base_rowid + i); + auto row_idx = static_cast(batch.base_rowid + i); unsigned root_id = info.GetRoot(row_idx); RegTree::FVec& feats = thread_temp[omp_get_thread_num()]; // loop over all classes @@ -310,7 +310,7 @@ class CPUPredictor : public Predictor { void PredictInteractionContributions(DMatrix* p_fmat, std::vector* out_contribs, const gbm::GBTreeModel& model, unsigned ntree_limit, bool approximate) override { - const MetaInfo& info = p_fmat->info(); + const MetaInfo& info = p_fmat->Info(); const int ngroup = model.param.num_output_group; size_t ncolumns = model.param.num_feature; const unsigned row_chunk = ngroup * (ncolumns + 1) * (ncolumns + 1); @@ -319,10 +319,10 @@ class CPUPredictor : public Predictor { // allocate space for (number of features^2) times the number of rows and tmp off/on contribs std::vector& contribs = *out_contribs; - contribs.resize(info.num_row * ngroup * (ncolumns + 1) * (ncolumns + 1)); - std::vector contribs_off(info.num_row * ngroup * (ncolumns + 1)); - std::vector contribs_on(info.num_row * ngroup * (ncolumns + 1)); - std::vector contribs_diag(info.num_row * ngroup * (ncolumns + 1)); + contribs.resize(info.num_row_ * ngroup * (ncolumns + 1) * (ncolumns + 1)); + std::vector contribs_off(info.num_row_ * ngroup * (ncolumns + 1)); + std::vector contribs_on(info.num_row_ * ngroup * (ncolumns + 1)); + std::vector contribs_diag(info.num_row_ * ngroup * (ncolumns + 1)); // Compute the difference in effects when conditioning on each of the features on and off // see: Axiomatic characterizations of probabilistic and @@ -332,7 +332,7 @@ class CPUPredictor : public Predictor { PredictContribution(p_fmat, &contribs_off, model, ntree_limit, approximate, -1, i); PredictContribution(p_fmat, &contribs_on, model, ntree_limit, approximate, 1, i); 
- for (size_t j = 0; j < info.num_row; ++j) { + for (size_t j = 0; j < info.num_row_; ++j) { for (int l = 0; l < ngroup; ++l) { const unsigned o_offset = j * row_chunk + l * mrow_chunk + i * (ncolumns + 1); const unsigned c_offset = j * crow_chunk + l * (ncolumns + 1); diff --git a/src/predictor/gpu_predictor.cu b/src/predictor/gpu_predictor.cu index 07469caeb..bc928059c 100644 --- a/src/predictor/gpu_predictor.cu +++ b/src/predictor/gpu_predictor.cu @@ -36,8 +36,8 @@ struct GPUPredictionParam : public dmlc::Parameter { }; DMLC_REGISTER_PARAMETER(GPUPredictionParam); -template -void increment_offset(iter_t begin_itr, iter_t end_itr, size_t amount) { +template +void IncrementOffset(IterT begin_itr, IterT end_itr, size_t amount) { thrust::transform(begin_itr, end_itr, begin_itr, [=] __device__(size_t elem) { return elem + amount; }); } @@ -50,16 +50,16 @@ void increment_offset(iter_t begin_itr, iter_t end_itr, size_t amount) { struct DeviceMatrix { DMatrix* p_mat; // Pointer to the original matrix on the host - dh::bulk_allocator ba; - dh::dvec row_ptr; - dh::dvec data; + dh::BulkAllocator ba; + dh::DVec row_ptr; + dh::DVec data; thrust::device_vector predictions; DeviceMatrix(DMatrix* dmat, int device_idx, bool silent) : p_mat(dmat) { dh::safe_cuda(cudaSetDevice(device_idx)); - auto info = dmat->info(); - ba.allocate(device_idx, silent, &row_ptr, info.num_row + 1, &data, - info.num_nonzero); + auto info = dmat->Info(); + ba.Allocate(device_idx, silent, &row_ptr, info.num_row_ + 1, &data, + info.num_nonzero_); auto iter = dmat->RowIterator(); iter->BeforeFirst(); size_t data_offset = 0; @@ -71,7 +71,7 @@ struct DeviceMatrix { if (batch.base_rowid > 0) { auto begin_itr = row_ptr.tbegin() + batch.base_rowid; auto end_itr = begin_itr + batch.size + 1; - increment_offset(begin_itr, end_itr, batch.base_rowid); + IncrementOffset(begin_itr, end_itr, batch.base_rowid); } // Copy data thrust::copy(batch.data_ptr, batch.data_ptr + batch.ind_ptr[batch.size], @@ -103,17 +103,17 @@ struct DevicePredictionNode { NodeValue val; DevicePredictionNode(const RegTree::Node& n) { // NOLINT - this->left_child_idx = n.cleft(); - this->right_child_idx = n.cright(); - this->fidx = n.split_index(); - if (n.default_left()) { + this->left_child_idx = n.LeftChild(); + this->right_child_idx = n.RightChild(); + this->fidx = n.SplitIndex(); + if (n.DefaultLeft()) { fidx |= (1U << 31); } - if (n.is_leaf()) { - this->val.leaf_weight = n.leaf_value(); + if (n.IsLeaf()) { + this->val.leaf_weight = n.LeafValue(); } else { - this->val.fvalue = n.split_cond(); + this->val.fvalue = n.SplitCond(); } } @@ -155,7 +155,7 @@ struct ElementLoader { if (use_shared) { bst_uint global_idx = blockDim.x * blockIdx.x + threadIdx.x; int shared_elements = blockDim.x * num_features; - dh::block_fill(smem, shared_elements, nanf("")); + dh::BlockFill(smem, shared_elements, nanf("")); __syncthreads(); if (global_idx < num_rows) { bst_uint elem_begin = d_row_ptr[global_idx]; @@ -309,16 +309,16 @@ class GPUPredictor : public xgboost::Predictor { thrust::copy(model.tree_info.begin(), model.tree_info.end(), tree_group.begin()); - device_matrix->predictions.resize(out_preds->size()); + device_matrix->predictions.resize(out_preds->Size()); thrust::copy(out_preds->tbegin(param.gpu_id), out_preds->tend(param.gpu_id), device_matrix->predictions.begin()); const int BLOCK_THREADS = 128; const int GRID_SIZE = static_cast( - dh::div_round_up(device_matrix->row_ptr.size() - 1, BLOCK_THREADS)); + dh::DivRoundUp(device_matrix->row_ptr.Size() - 1, BLOCK_THREADS)); 
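// Editor's note: the fidx |= (1U << 31) line in DevicePredictionNode above
// packs the default-left flag into the top bit of the feature index so each
// device node stays compact. The pack/unpack pair it implies (helper names
// are ours):
inline unsigned PackFidx(unsigned fidx, bool default_left) {
  return default_left ? (fidx | (1U << 31)) : fidx;
}
inline bool UnpackDefaultLeft(unsigned packed) { return (packed >> 31) != 0; }
inline unsigned UnpackFidx(unsigned packed) { return packed & ~(1U << 31); }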
int shared_memory_bytes = static_cast( - sizeof(float) * device_matrix->p_mat->info().num_col * BLOCK_THREADS); + sizeof(float) * device_matrix->p_mat->Info().num_col_ * BLOCK_THREADS); bool use_shared = true; if (shared_memory_bytes > max_shared_memory_bytes) { shared_memory_bytes = 0; @@ -327,11 +327,11 @@ class GPUPredictor : public xgboost::Predictor { PredictKernel <<>>( - dh::raw(nodes), dh::raw(device_matrix->predictions), - dh::raw(tree_segments), dh::raw(tree_group), - device_matrix->row_ptr.data(), device_matrix->data.data(), - tree_begin, tree_end, device_matrix->p_mat->info().num_col, - device_matrix->p_mat->info().num_row, use_shared, + dh::Raw(nodes), dh::Raw(device_matrix->predictions), + dh::Raw(tree_segments), dh::Raw(tree_group), + device_matrix->row_ptr.Data(), device_matrix->data.Data(), + tree_begin, tree_end, device_matrix->p_mat->Info().num_col_, + device_matrix->p_mat->Info().num_row_, use_shared, model.param.num_output_group); dh::safe_cuda(cudaDeviceSynchronize()); @@ -349,7 +349,7 @@ class GPUPredictor : public xgboost::Predictor { if (this->PredictFromCache(dmat, out_preds, model, ntree_limit)) { return; } - this->InitOutPredictions(dmat->info(), out_preds, model); + this->InitOutPredictions(dmat->Info(), out_preds, model); int tree_end = ntree_limit * model.param.num_output_group; @@ -364,11 +364,11 @@ class GPUPredictor : public xgboost::Predictor { void InitOutPredictions(const MetaInfo& info, HostDeviceVector* out_preds, const gbm::GBTreeModel& model) const { - size_t n = model.param.num_output_group * info.num_row; - const std::vector& base_margin = info.base_margin; - out_preds->resize(n, 0.0f, param.gpu_id); + size_t n = model.param.num_output_group * info.num_row_; + const std::vector& base_margin = info.base_margin_; + out_preds->Resize(n, 0.0f, param.gpu_id); if (base_margin.size() != 0) { - CHECK_EQ(out_preds->size(), n); + CHECK_EQ(out_preds->Size(), n); thrust::copy(base_margin.begin(), base_margin.end(), out_preds->tbegin(param.gpu_id)); } else { @@ -384,12 +384,12 @@ class GPUPredictor : public xgboost::Predictor { auto it = cache_.find(dmat); if (it != cache_.end()) { HostDeviceVector& y = it->second.predictions; - if (y.size() != 0) { + if (y.Size() != 0) { dh::safe_cuda(cudaSetDevice(param.gpu_id)); - out_preds->resize(y.size(), 0.0f, param.gpu_id); + out_preds->Resize(y.Size(), 0.0f, param.gpu_id); dh::safe_cuda(cudaMemcpy( - out_preds->ptr_d(param.gpu_id), y.ptr_d(param.gpu_id), - out_preds->size() * sizeof(bst_float), cudaMemcpyDefault)); + out_preds->DevicePointer(param.gpu_id), y.DevicePointer(param.gpu_id), + out_preds->Size() * sizeof(bst_float), cudaMemcpyDefault)); return true; } } @@ -409,9 +409,9 @@ class GPUPredictor : public xgboost::Predictor { DMatrix* dmat = kv.first; HostDeviceVector& predictions = e.predictions; - if (predictions.size() == 0) { + if (predictions.Size() == 0) { // ensure that the device in predictions is correct - predictions.resize(0, 0.0f, param.gpu_id); + predictions.Resize(0, 0.0f, param.gpu_id); cpu_predictor->PredictBatch(dmat, &predictions, model, 0, static_cast(model.trees.size())); } else if (model.param.num_output_group == 1 && updaters->size() > 0 && @@ -462,7 +462,7 @@ class GPUPredictor : public xgboost::Predictor { Predictor::Init(cfg, cache); cpu_predictor->Init(cfg, cache); param.InitAllowUnknown(cfg); - max_shared_memory_bytes = dh::max_shared_memory(param.gpu_id); + max_shared_memory_bytes = dh::MaxSharedMemory(param.gpu_id); } private: diff --git a/src/predictor/predictor.cc 
b/src/predictor/predictor.cc index a4ea6e82c..e88528287 100644 --- a/src/predictor/predictor.cc +++ b/src/predictor/predictor.cc @@ -11,8 +11,9 @@ namespace xgboost { void Predictor::Init( const std::vector>& cfg, const std::vector>& cache) { - for (const std::shared_ptr& d : cache) + for (const std::shared_ptr& d : cache) { cache_[d.get()].data = d; + } } Predictor* Predictor::Create(std::string name) { auto* e = ::dmlc::Registry::Get()->Find(name); diff --git a/src/tree/fast_hist_param.h b/src/tree/fast_hist_param.h index 5ca9e0b5e..876450991 100644 --- a/src/tree/fast_hist_param.h +++ b/src/tree/fast_hist_param.h @@ -13,7 +13,7 @@ namespace tree { /*! \brief training parameters for histogram-based training */ struct FastHistParam : public dmlc::Parameter { // integral data type to be used with columnar data storage - enum class DataType { uint8 = 1, uint16 = 2, uint32 = 4 }; + enum class DataType { uint8 = 1, uint16 = 2, uint32 = 4 }; // NOLINT int colmat_dtype; // percentage threshold for treating a feature as sparse // e.g. 0.2 indicates a feature with fewer than 20% nonzeros is considered sparse diff --git a/src/tree/param.h b/src/tree/param.h index e25e9d0c2..dc7949b00 100644 --- a/src/tree/param.h +++ b/src/tree/param.h @@ -190,26 +190,26 @@ struct TrainParam : public dmlc::Parameter { DMLC_DECLARE_ALIAS(learning_rate, eta); } /*! \brief whether need forward small to big search: default right */ - inline bool need_forward_search(float col_density, bool indicator) const { + inline bool NeedForwardSearch(float col_density, bool indicator) const { return this->default_direction == 2 || (default_direction == 0 && (col_density < opt_dense_col) && !indicator); } /*! \brief whether need backward big to small search: default left */ - inline bool need_backward_search(float col_density, bool indicator) const { + inline bool NeedBackwardSearch(float col_density, bool indicator) const { return this->default_direction != 2; } /*! \brief given the loss change, whether we need to invoke pruning */ - inline bool need_prune(double loss_chg, int depth) const { + inline bool NeedPrune(double loss_chg, int depth) const { return loss_chg < this->min_split_loss; } /*! \brief whether we can split with current hessian */ - inline bool cannot_split(double sum_hess, int depth) const { + inline bool CannotSplit(double sum_hess, int depth) const { return sum_hess < this->min_child_weight * 2.0; } /*! 
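The predicates renamed to NeedForwardSearch/NeedBackwardSearch above encode how default_direction selects enumeration passes over a column: 2 ("right") forces the forward small-to-big scan and disables the backward one, while 0 ("learn") adds the forward scan only for sufficiently sparse, non-indicator columns. A free-function restatement of the logic shown in the hunk (the real methods read these values from TrainParam fields):

    #include <cassert>

    // default_direction: 0 = learn, 1 = left, 2 = right (as in TrainParam).
    bool NeedForwardSearch(int default_direction, float col_density,
                           float opt_dense_col, bool indicator) {
      return default_direction == 2 ||
             (default_direction == 0 && col_density < opt_dense_col && !indicator);
    }

    bool NeedBackwardSearch(int default_direction) {
      return default_direction != 2;  // everything except "default right"
    }

    int main() {
      // A sparse column under "learn" gets both scan directions.
      assert(NeedForwardSearch(0, 0.1f, 0.5f, false) && NeedBackwardSearch(0));
      // "Default right" runs only the forward scan.
      assert(NeedForwardSearch(2, 0.9f, 0.5f, false) && !NeedBackwardSearch(2));
    }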
\brief maximum sketch size */ - inline unsigned max_sketch_size() const { - unsigned ret = static_cast(sketch_ratio / sketch_eps); + inline unsigned MaxSketchSize() const { + auto ret = static_cast(sketch_ratio / sketch_eps); CHECK_GT(ret, 0U); return ret; } @@ -220,10 +220,12 @@ struct TrainParam : public dmlc::Parameter { // functions for L1 cost template XGBOOST_DEVICE inline static T1 ThresholdL1(T1 w, T2 lambda) { - if (w > +lambda) + if (w > +lambda) { return w - lambda; - if (w < -lambda) + } + if (w < -lambda) { return w + lambda; + } return 0.0; } @@ -240,8 +242,9 @@ XGBOOST_DEVICE inline T CalcGainGivenWeight(const TrainingParams &p, T sum_grad, // calculate the cost of loss function template XGBOOST_DEVICE inline T CalcGain(const TrainingParams &p, T sum_grad, T sum_hess) { - if (sum_hess < p.min_child_weight) + if (sum_hess < p.min_child_weight) { return T(0.0); +} if (p.max_delta_step == 0.0f) { if (p.reg_alpha == 0.0f) { return Sqr(sum_grad) / (sum_hess + p.reg_lambda); @@ -276,8 +279,9 @@ XGBOOST_DEVICE inline T CalcGain(const TrainingParams &p, T sum_grad, T sum_hess template XGBOOST_DEVICE inline T CalcWeight(const TrainingParams &p, T sum_grad, T sum_hess) { - if (sum_hess < p.min_child_weight) + if (sum_hess < p.min_child_weight) { return 0.0; +} T dw; if (p.reg_alpha == 0.0f) { dw = -sum_grad / (sum_hess + p.reg_lambda); @@ -285,16 +289,18 @@ XGBOOST_DEVICE inline T CalcWeight(const TrainingParams &p, T sum_grad, dw = -ThresholdL1(sum_grad, p.reg_alpha) / (sum_hess + p.reg_lambda); } if (p.max_delta_step != 0.0f) { - if (dw > p.max_delta_step) + if (dw > p.max_delta_step) { dw = p.max_delta_step; - if (dw < -p.max_delta_step) +} + if (dw < -p.max_delta_step) { dw = -p.max_delta_step; +} } return dw; } -template -XGBOOST_DEVICE inline float CalcWeight(const TrainingParams &p, gpair_t sum_grad) { +template +XGBOOST_DEVICE inline float CalcWeight(const TrainingParams &p, GpairT sum_grad) { return CalcWeight(p, sum_grad.GetGrad(), sum_grad.GetHess()); } @@ -312,8 +318,8 @@ struct XGBOOST_ALIGNAS(16) GradStats { /*! \brief constructor, the object must be cleared during construction */ explicit GradStats(const TrainParam& param) { this->Clear(); } - template - XGBOOST_DEVICE explicit GradStats(const gpair_t &sum) + template + XGBOOST_DEVICE explicit GradStats(const GpairT &sum) : sum_grad(sum.GetGrad()), sum_hess(sum.GetHess()) {} /*! \brief clear the statistics */ inline void Clear() { sum_grad = sum_hess = 0.0f; } @@ -323,26 +329,26 @@ struct XGBOOST_ALIGNAS(16) GradStats { * \brief accumulate statistics * \param p the gradient pair */ - inline void Add(bst_gpair p) { this->Add(p.GetGrad(), p.GetHess()); } + inline void Add(GradientPair p) { this->Add(p.GetGrad(), p.GetHess()); } /*! * \brief accumulate statistics, more complicated version * \param gpair the vector storing the gradient statistics * \param info the additional information * \param ridx instance index of this instance */ - inline void Add(const std::vector& gpair, const MetaInfo& info, + inline void Add(const std::vector& gpair, const MetaInfo& info, bst_uint ridx) { - const bst_gpair& b = gpair[ridx]; + const GradientPair& b = gpair[ridx]; this->Add(b.GetGrad(), b.GetHess()); } /*! \brief calculate leaf weight */ - template - XGBOOST_DEVICE inline double CalcWeight(const param_t ¶m) const { + template + XGBOOST_DEVICE inline double CalcWeight(const ParamT ¶m) const { return xgboost::tree::CalcWeight(param, sum_grad, sum_hess); } /*! 
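The hunks above also show the closed forms behind the tree objective: ThresholdL1 is the soft-thresholding operator for the L1 penalty, and CalcWeight composes it with L2 shrinkage and the optional max_delta_step clamp. A standalone restatement with a worked check; the min_child_weight early-out is omitted for brevity:

    #include <cassert>
    #include <cmath>

    // Soft-thresholding: shrink w toward zero by alpha, clipping at zero,
    // exactly as ThresholdL1 in the hunk above.
    double ThresholdL1(double w, double alpha) {
      if (w > +alpha) { return w - alpha; }
      if (w < -alpha) { return w + alpha; }
      return 0.0;
    }

    // Leaf weight following CalcWeight above:
    // w* = -ThresholdL1(G, alpha) / (H + lambda), optionally clamped.
    double LeafWeight(double sum_grad, double sum_hess, double alpha,
                      double lambda, double max_delta_step) {
      double dw = -ThresholdL1(sum_grad, alpha) / (sum_hess + lambda);
      if (max_delta_step != 0.0) {
        if (dw > max_delta_step) { dw = max_delta_step; }
        if (dw < -max_delta_step) { dw = -max_delta_step; }
      }
      return dw;
    }

    int main() {
      // With no L1 term the weight is just -G / (H + lambda): -4 / 8 = -0.5.
      assert(std::abs(LeafWeight(4.0, 7.0, 0.0, 1.0, 0.0) - (-0.5)) < 1e-12);
      // An L1 penalty of 1.0 first shrinks G from 4 to 3: -3 / 8 = -0.375.
      assert(std::abs(LeafWeight(4.0, 7.0, 1.0, 1.0, 0.0) - (-0.375)) < 1e-12);
    }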
\brief calculate gain of the solution */ -template - inline double CalcGain(const param_t& param) const { +template + inline double CalcGain(const ParamT& param) const { return xgboost::tree::CalcGain(param, sum_grad, sum_hess); } /*! \brief add statistics to the data */ @@ -364,7 +370,7 @@ template /*! \brief set leaf vector value based on statistics */ inline void SetLeafVec(const TrainParam& param, bst_float* vec) const {} // constructor to allow inheritance - GradStats() {} + GradStats() = default; /*! \brief add statistics to the data */ inline void Add(double grad, double hess) { sum_grad += grad; @@ -400,8 +406,8 @@ struct ValueConstraint { inline static void Init(TrainParam *param, unsigned num_feature) { param->monotone_constraints.resize(num_feature, 0); } -template - XGBOOST_DEVICE inline double CalcWeight(const param_t ¶m, GradStats stats) const { +template + XGBOOST_DEVICE inline double CalcWeight(const ParamT ¶m, GradStats stats) const { double w = stats.CalcWeight(param); if (w < lower_bound) { return lower_bound; @@ -412,14 +418,14 @@ template return w; } -template - XGBOOST_DEVICE inline double CalcGain(const param_t ¶m, GradStats stats) const { +template + XGBOOST_DEVICE inline double CalcGain(const ParamT ¶m, GradStats stats) const { return CalcGainGivenWeight(param, stats.sum_grad, stats.sum_hess, CalcWeight(param, stats)); } -template - XGBOOST_DEVICE inline double CalcSplitGain(const param_t ¶m, int constraint, +template + XGBOOST_DEVICE inline double CalcSplitGain(const ParamT ¶m, int constraint, GradStats left, GradStats right) const { const double negative_infinity = -std::numeric_limits::infinity(); double wleft = CalcWeight(param, left); @@ -442,8 +448,9 @@ template int c = param.monotone_constraints.at(split_index); *cleft = *this; *cright = *this; - if (c == 0) + if (c == 0) { return; +} double wleft = CalcWeight(param, left); double wright = CalcWeight(param, right); double mid = (wleft + wright) / 2; @@ -464,13 +471,13 @@ template */ struct SplitEntry { /*! \brief loss change after split this node */ - bst_float loss_chg; + bst_float loss_chg{0.0f}; /*! \brief split index */ - unsigned sindex; + unsigned sindex{0}; /*! \brief split value */ - bst_float split_value; + bst_float split_value{0.0f}; /*! \brief constructor */ - SplitEntry() : loss_chg(0.0f), sindex(0), split_value(0.0f) {} + SplitEntry() = default; /*! 
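SplitEntry, whose fields gain default member initializers above, packs two things into its 32-bit sindex: bit 31 carries the "missing value goes left" flag and the low 31 bits carry the split feature index, as the SplitIndex/DefaultLeft accessors in the next hunk decode. A self-contained sketch of the packing:

    #include <cassert>

    // Pack the default direction into the top bit of the split index.
    unsigned PackSplitIndex(unsigned split_index, bool default_left) {
      if (default_left) { split_index |= (1U << 31); }
      return split_index;
    }
    // Recover the feature index: mask off bit 31.
    unsigned SplitIndex(unsigned sindex) { return sindex & ((1U << 31) - 1U); }
    // Recover the flag: test bit 31.
    bool DefaultLeft(unsigned sindex) { return (sindex >> 31) != 0; }

    int main() {
      unsigned sindex = PackSplitIndex(42, true);
      assert(SplitIndex(sindex) == 42);
      assert(DefaultLeft(sindex));
    }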
* \brief decides whether we can replace current entry with the given * statistics @@ -482,7 +489,7 @@ struct SplitEntry { * \param split_index the feature index where the split is on */ inline bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const { - if (this->split_index() <= split_index) { + if (this->SplitIndex() <= split_index) { return new_loss_chg > this->loss_chg; } else { return !(this->loss_chg > new_loss_chg); @@ -494,7 +501,7 @@ struct SplitEntry { * \return whether the proposed split is better and can replace current split */ inline bool Update(const SplitEntry &e) { - if (this->NeedReplace(e.loss_chg, e.split_index())) { + if (this->NeedReplace(e.loss_chg, e.SplitIndex())) { this->loss_chg = e.loss_chg; this->sindex = e.sindex; this->split_value = e.split_value; @@ -515,8 +522,9 @@ struct SplitEntry { bst_float new_split_value, bool default_left) { if (this->NeedReplace(new_loss_chg, split_index)) { this->loss_chg = new_loss_chg; - if (default_left) + if (default_left) { split_index |= (1U << 31); +} this->sindex = split_index; this->split_value = new_split_value; return true; @@ -530,9 +538,9 @@ struct SplitEntry { dst.Update(src); } /*!\return feature index to split on */ - inline unsigned split_index() const { return sindex & ((1U << 31) - 1U); } + inline unsigned SplitIndex() const { return sindex & ((1U << 31) - 1U); } /*!\return whether missing value goes to left branch */ - inline bool default_left() const { return (sindex >> 31) != 0; } + inline bool DefaultLeft() const { return (sindex >> 31) != 0; } }; } // namespace tree @@ -542,14 +550,16 @@ struct SplitEntry { namespace std { inline std::ostream &operator<<(std::ostream &os, const std::vector &t) { os << '('; - for (std::vector::const_iterator it = t.begin(); it != t.end(); ++it) { - if (it != t.begin()) + for (auto it = t.begin(); it != t.end(); ++it) { + if (it != t.begin()) { os << ','; +} os << *it; } // python style tuple - if (t.size() == 1) + if (t.size() == 1) { os << ','; +} os << ')'; return os; } @@ -566,8 +576,9 @@ inline std::istream &operator>>(std::istream &is, std::vector &t) { return is; } is.get(); - if (ch == '(') + if (ch == '(') { break; +} if (!isspace(ch)) { is.setstate(std::ios::failbit); return is; @@ -597,8 +608,9 @@ inline std::istream &operator>>(std::istream &is, std::vector &t) { } break; } - if (ch == ')') + if (ch == ')') { break; +} } else if (ch == ')') { break; } else { diff --git a/src/tree/tree_model.cc b/src/tree/tree_model.cc index c78786725..374d0f949 100644 --- a/src/tree/tree_model.cc +++ b/src/tree/tree_model.cc @@ -21,45 +21,53 @@ void DumpRegTree(std::stringstream& fo, // NOLINT(*) int nid, int depth, int add_comma, bool with_stats, std::string format) { if (format == "json") { - if (add_comma) fo << ","; - if (depth != 0) fo << std::endl; - for (int i = 0; i < depth+1; ++i) fo << " "; + if (add_comma) { + fo << ","; + } + if (depth != 0) { + fo << std::endl; + } + for (int i = 0; i < depth + 1; ++i) { + fo << " "; + } } else { - for (int i = 0; i < depth; ++i) fo << '\t'; + for (int i = 0; i < depth; ++i) { + fo << '\t'; + } } - if (tree[nid].is_leaf()) { + if (tree[nid].IsLeaf()) { if (format == "json") { fo << "{ \"nodeid\": " << nid - << ", \"leaf\": " << tree[nid].leaf_value(); + << ", \"leaf\": " << tree[nid].LeafValue(); if (with_stats) { - fo << ", \"cover\": " << tree.stat(nid).sum_hess; + fo << ", \"cover\": " << tree.Stat(nid).sum_hess; } fo << " }"; } else { - fo << nid << ":leaf=" << tree[nid].leaf_value(); + fo << nid << ":leaf=" << 
tree[nid].LeafValue(); if (with_stats) { - fo << ",cover=" << tree.stat(nid).sum_hess; + fo << ",cover=" << tree.Stat(nid).sum_hess; } fo << '\n'; } } else { // right then left, - bst_float cond = tree[nid].split_cond(); - const unsigned split_index = tree[nid].split_index(); - if (split_index < fmap.size()) { + bst_float cond = tree[nid].SplitCond(); + const unsigned split_index = tree[nid].SplitIndex(); + if (split_index < fmap.Size()) { switch (fmap.type(split_index)) { case FeatureMap::kIndicator: { - int nyes = tree[nid].default_left() ? - tree[nid].cright() : tree[nid].cleft(); + int nyes = tree[nid].DefaultLeft() ? + tree[nid].RightChild() : tree[nid].LeftChild(); if (format == "json") { fo << "{ \"nodeid\": " << nid << ", \"depth\": " << depth - << ", \"split\": \"" << fmap.name(split_index) << "\"" + << ", \"split\": \"" << fmap.Name(split_index) << "\"" << ", \"yes\": " << nyes - << ", \"no\": " << tree[nid].cdefault(); + << ", \"no\": " << tree[nid].DefaultChild(); } else { - fo << nid << ":[" << fmap.name(split_index) << "] yes=" << nyes - << ",no=" << tree[nid].cdefault(); + fo << nid << ":[" << fmap.Name(split_index) << "] yes=" << nyes + << ",no=" << tree[nid].DefaultChild(); } break; } @@ -67,17 +75,17 @@ void DumpRegTree(std::stringstream& fo, // NOLINT(*) if (format == "json") { fo << "{ \"nodeid\": " << nid << ", \"depth\": " << depth - << ", \"split\": \"" << fmap.name(split_index) << "\"" + << ", \"split\": \"" << fmap.Name(split_index) << "\"" << ", \"split_condition\": " << int(cond + 1.0) - << ", \"yes\": " << tree[nid].cleft() - << ", \"no\": " << tree[nid].cright() - << ", \"missing\": " << tree[nid].cdefault(); + << ", \"yes\": " << tree[nid].LeftChild() + << ", \"no\": " << tree[nid].RightChild() + << ", \"missing\": " << tree[nid].DefaultChild(); } else { - fo << nid << ":[" << fmap.name(split_index) << "<" + fo << nid << ":[" << fmap.Name(split_index) << "<" << int(cond + 1.0) - << "] yes=" << tree[nid].cleft() - << ",no=" << tree[nid].cright() - << ",missing=" << tree[nid].cdefault(); + << "] yes=" << tree[nid].LeftChild() + << ",no=" << tree[nid].RightChild() + << ",missing=" << tree[nid].DefaultChild(); } break; } @@ -86,16 +94,16 @@ void DumpRegTree(std::stringstream& fo, // NOLINT(*) if (format == "json") { fo << "{ \"nodeid\": " << nid << ", \"depth\": " << depth - << ", \"split\": \"" << fmap.name(split_index) << "\"" + << ", \"split\": \"" << fmap.Name(split_index) << "\"" << ", \"split_condition\": " << cond - << ", \"yes\": " << tree[nid].cleft() - << ", \"no\": " << tree[nid].cright() - << ", \"missing\": " << tree[nid].cdefault(); + << ", \"yes\": " << tree[nid].LeftChild() + << ", \"no\": " << tree[nid].RightChild() + << ", \"missing\": " << tree[nid].DefaultChild(); } else { - fo << nid << ":[" << fmap.name(split_index) << "<" << cond - << "] yes=" << tree[nid].cleft() - << ",no=" << tree[nid].cright() - << ",missing=" << tree[nid].cdefault(); + fo << nid << ":[" << fmap.Name(split_index) << "<" << cond + << "] yes=" << tree[nid].LeftChild() + << ",no=" << tree[nid].RightChild() + << ",missing=" << tree[nid].DefaultChild(); } break; } @@ -107,22 +115,22 @@ void DumpRegTree(std::stringstream& fo, // NOLINT(*) << ", \"depth\": " << depth << ", \"split\": " << split_index << ", \"split_condition\": " << cond - << ", \"yes\": " << tree[nid].cleft() - << ", \"no\": " << tree[nid].cright() - << ", \"missing\": " << tree[nid].cdefault(); + << ", \"yes\": " << tree[nid].LeftChild() + << ", \"no\": " << tree[nid].RightChild() + << ", \"missing\": " << 
tree[nid].DefaultChild(); } else { fo << nid << ":[f" << split_index << "<"<< cond - << "] yes=" << tree[nid].cleft() - << ",no=" << tree[nid].cright() - << ",missing=" << tree[nid].cdefault(); + << "] yes=" << tree[nid].LeftChild() + << ",no=" << tree[nid].RightChild() + << ",missing=" << tree[nid].DefaultChild(); } } if (with_stats) { if (format == "json") { - fo << ", \"gain\": " << tree.stat(nid).loss_chg - << ", \"cover\": " << tree.stat(nid).sum_hess; + fo << ", \"gain\": " << tree.Stat(nid).loss_chg + << ", \"cover\": " << tree.Stat(nid).sum_hess; } else { - fo << ",gain=" << tree.stat(nid).loss_chg << ",cover=" << tree.stat(nid).sum_hess; + fo << ",gain=" << tree.Stat(nid).loss_chg << ",cover=" << tree.Stat(nid).sum_hess; } } if (format == "json") { @@ -130,11 +138,13 @@ void DumpRegTree(std::stringstream& fo, // NOLINT(*) } else { fo << '\n'; } - DumpRegTree(fo, tree, fmap, tree[nid].cleft(), depth + 1, false, with_stats, format); - DumpRegTree(fo, tree, fmap, tree[nid].cright(), depth + 1, true, with_stats, format); + DumpRegTree(fo, tree, fmap, tree[nid].LeftChild(), depth + 1, false, with_stats, format); + DumpRegTree(fo, tree, fmap, tree[nid].RightChild(), depth + 1, true, with_stats, format); if (format == "json") { fo << std::endl; - for (int i = 0; i < depth+1; ++i) fo << " "; + for (int i = 0; i < depth + 1; ++i) { + fo << " "; + } fo << "]}"; } } diff --git a/src/tree/updater_basemaker-inl.h b/src/tree/updater_basemaker-inl.h index 9f4ae48ea..c0c7f525e 100644 --- a/src/tree/updater_basemaker-inl.h +++ b/src/tree/updater_basemaker-inl.h @@ -29,7 +29,7 @@ namespace tree { class BaseMaker: public TreeUpdater { public: void Init(const std::vector >& args) override { - param.InitAllowUnknown(args); + param_.InitAllowUnknown(args); } protected: @@ -39,8 +39,8 @@ class BaseMaker: public TreeUpdater { /*! \brief find type of each feature, use column format */ inline void InitByCol(DMatrix* p_fmat, const RegTree& tree) { - fminmax.resize(tree.param.num_feature * 2); - std::fill(fminmax.begin(), fminmax.end(), + fminmax_.resize(tree.param.num_feature * 2); + std::fill(fminmax_.begin(), fminmax_.end(), -std::numeric_limits::max()); // start accumulating statistics dmlc::DataIter* iter = p_fmat->ColIterator(); @@ -51,22 +51,22 @@ class BaseMaker: public TreeUpdater { const bst_uint fid = batch.col_index[i]; const ColBatch::Inst& c = batch[i]; if (c.length != 0) { - fminmax[fid * 2 + 0] = std::max(-c[0].fvalue, fminmax[fid * 2 + 0]); - fminmax[fid * 2 + 1] = std::max(c[c.length - 1].fvalue, fminmax[fid * 2 + 1]); + fminmax_[fid * 2 + 0] = std::max(-c[0].fvalue, fminmax_[fid * 2 + 0]); + fminmax_[fid * 2 + 1] = std::max(c[c.length - 1].fvalue, fminmax_[fid * 2 + 1]); } } } } /*! 
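Stepping back to the DumpRegTree hunks above: for one split and two leaves, with the split feature absent from the feature map (the fallback branch) and with_stats enabled, the two dump formats produced by this function look roughly as follows. All values are illustrative; leaf lines in the text form are indented one tab per depth level, and the JSON "children" array is opened in a part of the function elided from the hunk.

    0:[f0<0.5] yes=1,no=2,missing=1,gain=10.5,cover=100
        1:leaf=0.3,cover=60
        2:leaf=-0.2,cover=40

    { "nodeid": 0, "depth": 0, "split": 0, "split_condition": 0.5, "yes": 1, "no": 2, "missing": 1, "gain": 10.5, "cover": 100, "children": [
      { "nodeid": 1, "leaf": 0.3, "cover": 60 },
      { "nodeid": 2, "leaf": -0.2, "cover": 40 }
    ]}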
\brief synchronize the information */ inline void SyncInfo() { - rabit::Allreduce(dmlc::BeginPtr(fminmax), fminmax.size()); + rabit::Allreduce(dmlc::BeginPtr(fminmax_), fminmax_.size()); } // get feature type, 0:empty 1:binary 2:real inline int Type(bst_uint fid) const { - CHECK_LT(fid * 2 + 1, fminmax.size()) + CHECK_LT(fid * 2 + 1, fminmax_.size()) << "FeatHelper fid exceed query bound "; - bst_float a = fminmax[fid * 2]; - bst_float b = fminmax[fid * 2 + 1]; + bst_float a = fminmax_[fid * 2]; + bst_float b = fminmax_[fid * 2 + 1]; if (a == -std::numeric_limits::max()) return 0; if (-a == b) { return 1; @@ -75,16 +75,16 @@ class BaseMaker: public TreeUpdater { } } inline bst_float MaxValue(bst_uint fid) const { - return fminmax[fid *2 + 1]; + return fminmax_[fid *2 + 1]; } inline void SampleCol(float p, std::vector *p_findex) const { std::vector &findex = *p_findex; findex.clear(); - for (size_t i = 0; i < fminmax.size(); i += 2) { - const bst_uint fid = static_cast(i / 2); + for (size_t i = 0; i < fminmax_.size(); i += 2) { + const auto fid = static_cast(i / 2); if (this->Type(fid) != 0) findex.push_back(fid); } - unsigned n = static_cast(p * findex.size()); + auto n = static_cast(p * findex.size()); std::shuffle(findex.begin(), findex.end(), common::GlobalRandom()); findex.resize(n); // sync the findex if it is subsample @@ -99,64 +99,64 @@ class BaseMaker: public TreeUpdater { } private: - std::vector fminmax; + std::vector fminmax_; }; // ------static helper functions ------ // helper function to get to next level of the tree /*! \brief this is helper function for row based data*/ inline static int NextLevel(const RowBatch::Inst &inst, const RegTree &tree, int nid) { const RegTree::Node &n = tree[nid]; - bst_uint findex = n.split_index(); + bst_uint findex = n.SplitIndex(); for (unsigned i = 0; i < inst.length; ++i) { if (findex == inst[i].index) { - if (inst[i].fvalue < n.split_cond()) { - return n.cleft(); + if (inst[i].fvalue < n.SplitCond()) { + return n.LeftChild(); } else { - return n.cright(); + return n.RightChild(); } } } - return n.cdefault(); + return n.DefaultChild(); } // ------class member helpers--------- /*! 
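The fminmax_ buffer renamed above stores two maxima per feature, max(-fvalue) and max(fvalue), i.e. the negated minimum and the maximum. Keeping the negated minimum is what lets SyncInfo synchronize both bounds with a single max-Allreduce, and Type() then decodes empty/binary/real from the pair. A host-only sketch of that encoding (the rabit call is omitted, and the struct name is a stand-in for the helper class inside BaseMaker):

    #include <algorithm>
    #include <cassert>
    #include <limits>
    #include <vector>

    struct FeatMinMax {
      // Two entries per feature: (max of -fvalue, max of fvalue) == (-min, max).
      std::vector<float> fminmax;

      explicit FeatMinMax(int num_feature)
          : fminmax(num_feature * 2, -std::numeric_limits<float>::max()) {}

      void Add(int fid, float fvalue) {
        fminmax[fid * 2 + 0] = std::max(-fvalue, fminmax[fid * 2 + 0]);
        fminmax[fid * 2 + 1] = std::max(fvalue, fminmax[fid * 2 + 1]);
      }

      // 0: feature never seen, 1: one distinct stored value, 2: real-valued,
      // decoded exactly as Type() does above.
      int Type(int fid) const {
        float a = fminmax[fid * 2];
        float b = fminmax[fid * 2 + 1];
        if (a == -std::numeric_limits<float>::max()) return 0;
        return (-a == b) ? 1 : 2;
      }
    };

    int main() {
      FeatMinMax h(2);
      h.Add(0, 1.0f);                  // indicator-style feature: always 1
      h.Add(1, 0.5f); h.Add(1, 2.0f);  // real feature with a value range
      assert(h.Type(0) == 1);
      assert(h.Type(1) == 2);
    }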
\brief initialize temp data structure */ - inline void InitData(const std::vector &gpair, + inline void InitData(const std::vector &gpair, const DMatrix &fmat, const RegTree &tree) { CHECK_EQ(tree.param.num_nodes, tree.param.num_roots) << "TreeMaker: can only grow new tree"; - const std::vector &root_index = fmat.info().root_index; + const std::vector &root_index = fmat.Info().root_index_; { // setup position - position.resize(gpair.size()); + position_.resize(gpair.size()); if (root_index.size() == 0) { - std::fill(position.begin(), position.end(), 0); + std::fill(position_.begin(), position_.end(), 0); } else { - for (size_t i = 0; i < position.size(); ++i) { - position[i] = root_index[i]; + for (size_t i = 0; i < position_.size(); ++i) { + position_[i] = root_index[i]; CHECK_LT(root_index[i], (unsigned)tree.param.num_roots) << "root index exceed setting"; } } // mark delete for the deleted datas - for (size_t i = 0; i < position.size(); ++i) { - if (gpair[i].GetHess() < 0.0f) position[i] = ~position[i]; + for (size_t i = 0; i < position_.size(); ++i) { + if (gpair[i].GetHess() < 0.0f) position_[i] = ~position_[i]; } // mark subsample - if (param.subsample < 1.0f) { - std::bernoulli_distribution coin_flip(param.subsample); + if (param_.subsample < 1.0f) { + std::bernoulli_distribution coin_flip(param_.subsample); auto& rnd = common::GlobalRandom(); - for (size_t i = 0; i < position.size(); ++i) { + for (size_t i = 0; i < position_.size(); ++i) { if (gpair[i].GetHess() < 0.0f) continue; - if (!coin_flip(rnd)) position[i] = ~position[i]; + if (!coin_flip(rnd)) position_[i] = ~position_[i]; } } } { // expand query - qexpand.reserve(256); qexpand.clear(); + qexpand_.reserve(256); qexpand_.clear(); for (int i = 0; i < tree.param.num_roots; ++i) { - qexpand.push_back(i); + qexpand_.push_back(i); } this->UpdateNode2WorkIndex(tree); } @@ -164,28 +164,27 @@ class BaseMaker: public TreeUpdater { /*! \brief update queue expand add in new leaves */ inline void UpdateQueueExpand(const RegTree &tree) { std::vector newnodes; - for (size_t i = 0; i < qexpand.size(); ++i) { - const int nid = qexpand[i]; - if (!tree[nid].is_leaf()) { - newnodes.push_back(tree[nid].cleft()); - newnodes.push_back(tree[nid].cright()); + for (int nid : qexpand_) { + if (!tree[nid].IsLeaf()) { + newnodes.push_back(tree[nid].LeftChild()); + newnodes.push_back(tree[nid].RightChild()); } } // use new nodes for qexpand - qexpand = newnodes; + qexpand_ = newnodes; this->UpdateNode2WorkIndex(tree); } // return decoded position inline int DecodePosition(bst_uint ridx) const { - const int pid = position[ridx]; + const int pid = position_[ridx]; return pid < 0 ? ~pid : pid; } // encode the encoded position value for ridx inline void SetEncodePosition(bst_uint ridx, int nid) { - if (position[ridx] < 0) { - position[ridx] = ~nid; + if (position_[ridx] < 0) { + position_[ridx] = ~nid; } else { - position[ridx] = nid; + position_[ridx] = nid; } } /*! 
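The position_ vector renamed above packs an "active" flag into the sign of the node id: a row parked at node nid but excluded from statistics (a deleted entry, or one dropped by subsampling) is stored as ~nid, which is negative for every valid nid. DecodePosition and SetEncodePosition round-trip that encoding; a self-contained sketch:

    #include <cassert>
    #include <vector>

    // One int per row: nid when the row actively contributes to node nid,
    // ~nid when it is parked there but excluded from statistics.
    int DecodePosition(const std::vector<int>& position, int ridx) {
      const int pid = position[ridx];
      return pid < 0 ? ~pid : pid;
    }

    void SetEncodePosition(std::vector<int>* position, int ridx, int nid) {
      // keep the "inactive" marker while moving the row to a new node
      (*position)[ridx] = ((*position)[ridx] < 0) ? ~nid : nid;
    }

    int main() {
      std::vector<int> position = {0, ~0};  // row 1 is parked at node 0
      SetEncodePosition(&position, 0, 3);
      SetEncodePosition(&position, 1, 3);
      assert(position[0] == 3 && position[1] == ~3);
      assert(DecodePosition(position, 1) == 3);  // node recoverable either way
    }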
@@ -211,27 +210,27 @@ class BaseMaker: public TreeUpdater { inline void SetDefaultPostion(DMatrix *p_fmat, const RegTree &tree) { // set rest of instances to default position - const RowSet &rowset = p_fmat->buffered_rowset(); + const RowSet &rowset = p_fmat->BufferedRowset(); // set default direct nodes to default // for leaf nodes that are not fresh, mark them to ~nid, // so that they are ignored in future statistics collection - const bst_omp_uint ndata = static_cast(rowset.size()); + const auto ndata = static_cast(rowset.Size()); #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < ndata; ++i) { const bst_uint ridx = rowset[i]; const int nid = this->DecodePosition(ridx); - if (tree[nid].is_leaf()) { + if (tree[nid].IsLeaf()) { // mark finish when it is not a fresh leaf - if (tree[nid].cright() == -1) { - position[ridx] = ~nid; + if (tree[nid].RightChild() == -1) { + position_[ridx] = ~nid; } } else { // push to default branch - if (tree[nid].default_left()) { - this->SetEncodePosition(ridx, tree[nid].cleft()); + if (tree[nid].DefaultLeft()) { + this->SetEncodePosition(ridx, tree[nid].LeftChild()); } else { - this->SetEncodePosition(ridx, tree[nid].cright()); + this->SetEncodePosition(ridx, tree[nid].RightChild()); } } } @@ -254,21 +253,21 @@ auto it = std::lower_bound(sorted_split_set.begin(), sorted_split_set.end(), fid); if (it != sorted_split_set.end() && *it == fid) { - const bst_omp_uint ndata = static_cast(col.length); + const auto ndata = static_cast(col.length); #pragma omp parallel for schedule(static) for (bst_omp_uint j = 0; j < ndata; ++j) { const bst_uint ridx = col[j].index; const bst_float fvalue = col[j].fvalue; const int nid = this->DecodePosition(ridx); - CHECK(tree[nid].is_leaf()); - int pid = tree[nid].parent(); + CHECK(tree[nid].IsLeaf()); + int pid = tree[nid].Parent(); // go back to parent, correct those who are not default - if (!tree[nid].is_root() && tree[pid].split_index() == fid) { - if (fvalue < tree[pid].split_cond()) { - this->SetEncodePosition(ridx, tree[pid].cleft()); + if (!tree[nid].IsRoot() && tree[pid].SplitIndex() == fid) { + if (fvalue < tree[pid].SplitCond()) { + this->SetEncodePosition(ridx, tree[pid].LeftChild()); } else { - this->SetEncodePosition(ridx, tree[pid].cright()); + this->SetEncodePosition(ridx, tree[pid].RightChild()); } } } @@ -287,10 +286,9 @@ std::vector& fsplits = *out_split_set; fsplits.clear(); // step 1, classify the non-default data into right places - for (size_t i = 0; i < nodes.size(); ++i) { - const int nid = nodes[i]; - if (!tree[nid].is_leaf()) { - fsplits.push_back(tree[nid].split_index()); + for (int nid : nodes) { + if (!tree[nid].IsLeaf()) { + fsplits.push_back(tree[nid].SplitIndex()); } } std::sort(fsplits.begin(), fsplits.end()); @@ -314,18 +312,18 @@ for (size_t i = 0; i < batch.size; ++i) { ColBatch::Inst col = batch[i]; const bst_uint fid = batch.col_index[i]; - const bst_omp_uint ndata = static_cast(col.length); + const auto ndata = static_cast(col.length); #pragma omp parallel for schedule(static) for (bst_omp_uint j = 0; j < ndata; ++j) { const bst_uint ridx = col[j].index; const bst_float fvalue = col[j].fvalue; const int nid = this->DecodePosition(ridx); // go back to parent, correct those who are not default - if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) { - if (fvalue < tree[nid].split_cond()) { - this->SetEncodePosition(ridx, tree[nid].cleft()); + if
(!tree[nid].IsLeaf() && tree[nid].SplitIndex() == fid) { + if (fvalue < tree[nid].SplitCond()) { + this->SetEncodePosition(ridx, tree[nid].LeftChild()); } else { - this->SetEncodePosition(ridx, tree[nid].cright()); + this->SetEncodePosition(ridx, tree[nid].RightChild()); } } } @@ -334,39 +332,37 @@ } /*! \brief helper function to get statistics from a tree */ template - inline void GetNodeStats(const std::vector &gpair, + inline void GetNodeStats(const std::vector &gpair, const DMatrix &fmat, const RegTree &tree, std::vector< std::vector > *p_thread_temp, std::vector *p_node_stats) { std::vector< std::vector > &thread_temp = *p_thread_temp; - const MetaInfo &info = fmat.info(); + const MetaInfo &info = fmat.Info(); thread_temp.resize(omp_get_max_threads()); p_node_stats->resize(tree.param.num_nodes); #pragma omp parallel { const int tid = omp_get_thread_num(); - thread_temp[tid].resize(tree.param.num_nodes, TStats(param)); - for (size_t i = 0; i < qexpand.size(); ++i) { - const unsigned nid = qexpand[i]; + thread_temp[tid].resize(tree.param.num_nodes, TStats(param_)); + for (unsigned int nid : qexpand_) { thread_temp[tid][nid].Clear(); } } - const RowSet &rowset = fmat.buffered_rowset(); + const RowSet &rowset = fmat.BufferedRowset(); // setup position - const bst_omp_uint ndata = static_cast(rowset.size()); + const auto ndata = static_cast(rowset.Size()); #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < ndata; ++i) { const bst_uint ridx = rowset[i]; - const int nid = position[ridx]; + const int nid = position_[ridx]; const int tid = omp_get_thread_num(); if (nid >= 0) { thread_temp[tid][nid].Add(gpair, info, ridx); } } // sum the per thread statistics together - for (size_t j = 0; j < qexpand.size(); ++j) { - const int nid = qexpand[j]; + for (int nid : qexpand_) { TStats &s = (*p_node_stats)[nid]; s.Clear(); for (size_t tid = 0; tid < thread_temp.size(); ++tid) { @@ -461,28 +457,28 @@ } }; /*! \brief training parameter of tree grower */ - TrainParam param; + TrainParam param_; /*! \brief queue of nodes to be expanded */ - std::vector qexpand; + std::vector qexpand_; /*! * \brief map active node to its working index offset in qexpand, * can be -1, which means the node is not actively expanding */ - std::vector node2workindex; + std::vector node2workindex_; /*!
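GetNodeStats above is a standard two-phase OpenMP reduction: each thread accumulates the rows it owns into a private per-node table, then the tables are summed per node. A compile-ready sketch with a bare grad/hess pair standing in for TStats and a row-to-node map standing in for position_ (all names illustrative):

    #include <omp.h>
    #include <vector>

    struct Stats {
      double grad = 0.0, hess = 0.0;
      void Add(double g, double h) { grad += g; hess += h; }
    };

    std::vector<Stats> NodeStats(const std::vector<int>& row_node,
                                 const std::vector<double>& grad,
                                 const std::vector<double>& hess,
                                 int num_nodes) {
      // phase 1: every thread fills its own per-node table
      std::vector<std::vector<Stats>> tmp(
          omp_get_max_threads(), std::vector<Stats>(num_nodes));
    #pragma omp parallel for schedule(static)
      for (int i = 0; i < static_cast<int>(row_node.size()); ++i) {
        if (row_node[i] >= 0) {  // negative = row excluded, as with position_
          tmp[omp_get_thread_num()][row_node[i]].Add(grad[i], hess[i]);
        }
      }
      // phase 2: sum the per-thread tables into one table
      std::vector<Stats> out(num_nodes);
      for (const auto& t : tmp) {
        for (int nid = 0; nid < num_nodes; ++nid) {
          out[nid].Add(t[nid].grad, t[nid].hess);
        }
      }
      return out;
    }

    int main() {
      std::vector<int> row_node = {0, 1, ~1, 0};
      std::vector<double> g = {1, 2, 3, 4}, h = {1, 1, 1, 1};
      auto stats = NodeStats(row_node, g, h, 2);
      // node 0 collected rows 0 and 3; the ~1 row was skipped
      return stats[0].grad == 5.0 ? 0 : 1;
    }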
* \brief position of each instance in the tree * can be negative, which means this position is no longer expanding * see also Decode/EncodePosition */ - std::vector position; + std::vector position_; private: inline void UpdateNode2WorkIndex(const RegTree &tree) { // update the node2workindex - std::fill(node2workindex.begin(), node2workindex.end(), -1); - node2workindex.resize(tree.param.num_nodes); - for (size_t i = 0; i < qexpand.size(); ++i) { - node2workindex[qexpand[i]] = static_cast(i); + std::fill(node2workindex_.begin(), node2workindex_.end(), -1); + node2workindex_.resize(tree.param.num_nodes); + for (size_t i = 0; i < qexpand_.size(); ++i) { + node2workindex_[qexpand_[i]] = static_cast(i); } } }; diff --git a/src/tree/updater_colmaker.cc b/src/tree/updater_colmaker.cc index 5d687f2c4..fab5eb707 100644 --- a/src/tree/updater_colmaker.cc +++ b/src/tree/updater_colmaker.cc @@ -5,6 +5,7 @@ * \author Tianqi Chen */ #include +#include #include #include #include @@ -23,28 +24,28 @@ template class ColMaker: public TreeUpdater { public: void Init(const std::vector >& args) override { - param.InitAllowUnknown(args); + param_.InitAllowUnknown(args); } - void Update(HostDeviceVector *gpair, + void Update(HostDeviceVector *gpair, DMatrix* dmat, const std::vector &trees) override { - TStats::CheckInfo(dmat->info()); + TStats::CheckInfo(dmat->Info()); // rescale learning rate according to size of trees - float lr = param.learning_rate; - param.learning_rate = lr / trees.size(); - TConstraint::Init(¶m, dmat->info().num_col); + float lr = param_.learning_rate; + param_.learning_rate = lr / trees.size(); + TConstraint::Init(¶m_, dmat->Info().num_col_); // build tree - for (size_t i = 0; i < trees.size(); ++i) { - Builder builder(param); - builder.Update(gpair->data_h(), dmat, trees[i]); + for (auto tree : trees) { + Builder builder(param_); + builder.Update(gpair->HostVector(), dmat, tree); } - param.learning_rate = lr; + param_.learning_rate = lr; } protected: // training parameter - TrainParam param; + TrainParam param_; // data structure /*! 
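A note on the learning-rate handling in ColMaker::Update above: when one boosting round grows several trees, eta is divided by the number of trees before building (param_.learning_rate = lr / trees.size()) and restored afterwards, so the shrinkage applied across the whole batch of trees matches the configured rate. For example, with eta = 0.3 and three trees in the batch, each tree's leaves are scaled by 0.1.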
\brief per thread x per node entry to store tmp data */ struct ThreadEntry { @@ -78,17 +79,17 @@ class ColMaker: public TreeUpdater { } }; // actual builder that runs the algorithm - struct Builder { + class Builder { public: // constructor - explicit Builder(const TrainParam& param) : param(param), nthread(omp_get_max_threads()) {} + explicit Builder(const TrainParam& param) : param_(param), nthread_(omp_get_max_threads()) {} // update one tree, growing - virtual void Update(const std::vector& gpair, + virtual void Update(const std::vector& gpair, DMatrix* p_fmat, RegTree* p_tree) { this->InitData(gpair, *p_fmat, *p_tree); this->InitNewNode(qexpand_, gpair, *p_fmat, *p_tree); - for (int depth = 0; depth < param.max_depth; ++depth) { + for (int depth = 0; depth < param_.max_depth; ++depth) { this->FindSplit(depth, qexpand_, gpair, p_fmat, p_tree); this->ResetPosition(qexpand_, p_fmat, *p_tree); this->UpdateQueueExpand(*p_tree, &qexpand_); @@ -99,80 +100,80 @@ class ColMaker: public TreeUpdater { // set all the rest expanding nodes to leaf for (size_t i = 0; i < qexpand_.size(); ++i) { const int nid = qexpand_[i]; - (*p_tree)[nid].set_leaf(snode[nid].weight * param.learning_rate); + (*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate); } // remember auxiliary statistics in the tree node for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) { - p_tree->stat(nid).loss_chg = snode[nid].best.loss_chg; - p_tree->stat(nid).base_weight = snode[nid].weight; - p_tree->stat(nid).sum_hess = static_cast(snode[nid].stats.sum_hess); - snode[nid].stats.SetLeafVec(param, p_tree->leafvec(nid)); + p_tree->Stat(nid).loss_chg = snode_[nid].best.loss_chg; + p_tree->Stat(nid).base_weight = snode_[nid].weight; + p_tree->Stat(nid).sum_hess = static_cast(snode_[nid].stats.sum_hess); + snode_[nid].stats.SetLeafVec(param_, p_tree->Leafvec(nid)); } } protected: // initialize temp data structure - inline void InitData(const std::vector& gpair, + inline void InitData(const std::vector& gpair, const DMatrix& fmat, const RegTree& tree) { CHECK_EQ(tree.param.num_nodes, tree.param.num_roots) << "ColMaker: can only grow new tree"; - const std::vector& root_index = fmat.info().root_index; - const RowSet& rowset = fmat.buffered_rowset(); + const std::vector& root_index = fmat.Info().root_index_; + const RowSet& rowset = fmat.BufferedRowset(); { // setup position - position.resize(gpair.size()); + position_.resize(gpair.size()); if (root_index.size() == 0) { - for (size_t i = 0; i < rowset.size(); ++i) { - position[rowset[i]] = 0; + for (size_t i = 0; i < rowset.Size(); ++i) { + position_[rowset[i]] = 0; } } else { - for (size_t i = 0; i < rowset.size(); ++i) { + for (size_t i = 0; i < rowset.Size(); ++i) { const bst_uint ridx = rowset[i]; - position[ridx] = root_index[ridx]; + position_[ridx] = root_index[ridx]; CHECK_LT(root_index[ridx], (unsigned)tree.param.num_roots); } } // mark delete for the deleted datas - for (size_t i = 0; i < rowset.size(); ++i) { + for (size_t i = 0; i < rowset.Size(); ++i) { const bst_uint ridx = rowset[i]; - if (gpair[ridx].GetHess() < 0.0f) position[ridx] = ~position[ridx]; + if (gpair[ridx].GetHess() < 0.0f) position_[ridx] = ~position_[ridx]; } // mark subsample - if (param.subsample < 1.0f) { - std::bernoulli_distribution coin_flip(param.subsample); + if (param_.subsample < 1.0f) { + std::bernoulli_distribution coin_flip(param_.subsample); auto& rnd = common::GlobalRandom(); - for (size_t i = 0; i < rowset.size(); ++i) { + for (size_t i = 0; i < rowset.Size(); ++i) { const 
bst_uint ridx = rowset[i]; if (gpair[ridx].GetHess() < 0.0f) continue; - if (!coin_flip(rnd)) position[ridx] = ~position[ridx]; + if (!coin_flip(rnd)) position_[ridx] = ~position_[ridx]; } } } { // initialize feature index - unsigned ncol = static_cast(fmat.info().num_col); + auto ncol = static_cast(fmat.Info().num_col_); for (unsigned i = 0; i < ncol; ++i) { if (fmat.GetColSize(i) != 0) { - feat_index.push_back(i); + feat_index_.push_back(i); } } unsigned n = std::max(static_cast(1), - static_cast(param.colsample_bytree * feat_index.size())); - std::shuffle(feat_index.begin(), feat_index.end(), common::GlobalRandom()); - CHECK_GT(param.colsample_bytree, 0U) + static_cast(param_.colsample_bytree * feat_index_.size())); + std::shuffle(feat_index_.begin(), feat_index_.end(), common::GlobalRandom()); + CHECK_GT(param_.colsample_bytree, 0U) << "colsample_bytree cannot be zero."; - feat_index.resize(n); + feat_index_.resize(n); } { // setup temp space for each thread // reserve a small space - stemp.clear(); - stemp.resize(this->nthread, std::vector()); - for (size_t i = 0; i < stemp.size(); ++i) { - stemp[i].clear(); stemp[i].reserve(256); + stemp_.clear(); + stemp_.resize(this->nthread_, std::vector()); + for (size_t i = 0; i < stemp_.size(); ++i) { + stemp_[i].clear(); stemp_[i].reserve(256); } - snode.reserve(256); + snode_.reserve(256); } { // expand query @@ -187,67 +188,63 @@ class ColMaker: public TreeUpdater { * and NodeEntry for all the new nodes in qexpand */ inline void InitNewNode(const std::vector& qexpand, - const std::vector& gpair, + const std::vector& gpair, const DMatrix& fmat, const RegTree& tree) { { // setup statistics space for each tree node - for (size_t i = 0; i < stemp.size(); ++i) { - stemp[i].resize(tree.param.num_nodes, ThreadEntry(param)); + for (size_t i = 0; i < stemp_.size(); ++i) { + stemp_[i].resize(tree.param.num_nodes, ThreadEntry(param_)); } - snode.resize(tree.param.num_nodes, NodeEntry(param)); + snode_.resize(tree.param.num_nodes, NodeEntry(param_)); constraints_.resize(tree.param.num_nodes); } - const RowSet &rowset = fmat.buffered_rowset(); - const MetaInfo& info = fmat.info(); + const RowSet &rowset = fmat.BufferedRowset(); + const MetaInfo& info = fmat.Info(); // setup position - const bst_omp_uint ndata = static_cast(rowset.size()); + const auto ndata = static_cast(rowset.Size()); #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < ndata; ++i) { const bst_uint ridx = rowset[i]; const int tid = omp_get_thread_num(); - if (position[ridx] < 0) continue; - stemp[tid][position[ridx]].stats.Add(gpair, info, ridx); + if (position_[ridx] < 0) continue; + stemp_[tid][position_[ridx]].stats.Add(gpair, info, ridx); } // sum the per thread statistics together - for (size_t j = 0; j < qexpand.size(); ++j) { - const int nid = qexpand[j]; - TStats stats(param); - for (size_t tid = 0; tid < stemp.size(); ++tid) { - stats.Add(stemp[tid][nid].stats); + for (int nid : qexpand) { + TStats stats(param_); + for (size_t tid = 0; tid < stemp_.size(); ++tid) { + stats.Add(stemp_[tid][nid].stats); } // update node statistics - snode[nid].stats = stats; + snode_[nid].stats = stats; } // setup constraints before calculating the weight - for (size_t j = 0; j < qexpand.size(); ++j) { - const int nid = qexpand[j]; - if (tree[nid].is_root()) continue; - const int pid = tree[nid].parent(); - constraints_[pid].SetChild(param, tree[pid].split_index(), - snode[tree[pid].cleft()].stats, - snode[tree[pid].cright()].stats, - &constraints_[tree[pid].cleft()], - 
&constraints_[tree[pid].cright()]); + for (int nid : qexpand) { + if (tree[nid].IsRoot()) continue; + const int pid = tree[nid].Parent(); + constraints_[pid].SetChild(param_, tree[pid].SplitIndex(), + snode_[tree[pid].LeftChild()].stats, + snode_[tree[pid].RightChild()].stats, + &constraints_[tree[pid].LeftChild()], + &constraints_[tree[pid].RightChild()]); } // calculating the weights - for (size_t j = 0; j < qexpand.size(); ++j) { - const int nid = qexpand[j]; - snode[nid].root_gain = static_cast( - constraints_[nid].CalcGain(param, snode[nid].stats)); - snode[nid].weight = static_cast( - constraints_[nid].CalcWeight(param, snode[nid].stats)); + for (int nid : qexpand) { + snode_[nid].root_gain = static_cast( + constraints_[nid].CalcGain(param_, snode_[nid].stats)); + snode_[nid].weight = static_cast( + constraints_[nid].CalcWeight(param_, snode_[nid].stats)); } } /*! \brief update queue expand add in new leaves */ inline void UpdateQueueExpand(const RegTree& tree, std::vector* p_qexpand) { std::vector &qexpand = *p_qexpand; std::vector newnodes; - for (size_t i = 0; i < qexpand.size(); ++i) { - const int nid = qexpand[i]; - if (!tree[ nid ].is_leaf()) { - newnodes.push_back(tree[nid].cleft()); - newnodes.push_back(tree[nid].cright()); + for (int nid : qexpand) { + if (!tree[ nid ].IsLeaf()) { + newnodes.push_back(tree[nid].LeftChild()); + newnodes.push_back(tree[nid].RightChild()); } } // use new nodes for qexpand @@ -258,26 +255,26 @@ class ColMaker: public TreeUpdater { inline void ParallelFindSplit(const ColBatch::Inst &col, bst_uint fid, const DMatrix &fmat, - const std::vector &gpair) { + const std::vector &gpair) { // TODO(tqchen): double check stats order. - const MetaInfo& info = fmat.info(); + const MetaInfo& info = fmat.Info(); const bool ind = col.length != 0 && col.data[0].fvalue == col.data[col.length - 1].fvalue; - bool need_forward = param.need_forward_search(fmat.GetColDensity(fid), ind); - bool need_backward = param.need_backward_search(fmat.GetColDensity(fid), ind); + bool need_forward = param_.NeedForwardSearch(fmat.GetColDensity(fid), ind); + bool need_backward = param_.NeedBackwardSearch(fmat.GetColDensity(fid), ind); const std::vector &qexpand = qexpand_; #pragma omp parallel { const int tid = omp_get_thread_num(); - std::vector &temp = stemp[tid]; + std::vector &temp = stemp_[tid]; // cleanup temp statistics - for (size_t j = 0; j < qexpand.size(); ++j) { - temp[qexpand[j]].stats.Clear(); + for (int j : qexpand) { + temp[j].stats.Clear(); } - bst_uint step = (col.length + this->nthread - 1) / this->nthread; + bst_uint step = (col.length + this->nthread_ - 1) / this->nthread_; bst_uint end = std::min(col.length, step * (tid + 1)); for (bst_uint i = tid * step; i < end; ++i) { const bst_uint ridx = col[i].index; - const int nid = position[ridx]; + const int nid = position_[ridx]; if (nid < 0) continue; const bst_float fvalue = col[i].fvalue; if (temp[nid].stats.Empty()) { @@ -288,81 +285,81 @@ class ColMaker: public TreeUpdater { } } // start collecting the partial sum statistics - bst_omp_uint nnode = static_cast(qexpand.size()); + auto nnode = static_cast(qexpand.size()); #pragma omp parallel for schedule(static) for (bst_omp_uint j = 0; j < nnode; ++j) { const int nid = qexpand[j]; - TStats sum(param), tmp(param), c(param); - for (int tid = 0; tid < this->nthread; ++tid) { - tmp = stemp[tid][nid].stats; - stemp[tid][nid].stats = sum; + TStats sum(param_), tmp(param_), c(param_); + for (int tid = 0; tid < this->nthread_; ++tid) { + tmp = stemp_[tid][nid].stats; + 
stemp_[tid][nid].stats = sum; sum.Add(tmp); if (tid != 0) { - std::swap(stemp[tid - 1][nid].last_fvalue, stemp[tid][nid].first_fvalue); + std::swap(stemp_[tid - 1][nid].last_fvalue, stemp_[tid][nid].first_fvalue); } } - for (int tid = 0; tid < this->nthread; ++tid) { - stemp[tid][nid].stats_extra = sum; - ThreadEntry &e = stemp[tid][nid]; + for (int tid = 0; tid < this->nthread_; ++tid) { + stemp_[tid][nid].stats_extra = sum; + ThreadEntry &e = stemp_[tid][nid]; bst_float fsplit; if (tid != 0) { - if (stemp[tid - 1][nid].last_fvalue != e.first_fvalue) { - fsplit = (stemp[tid - 1][nid].last_fvalue + e.first_fvalue) * 0.5f; + if (stemp_[tid - 1][nid].last_fvalue != e.first_fvalue) { + fsplit = (stemp_[tid - 1][nid].last_fvalue + e.first_fvalue) * 0.5f; } else { continue; } } else { - fsplit = e.first_fvalue - rt_eps; + fsplit = e.first_fvalue - kRtEps; } if (need_forward && tid != 0) { - c.SetSubstract(snode[nid].stats, e.stats); - if (c.sum_hess >= param.min_child_weight && - e.stats.sum_hess >= param.min_child_weight) { - bst_float loss_chg = static_cast( + c.SetSubstract(snode_[nid].stats, e.stats); + if (c.sum_hess >= param_.min_child_weight && + e.stats.sum_hess >= param_.min_child_weight) { + auto loss_chg = static_cast( constraints_[nid].CalcSplitGain( - param, param.monotone_constraints[fid], e.stats, c) - - snode[nid].root_gain); + param_, param_.monotone_constraints[fid], e.stats, c) - + snode_[nid].root_gain); e.best.Update(loss_chg, fid, fsplit, false); } } if (need_backward) { tmp.SetSubstract(sum, e.stats); - c.SetSubstract(snode[nid].stats, tmp); - if (c.sum_hess >= param.min_child_weight && - tmp.sum_hess >= param.min_child_weight) { - bst_float loss_chg = static_cast( + c.SetSubstract(snode_[nid].stats, tmp); + if (c.sum_hess >= param_.min_child_weight && + tmp.sum_hess >= param_.min_child_weight) { + auto loss_chg = static_cast( constraints_[nid].CalcSplitGain( - param, param.monotone_constraints[fid], tmp, c) - - snode[nid].root_gain); + param_, param_.monotone_constraints[fid], tmp, c) - + snode_[nid].root_gain); e.best.Update(loss_chg, fid, fsplit, true); } } } if (need_backward) { tmp = sum; - ThreadEntry &e = stemp[this->nthread-1][nid]; - c.SetSubstract(snode[nid].stats, tmp); - if (c.sum_hess >= param.min_child_weight && - tmp.sum_hess >= param.min_child_weight) { - bst_float loss_chg = static_cast( + ThreadEntry &e = stemp_[this->nthread_-1][nid]; + c.SetSubstract(snode_[nid].stats, tmp); + if (c.sum_hess >= param_.min_child_weight && + tmp.sum_hess >= param_.min_child_weight) { + auto loss_chg = static_cast( constraints_[nid].CalcSplitGain( - param, param.monotone_constraints[fid], tmp, c) - - snode[nid].root_gain); - e.best.Update(loss_chg, fid, e.last_fvalue + rt_eps, true); + param_, param_.monotone_constraints[fid], tmp, c) - + snode_[nid].root_gain); + e.best.Update(loss_chg, fid, e.last_fvalue + kRtEps, true); } } } // rescan, generate candidate split #pragma omp parallel { - TStats c(param), cright(param); + TStats c(param_), cright(param_); const int tid = omp_get_thread_num(); - std::vector &temp = stemp[tid]; - bst_uint step = (col.length + this->nthread - 1) / this->nthread; + std::vector &temp = stemp_[tid]; + bst_uint step = (col.length + this->nthread_ - 1) / this->nthread_; bst_uint end = std::min(col.length, step * (tid + 1)); for (bst_uint i = tid * step; i < end; ++i) { const bst_uint ridx = col[i].index; - const int nid = position[ridx]; + const int nid = position_[ridx]; if (nid < 0) continue; const bst_float fvalue = col[i].fvalue; // get the 
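The tmp/sum rotation at the start of this chunk is an exclusive prefix sum over the threads' partial statistics: afterwards, thread t's stemp_ slot holds the total contributed by threads 0..t-1, which is what lets each thread resume the forward scan mid-column in ParallelFindSplit. The same idiom on plain doubles:

    #include <cassert>
    #include <vector>

    // In-place exclusive scan: slot t becomes the sum of slots 0..t-1.
    void ExclusiveScanInPlace(std::vector<double>* per_thread) {
      double sum = 0.0;
      for (double& slot : *per_thread) {
        double tmp = slot;  // this thread's partial sum
        slot = sum;         // total contributed by earlier threads
        sum += tmp;
      }
    }

    int main() {
      std::vector<double> partials = {3, 1, 4};
      ExclusiveScanInPlace(&partials);
      assert((partials == std::vector<double>{0, 3, 4}));
    }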
statistics of nid @@ -374,26 +371,26 @@ class ColMaker: public TreeUpdater { // forward default right if (fvalue != e.first_fvalue) { if (need_forward) { - c.SetSubstract(snode[nid].stats, e.stats); - if (c.sum_hess >= param.min_child_weight && - e.stats.sum_hess >= param.min_child_weight) { - bst_float loss_chg = static_cast( + c.SetSubstract(snode_[nid].stats, e.stats); + if (c.sum_hess >= param_.min_child_weight && + e.stats.sum_hess >= param_.min_child_weight) { + auto loss_chg = static_cast( constraints_[nid].CalcSplitGain( - param, param.monotone_constraints[fid], e.stats, c) - - snode[nid].root_gain); + param_, param_.monotone_constraints[fid], e.stats, c) - + snode_[nid].root_gain); e.best.Update(loss_chg, fid, (fvalue + e.first_fvalue) * 0.5f, false); } } if (need_backward) { cright.SetSubstract(e.stats_extra, e.stats); - c.SetSubstract(snode[nid].stats, cright); - if (c.sum_hess >= param.min_child_weight && - cright.sum_hess >= param.min_child_weight) { - bst_float loss_chg = static_cast( + c.SetSubstract(snode_[nid].stats, cright); + if (c.sum_hess >= param_.min_child_weight && + cright.sum_hess >= param_.min_child_weight) { + auto loss_chg = static_cast( constraints_[nid].CalcSplitGain( - param, param.monotone_constraints[fid], c, cright) - - snode[nid].root_gain); + param_, param_.monotone_constraints[fid], c, cright) - + snode_[nid].root_gain); e.best.Update(loss_chg, fid, (fvalue + e.first_fvalue) * 0.5f, true); } } @@ -405,7 +402,7 @@ class ColMaker: public TreeUpdater { } } // update enumeration solution - inline void UpdateEnumeration(int nid, bst_gpair gstats, + inline void UpdateEnumeration(int nid, GradientPair gstats, bst_float fvalue, int d_step, bst_uint fid, TStats &c, std::vector &temp) { // NOLINT(*) // get the statistics of nid @@ -417,20 +414,20 @@ class ColMaker: public TreeUpdater { } else { // try to find a split if (fvalue != e.last_fvalue && - e.stats.sum_hess >= param.min_child_weight) { - c.SetSubstract(snode[nid].stats, e.stats); - if (c.sum_hess >= param.min_child_weight) { + e.stats.sum_hess >= param_.min_child_weight) { + c.SetSubstract(snode_[nid].stats, e.stats); + if (c.sum_hess >= param_.min_child_weight) { bst_float loss_chg; if (d_step == -1) { loss_chg = static_cast( constraints_[nid].CalcSplitGain( - param, param.monotone_constraints[fid], c, e.stats) - - snode[nid].root_gain); + param_, param_.monotone_constraints[fid], c, e.stats) - + snode_[nid].root_gain); } else { loss_chg = static_cast( constraints_[nid].CalcSplitGain( - param, param.monotone_constraints[fid], e.stats, c) - - snode[nid].root_gain); + param_, param_.monotone_constraints[fid], e.stats, c) - + snode_[nid].root_gain); } e.best.Update(loss_chg, fid, (fvalue + e.last_fvalue) * 0.5f, d_step == -1); @@ -446,19 +443,19 @@ class ColMaker: public TreeUpdater { const ColBatch::Entry *end, int d_step, bst_uint fid, - const std::vector &gpair, + const std::vector &gpair, std::vector &temp) { // NOLINT(*) const std::vector &qexpand = qexpand_; // clear all the temp statistics - for (size_t j = 0; j < qexpand.size(); ++j) { - temp[qexpand[j]].stats.Clear(); + for (auto nid : qexpand) { + temp[nid].stats.Clear(); } // left statistics - TStats c(param); + TStats c(param_); // local cache buffer for position and gradient pair - const int kBuffer = 32; - int buf_position[kBuffer]; - bst_gpair buf_gpair[kBuffer]; + constexpr int kBuffer = 32; + int buf_position[kBuffer] = {}; + GradientPair buf_gpair[kBuffer] = {}; // aligned ending position const ColBatch::Entry *align_end; if (d_step > 0) 
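EnumerateSplitCacheOpt above (taken when cache_opt is set) splits each block of work into a gather pass and a compute pass: the scattered position/gradient lookups for kBuffer = 32 entries are pulled into small stack buffers first, and only then does the split scan consume them, so the random reads are not interleaved with the arithmetic. A simplified sketch of the access pattern; the accumulation itself is elided:

    #include <cstddef>
    #include <vector>

    constexpr int kBuffer = 32;

    // row_of_entry: row index of each sorted column entry; position/gpair are
    // indexed by row, so reading them in entry order is a scattered access.
    void ProcessColumn(const std::vector<int>& row_of_entry,
                       const std::vector<int>& position,
                       const std::vector<double>& gpair) {
      int buf_position[kBuffer];
      double buf_gpair[kBuffer];
      const std::size_t aligned = (row_of_entry.size() / kBuffer) * kBuffer;
      for (std::size_t base = 0; base < aligned; base += kBuffer) {
        for (int i = 0; i < kBuffer; ++i) {  // gather pass: scattered reads only
          buf_position[i] = position[row_of_entry[base + i]];
          buf_gpair[i] = gpair[row_of_entry[base + i]];
        }
        for (int i = 0; i < kBuffer; ++i) {  // compute pass: buffered values only
          if (buf_position[i] >= 0) {
            // accumulate buf_gpair[i] into the running split scan here
          }
        }
      }
      // a remainder loop would handle the last size % kBuffer entries, as in
      // the "finish up the ending piece" block of the patch
    }

    int main() {
      std::vector<int> rows = {2, 0, 1, 3};
      ProcessColumn(rows, std::vector<int>(4, 0), std::vector<double>(4, 1.0));
    }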
{ @@ -473,7 +470,7 @@ class ColMaker: public TreeUpdater { for (it = begin; it != align_end; it += align_step) { const ColBatch::Entry *p; for (i = 0, p = it; i < kBuffer; ++i, p += d_step) { - buf_position[i] = position[p->index]; + buf_position[i] = position_[p->index]; buf_gpair[i] = gpair[p->index]; } for (i = 0, p = it; i < kBuffer; ++i, p += d_step) { @@ -486,7 +483,7 @@ class ColMaker: public TreeUpdater { } // finish up the ending piece for (it = align_end, i = 0; it != end; ++i, it += d_step) { - buf_position[i] = position[it->index]; + buf_position[i] = position_[it->index]; buf_gpair[i] = gpair[it->index]; } for (it = align_end, i = 0; it != end; ++i, it += d_step) { @@ -497,25 +494,24 @@ class ColMaker: public TreeUpdater { fid, c, temp); } // finish updating all statistics, check if it is possible to include all sum statistics - for (size_t i = 0; i < qexpand.size(); ++i) { - const int nid = qexpand[i]; + for (int nid : qexpand) { ThreadEntry &e = temp[nid]; - c.SetSubstract(snode[nid].stats, e.stats); - if (e.stats.sum_hess >= param.min_child_weight && - c.sum_hess >= param.min_child_weight) { + c.SetSubstract(snode_[nid].stats, e.stats); + if (e.stats.sum_hess >= param_.min_child_weight && + c.sum_hess >= param_.min_child_weight) { bst_float loss_chg; if (d_step == -1) { loss_chg = static_cast( constraints_[nid].CalcSplitGain( - param, param.monotone_constraints[fid], c, e.stats) - - snode[nid].root_gain); + param_, param_.monotone_constraints[fid], c, e.stats) - + snode_[nid].root_gain); } else { loss_chg = static_cast( constraints_[nid].CalcSplitGain( - param, param.monotone_constraints[fid], e.stats, c) - - snode[nid].root_gain); + param_, param_.monotone_constraints[fid], e.stats, c) - + snode_[nid].root_gain); } - const bst_float gap = std::abs(e.last_fvalue) + rt_eps; + const bst_float gap = std::abs(e.last_fvalue) + kRtEps; const bst_float delta = d_step == +1 ? 
gap: -gap; e.best.Update(loss_chg, fid, e.last_fvalue + delta, d_step == -1); } @@ -527,24 +523,24 @@ class ColMaker: public TreeUpdater { const ColBatch::Entry *end, int d_step, bst_uint fid, - const std::vector &gpair, + const std::vector &gpair, const MetaInfo &info, std::vector &temp) { // NOLINT(*) // use cacheline aware optimization - if (TStats::kSimpleStats != 0 && param.cache_opt != 0) { + if (TStats::kSimpleStats != 0 && param_.cache_opt != 0) { EnumerateSplitCacheOpt(begin, end, d_step, fid, gpair, temp); return; } const std::vector &qexpand = qexpand_; // clear all the temp statistics - for (size_t j = 0; j < qexpand.size(); ++j) { - temp[qexpand[j]].stats.Clear(); + for (auto nid : qexpand) { + temp[nid].stats.Clear(); } // left statistics - TStats c(param); + TStats c(param_); for (const ColBatch::Entry *it = begin; it != end; it += d_step) { const bst_uint ridx = it->index; - const int nid = position[ridx]; + const int nid = position_[ridx]; if (nid < 0) continue; // start working const bst_float fvalue = it->fvalue; @@ -557,20 +553,20 @@ class ColMaker: public TreeUpdater { } else { // try to find a split if (fvalue != e.last_fvalue && - e.stats.sum_hess >= param.min_child_weight) { - c.SetSubstract(snode[nid].stats, e.stats); - if (c.sum_hess >= param.min_child_weight) { + e.stats.sum_hess >= param_.min_child_weight) { + c.SetSubstract(snode_[nid].stats, e.stats); + if (c.sum_hess >= param_.min_child_weight) { bst_float loss_chg; if (d_step == -1) { loss_chg = static_cast( constraints_[nid].CalcSplitGain( - param, param.monotone_constraints[fid], c, e.stats) - - snode[nid].root_gain); + param_, param_.monotone_constraints[fid], c, e.stats) - + snode_[nid].root_gain); } else { loss_chg = static_cast( constraints_[nid].CalcSplitGain( - param, param.monotone_constraints[fid], e.stats, c) - - snode[nid].root_gain); + param_, param_.monotone_constraints[fid], e.stats, c) - + snode_[nid].root_gain); } e.best.Update(loss_chg, fid, (fvalue + e.last_fvalue) * 0.5f, d_step == -1); } @@ -581,25 +577,24 @@ class ColMaker: public TreeUpdater { } } // finish updating all statistics, check if it is possible to include all sum statistics - for (size_t i = 0; i < qexpand.size(); ++i) { - const int nid = qexpand[i]; + for (int nid : qexpand) { ThreadEntry &e = temp[nid]; - c.SetSubstract(snode[nid].stats, e.stats); - if (e.stats.sum_hess >= param.min_child_weight && - c.sum_hess >= param.min_child_weight) { + c.SetSubstract(snode_[nid].stats, e.stats); + if (e.stats.sum_hess >= param_.min_child_weight && + c.sum_hess >= param_.min_child_weight) { bst_float loss_chg; if (d_step == -1) { loss_chg = static_cast( constraints_[nid].CalcSplitGain( - param, param.monotone_constraints[fid], c, e.stats) - - snode[nid].root_gain); + param_, param_.monotone_constraints[fid], c, e.stats) - + snode_[nid].root_gain); } else { loss_chg = static_cast( constraints_[nid].CalcSplitGain( - param, param.monotone_constraints[fid], e.stats, c) - - snode[nid].root_gain); + param_, param_.monotone_constraints[fid], e.stats, c) - + snode_[nid].root_gain); } - const bst_float gap = std::abs(e.last_fvalue) + rt_eps; + const bst_float gap = std::abs(e.last_fvalue) + kRtEps; const bst_float delta = d_step == +1 ? 
gap: -gap; e.best.Update(loss_chg, fid, e.last_fvalue + delta, d_step == -1); } @@ -608,17 +603,17 @@ class ColMaker: public TreeUpdater { // update the solution candidate virtual void UpdateSolution(const ColBatch& batch, - const std::vector& gpair, + const std::vector& gpair, const DMatrix& fmat) { - const MetaInfo& info = fmat.info(); + const MetaInfo& info = fmat.Info(); // start enumeration - const bst_omp_uint nsize = static_cast(batch.size); + const auto nsize = static_cast(batch.size); #if defined(_OPENMP) - const int batch_size = std::max(static_cast(nsize / this->nthread / 32), 1); + const int batch_size = std::max(static_cast(nsize / this->nthread_ / 32), 1); #endif - int poption = param.parallel_option; + int poption = param_.parallel_option; if (poption == 2) { - poption = static_cast(nsize) * 2 < this->nthread ? 1 : 0; + poption = static_cast(nsize) * 2 < this->nthread_ ? 1 : 0; } if (poption == 0) { #pragma omp parallel for schedule(dynamic, batch_size) @@ -627,13 +622,13 @@ class ColMaker: public TreeUpdater { const int tid = omp_get_thread_num(); const ColBatch::Inst c = batch[i]; const bool ind = c.length != 0 && c.data[0].fvalue == c.data[c.length - 1].fvalue; - if (param.need_forward_search(fmat.GetColDensity(fid), ind)) { + if (param_.NeedForwardSearch(fmat.GetColDensity(fid), ind)) { this->EnumerateSplit(c.data, c.data + c.length, +1, - fid, gpair, info, stemp[tid]); + fid, gpair, info, stemp_[tid]); } - if (param.need_backward_search(fmat.GetColDensity(fid), ind)) { + if (param_.NeedBackwardSearch(fmat.GetColDensity(fid), ind)) { this->EnumerateSplit(c.data + c.length - 1, c.data - 1, -1, - fid, gpair, info, stemp[tid]); + fid, gpair, info, stemp_[tid]); } } } else { @@ -646,15 +641,15 @@ class ColMaker: public TreeUpdater { // find splits at current level, do split per level inline void FindSplit(int depth, const std::vector &qexpand, - const std::vector &gpair, + const std::vector &gpair, DMatrix *p_fmat, RegTree *p_tree) { - std::vector feat_set = feat_index; - if (param.colsample_bylevel != 1.0f) { + std::vector feat_set = feat_index_; + if (param_.colsample_bylevel != 1.0f) { std::shuffle(feat_set.begin(), feat_set.end(), common::GlobalRandom()); unsigned n = std::max(static_cast(1), - static_cast(param.colsample_bylevel * feat_index.size())); - CHECK_GT(param.colsample_bylevel, 0U) + static_cast(param_.colsample_bylevel * feat_index_.size())); + CHECK_GT(param_.colsample_bylevel, 0U) << "colsample_bylevel cannot be zero."; feat_set.resize(n); } @@ -665,18 +660,17 @@ class ColMaker: public TreeUpdater { // after this each thread's stemp will get the best candidates, aggregate results this->SyncBestSolution(qexpand); // get the best result, we can synchronize the solution - for (size_t i = 0; i < qexpand.size(); ++i) { - const int nid = qexpand[i]; - NodeEntry &e = snode[nid]; + for (int nid : qexpand) { + NodeEntry &e = snode_[nid]; // now we know the solution in snode[nid], set split - if (e.best.loss_chg > rt_eps) { + if (e.best.loss_chg > kRtEps) { p_tree->AddChilds(nid); - (*p_tree)[nid].set_split(e.best.split_index(), e.best.split_value, e.best.default_left()); + (*p_tree)[nid].SetSplit(e.best.SplitIndex(), e.best.split_value, e.best.DefaultLeft()); // mark right child as 0, to indicate fresh leaf - (*p_tree)[(*p_tree)[nid].cleft()].set_leaf(0.0f, 0); - (*p_tree)[(*p_tree)[nid].cright()].set_leaf(0.0f, 0); + (*p_tree)[(*p_tree)[nid].LeftChild()].SetLeaf(0.0f, 0); + (*p_tree)[(*p_tree)[nid].RightChild()].SetLeaf(0.0f, 0); } else { - 
(*p_tree)[nid].set_leaf(e.weight * param.learning_rate); + (*p_tree)[nid].SetLeaf(e.weight * param_.learning_rate); } } } @@ -687,29 +681,29 @@ class ColMaker: public TreeUpdater { // set the positions in the nondefault this->SetNonDefaultPosition(qexpand, p_fmat, tree); // set rest of instances to default position - const RowSet &rowset = p_fmat->buffered_rowset(); + const RowSet &rowset = p_fmat->BufferedRowset(); // set default direct nodes to default // for leaf nodes that are not fresh, mark them as ~nid, // so that they are ignored in future statistics collection - const bst_omp_uint ndata = static_cast(rowset.size()); + const auto ndata = static_cast(rowset.Size()); #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < ndata; ++i) { const bst_uint ridx = rowset[i]; - CHECK_LT(ridx, position.size()) - << "ridx exceed bound " << "ridx="<< ridx << " pos=" << position.size(); + CHECK_LT(ridx, position_.size()) + << "ridx exceed bound " << "ridx="<< ridx << " pos=" << position_.size(); const int nid = this->DecodePosition(ridx); - if (tree[nid].is_leaf()) { + if (tree[nid].IsLeaf()) { // mark finish when it is not a fresh leaf - if (tree[nid].cright() == -1) { - position[ridx] = ~nid; + if (tree[nid].RightChild() == -1) { + position_[ridx] = ~nid; } } else { // push to default branch - if (tree[nid].default_left()) { - this->SetEncodePosition(ridx, tree[nid].cleft()); + if (tree[nid].DefaultLeft()) { + this->SetEncodePosition(ridx, tree[nid].LeftChild()); } else { - this->SetEncodePosition(ridx, tree[nid].cright()); + this->SetEncodePosition(ridx, tree[nid].RightChild()); } } } @@ -717,11 +711,10 @@ class ColMaker: public TreeUpdater { // customization part // synchronize the best solution of each node virtual void SyncBestSolution(const std::vector &qexpand) { - for (size_t i = 0; i < qexpand.size(); ++i) { - const int nid = qexpand[i]; - NodeEntry &e = snode[nid]; - for (int tid = 0; tid < this->nthread; ++tid) { - e.best.Update(stemp[tid][nid].best); + for (int nid : qexpand) { + NodeEntry &e = snode_[nid]; + for (int tid = 0; tid < this->nthread_; ++tid) { + e.best.Update(stemp_[tid][nid].best); } } } @@ -730,10 +723,9 @@ class ColMaker: public TreeUpdater { const RegTree &tree) { // step 1, classify the non-default data into right places std::vector fsplits; - for (size_t i = 0; i < qexpand.size(); ++i) { - const int nid = qexpand[i]; - if (!tree[nid].is_leaf()) { - fsplits.push_back(tree[nid].split_index()); + for (int nid : qexpand) { + if (!tree[nid].IsLeaf()) { + fsplits.push_back(tree[nid].SplitIndex()); } } std::sort(fsplits.begin(), fsplits.end()); @@ -744,18 +736,18 @@ class ColMaker: public TreeUpdater { for (size_t i = 0; i < batch.size; ++i) { ColBatch::Inst col = batch[i]; const bst_uint fid = batch.col_index[i]; - const bst_omp_uint ndata = static_cast(col.length); + const auto ndata = static_cast(col.length); #pragma omp parallel for schedule(static) for (bst_omp_uint j = 0; j < ndata; ++j) { const bst_uint ridx = col[j].index; const int nid = this->DecodePosition(ridx); const bst_float fvalue = col[j].fvalue; // go back to parent, correct those who are not default - if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) { - if (fvalue < tree[nid].split_cond()) { - this->SetEncodePosition(ridx, tree[nid].cleft()); + if (!tree[nid].IsLeaf() && tree[nid].SplitIndex() == fid) { + if (fvalue < tree[nid].SplitCond()) { + this->SetEncodePosition(ridx, tree[nid].LeftChild()); } else { - this->SetEncodePosition(ridx, tree[nid].cright()); + 
this->SetEncodePosition(ridx, tree[nid].RightChild()); } } } @@ -765,29 +757,29 @@ class ColMaker: public TreeUpdater { // utils to get/set position, with encoded format // return decoded position inline int DecodePosition(bst_uint ridx) const { - const int pid = position[ridx]; + const int pid = position_[ridx]; return pid < 0 ? ~pid : pid; } // encode the encoded position value for ridx inline void SetEncodePosition(bst_uint ridx, int nid) { - if (position[ridx] < 0) { - position[ridx] = ~nid; + if (position_[ridx] < 0) { + position_[ridx] = ~nid; } else { - position[ridx] = nid; + position_[ridx] = nid; } } // --data fields-- - const TrainParam& param; + const TrainParam& param_; // number of omp thread used during training - const int nthread; + const int nthread_; // Per feature: shuffle index of each feature index - std::vector feat_index; + std::vector feat_index_; // Instance Data: current node position in the tree of each instance - std::vector position; + std::vector position_; // PerThread x PerTreeNode: statistics for per thread construction - std::vector< std::vector > stemp; + std::vector< std::vector > stemp_; /*! \brief TreeNode Data: statistics for each constructed node */ - std::vector snode; + std::vector snode_; /*! \brief queue of nodes to be expanded */ std::vector qexpand_; // constraint value @@ -799,75 +791,72 @@ class ColMaker: public TreeUpdater { template class DistColMaker : public ColMaker { public: - DistColMaker() : builder(param) { - pruner.reset(TreeUpdater::Create("prune")); + DistColMaker() : builder_(param_) { + pruner_.reset(TreeUpdater::Create("prune")); } void Init(const std::vector >& args) override { - param.InitAllowUnknown(args); - pruner->Init(args); + param_.InitAllowUnknown(args); + pruner_->Init(args); } - void Update(HostDeviceVector *gpair, + void Update(HostDeviceVector *gpair, DMatrix* dmat, const std::vector &trees) override { - TStats::CheckInfo(dmat->info()); + TStats::CheckInfo(dmat->Info()); CHECK_EQ(trees.size(), 1U) << "DistColMaker: only support one tree at a time"; // build the tree - builder.Update(gpair->data_h(), dmat, trees[0]); + builder_.Update(gpair->HostVector(), dmat, trees[0]); //// prune the tree, note that pruner will sync the tree - pruner->Update(gpair, dmat, trees); + pruner_->Update(gpair, dmat, trees); // update position after the tree is pruned - builder.UpdatePosition(dmat, *trees[0]); + builder_.UpdatePosition(dmat, *trees[0]); } private: - struct Builder : public ColMaker::Builder { + class Builder : public ColMaker::Builder { public: explicit Builder(const TrainParam ¶m) - : ColMaker::Builder(param) { - } + : ColMaker::Builder(param) {} inline void UpdatePosition(DMatrix* p_fmat, const RegTree &tree) { - const RowSet &rowset = p_fmat->buffered_rowset(); - const bst_omp_uint ndata = static_cast(rowset.size()); + const RowSet &rowset = p_fmat->BufferedRowset(); + const auto ndata = static_cast(rowset.Size()); #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < ndata; ++i) { const bst_uint ridx = rowset[i]; int nid = this->DecodePosition(ridx); - while (tree[nid].is_deleted()) { - nid = tree[nid].parent(); + while (tree[nid].IsDeleted()) { + nid = tree[nid].Parent(); CHECK_GE(nid, 0); } - this->position[ridx] = nid; + this->position_[ridx] = nid; } } inline const int* GetLeafPosition() const { - return dmlc::BeginPtr(this->position); + return dmlc::BeginPtr(this->position_); } protected: - void SetNonDefaultPosition(const std::vector &qexpand, - DMatrix *p_fmat, + void 
SetNonDefaultPosition(const std::vector &qexpand, DMatrix *p_fmat, const RegTree &tree) override { - // step 2, classify the non-default data into right places + // step 2, classify the non-default data into right places std::vector fsplits; - for (size_t i = 0; i < qexpand.size(); ++i) { - const int nid = qexpand[i]; - if (!tree[nid].is_leaf()) { - fsplits.push_back(tree[nid].split_index()); + for (int nid : qexpand) { + if (!tree[nid].IsLeaf()) { + fsplits.push_back(tree[nid].SplitIndex()); } } // get the candidate split index std::sort(fsplits.begin(), fsplits.end()); fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin()); - while (fsplits.size() != 0 && fsplits.back() >= p_fmat->info().num_col) { + while (fsplits.size() != 0 && fsplits.back() >= p_fmat->Info().num_col_) { fsplits.pop_back(); } // bitmap is only word concurrent, set to bool first { - bst_omp_uint ndata = static_cast(this->position.size()); - boolmap.resize(ndata); + auto ndata = static_cast(this->position_.size()); + boolmap_.resize(ndata); #pragma omp parallel for schedule(static) for (bst_omp_uint j = 0; j < ndata; ++j) { - boolmap[j] = 0; + boolmap_[j] = 0; } } dmlc::DataIter *iter = p_fmat->ColIterator(fsplits); @@ -876,39 +865,39 @@ class DistColMaker : public ColMaker { for (size_t i = 0; i < batch.size; ++i) { ColBatch::Inst col = batch[i]; const bst_uint fid = batch.col_index[i]; - const bst_omp_uint ndata = static_cast(col.length); + const auto ndata = static_cast(col.length); #pragma omp parallel for schedule(static) for (bst_omp_uint j = 0; j < ndata; ++j) { const bst_uint ridx = col[j].index; const bst_float fvalue = col[j].fvalue; const int nid = this->DecodePosition(ridx); - if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) { - if (fvalue < tree[nid].split_cond()) { - if (!tree[nid].default_left()) boolmap[ridx] = 1; + if (!tree[nid].IsLeaf() && tree[nid].SplitIndex() == fid) { + if (fvalue < tree[nid].SplitCond()) { + if (!tree[nid].DefaultLeft()) boolmap_[ridx] = 1; } else { - if (tree[nid].default_left()) boolmap[ridx] = 1; + if (tree[nid].DefaultLeft()) boolmap_[ridx] = 1; } } } } } - bitmap.InitFromBool(boolmap); + bitmap_.InitFromBool(boolmap_); // communicate bitmap - rabit::Allreduce(dmlc::BeginPtr(bitmap.data), bitmap.data.size()); - const RowSet &rowset = p_fmat->buffered_rowset(); + rabit::Allreduce(dmlc::BeginPtr(bitmap_.data), bitmap_.data.size()); + const RowSet &rowset = p_fmat->BufferedRowset(); // get the new position - const bst_omp_uint ndata = static_cast(rowset.size()); + const auto ndata = static_cast(rowset.Size()); #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < ndata; ++i) { const bst_uint ridx = rowset[i]; const int nid = this->DecodePosition(ridx); - if (bitmap.Get(ridx)) { - CHECK(!tree[nid].is_leaf()) << "inconsistent reduce information"; - if (tree[nid].default_left()) { - this->SetEncodePosition(ridx, tree[nid].cright()); + if (bitmap_.Get(ridx)) { + CHECK(!tree[nid].IsLeaf()) << "inconsistent reduce information"; + if (tree[nid].DefaultLeft()) { + this->SetEncodePosition(ridx, tree[nid].RightChild()); } else { - this->SetEncodePosition(ridx, tree[nid].cleft()); + this->SetEncodePosition(ridx, tree[nid].LeftChild()); } } } @@ -916,47 +905,46 @@ class DistColMaker : public ColMaker { // synchronize the best solution of each node void SyncBestSolution(const std::vector &qexpand) override { std::vector vec; - for (size_t i = 0; i < qexpand.size(); ++i) { - const int nid = qexpand[i]; - for (int tid = 0; tid < this->nthread; 
++tid) { - this->snode[nid].best.Update(this->stemp[tid][nid].best); + for (int nid : qexpand) { + for (int tid = 0; tid < this->nthread_; ++tid) { + this->snode_[nid].best.Update(this->stemp_[tid][nid].best); } - vec.push_back(this->snode[nid].best); + vec.push_back(this->snode_[nid].best); } // TODO(tqchen) lazy version // communicate best solution - reducer.Allreduce(dmlc::BeginPtr(vec), vec.size()); + reducer_.Allreduce(dmlc::BeginPtr(vec), vec.size()); // assign solution back for (size_t i = 0; i < qexpand.size(); ++i) { const int nid = qexpand[i]; - this->snode[nid].best = vec[i]; + this->snode_[nid].best = vec[i]; } } private: - common::BitMap bitmap; - std::vector boolmap; - rabit::Reducer reducer; + common::BitMap bitmap_; + std::vector boolmap_; + rabit::Reducer reducer_; }; // we directly introduce pruner here - std::unique_ptr pruner; + std::unique_ptr pruner_; // training parameter - TrainParam param; + TrainParam param_; // pointer to the builder - Builder builder; + Builder builder_; }; // simple switch to defer implementation. class TreeUpdaterSwitch : public TreeUpdater { public: - TreeUpdaterSwitch() : monotone_(false) {} + TreeUpdaterSwitch() = default; void Init(const std::vector >& args) override { for (auto &kv : args) { if (kv.first == "monotone_constraints" && kv.second.length() != 0) { monotone_ = true; } } - if (inner_.get() == nullptr) { + if (inner_ == nullptr) { if (monotone_) { inner_.reset(new ColMaker()); } else { @@ -967,7 +955,7 @@ class TreeUpdaterSwitch : public TreeUpdater { inner_->Init(args); } - void Update(HostDeviceVector* gpair, + void Update(HostDeviceVector* gpair, DMatrix* data, const std::vector& trees) override { CHECK(inner_ != nullptr); @@ -976,7 +964,7 @@ class TreeUpdaterSwitch : public TreeUpdater { private: // monotone constraints - bool monotone_; + bool monotone_{false}; // internal implementation std::unique_ptr inner_; }; diff --git a/src/tree/updater_fast_hist.cc b/src/tree/updater_fast_hist.cc index 9f5f6024e..5cb77c94b 100644 --- a/src/tree/updater_fast_hist.cc +++ b/src/tree/updater_fast_hist.cc @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -50,47 +51,47 @@ class FastHistMaker: public TreeUpdater { pruner_.reset(TreeUpdater::Create("prune")); } pruner_->Init(args); - param.InitAllowUnknown(args); - fhparam.InitAllowUnknown(args); + param_.InitAllowUnknown(args); + fhparam_.InitAllowUnknown(args); is_gmat_initialized_ = false; } - void Update(HostDeviceVector* gpair, + void Update(HostDeviceVector* gpair, DMatrix* dmat, const std::vector& trees) override { - TStats::CheckInfo(dmat->info()); + TStats::CheckInfo(dmat->Info()); if (is_gmat_initialized_ == false) { double tstart = dmlc::GetTime(); - hmat_.Init(dmat, static_cast(param.max_bin)); + hmat_.Init(dmat, static_cast(param_.max_bin)); gmat_.cut = &hmat_; gmat_.Init(dmat); - column_matrix_.Init(gmat_, fhparam); - if (fhparam.enable_feature_grouping > 0) { - gmatb_.Init(gmat_, column_matrix_, fhparam); + column_matrix_.Init(gmat_, fhparam_); + if (fhparam_.enable_feature_grouping > 0) { + gmatb_.Init(gmat_, column_matrix_, fhparam_); } is_gmat_initialized_ = true; - if (param.debug_verbose > 0) { + if (param_.debug_verbose > 0) { LOG(INFO) << "Generating gmat: " << dmlc::GetTime() - tstart << " sec"; } } // rescale learning rate according to size of trees - float lr = param.learning_rate; - param.learning_rate = lr / trees.size(); - TConstraint::Init(¶m, dmat->info().num_col); + float lr = param_.learning_rate; + param_.learning_rate = lr / 
trees.size(); + TConstraint::Init(¶m_, dmat->Info().num_col_); // build tree if (!builder_) { - builder_.reset(new Builder(param, fhparam, std::move(pruner_))); + builder_.reset(new Builder(param_, fhparam_, std::move(pruner_))); } - for (size_t i = 0; i < trees.size(); ++i) { + for (auto tree : trees) { builder_->Update - (gmat_, gmatb_, column_matrix_, gpair, dmat, trees[i]); + (gmat_, gmatb_, column_matrix_, gpair, dmat, tree); } - param.learning_rate = lr; + param_.learning_rate = lr; } bool UpdatePredictionCache(const DMatrix* data, HostDeviceVector* out_preds) override { - if (!builder_ || param.subsample < 1.0f) { + if (!builder_ || param_.subsample < 1.0f) { return false; } else { return builder_->UpdatePredictionCache(data, out_preds); @@ -99,8 +100,8 @@ class FastHistMaker: public TreeUpdater { protected: // training parameter - TrainParam param; - FastHistParam fhparam; + TrainParam param_; + FastHistParam fhparam_; // data sketch HistCutMatrix hmat_; // quantized data matrix @@ -134,13 +135,13 @@ class FastHistMaker: public TreeUpdater { explicit Builder(const TrainParam& param, const FastHistParam& fhparam, std::unique_ptr pruner) - : param(param), fhparam(fhparam), pruner_(std::move(pruner)), + : param_(param), fhparam_(fhparam), pruner_(std::move(pruner)), p_last_tree_(nullptr), p_last_fmat_(nullptr) {} // update one tree, growing virtual void Update(const GHistIndexMatrix& gmat, const GHistIndexBlockMatrix& gmatb, const ColumnMatrix& column_matrix, - HostDeviceVector* gpair, + HostDeviceVector* gpair, DMatrix* p_fmat, RegTree* p_tree) { double gstart = dmlc::GetTime(); @@ -155,11 +156,11 @@ class FastHistMaker: public TreeUpdater { double time_evaluate_split = 0; double time_apply_split = 0; - std::vector& gpair_h = gpair->data_h(); + std::vector& gpair_h = gpair->HostVector(); tstart = dmlc::GetTime(); this->InitData(gmat, gpair_h, *p_fmat, *p_tree); - std::vector feat_set = feat_index; + std::vector feat_set = feat_index_; time_init_data = dmlc::GetTime() - tstart; // FIXME(hcho3): this code is broken when param.num_roots > 1. 
Please fix it @@ -179,7 +180,7 @@ class FastHistMaker: public TreeUpdater { this->EvaluateSplit(nid, gmat, hist_, *p_fmat, *p_tree, feat_set); time_evaluate_split += dmlc::GetTime() - tstart; qexpand_->push(ExpandEntry(nid, p_tree->GetDepth(nid), - snode[nid].best.loss_chg, + snode_[nid].best.loss_chg, timestamp++)); ++num_leaves; } @@ -188,21 +189,21 @@ class FastHistMaker: public TreeUpdater { const ExpandEntry candidate = qexpand_->top(); const int nid = candidate.nid; qexpand_->pop(); - if (candidate.loss_chg <= rt_eps - || (param.max_depth > 0 && candidate.depth == param.max_depth) - || (param.max_leaves > 0 && num_leaves == param.max_leaves) ) { - (*p_tree)[nid].set_leaf(snode[nid].weight * param.learning_rate); + if (candidate.loss_chg <= kRtEps + || (param_.max_depth > 0 && candidate.depth == param_.max_depth) + || (param_.max_leaves > 0 && num_leaves == param_.max_leaves) ) { + (*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate); } else { tstart = dmlc::GetTime(); this->ApplySplit(nid, gmat, column_matrix, hist_, *p_fmat, p_tree); time_apply_split += dmlc::GetTime() - tstart; tstart = dmlc::GetTime(); - const int cleft = (*p_tree)[nid].cleft(); - const int cright = (*p_tree)[nid].cright(); + const int cleft = (*p_tree)[nid].LeftChild(); + const int cright = (*p_tree)[nid].RightChild(); hist_.AddHistRow(cleft); hist_.AddHistRow(cright); - if (row_set_collection_[cleft].size() < row_set_collection_[cright].size()) { + if (row_set_collection_[cleft].Size() < row_set_collection_[cright].Size()) { BuildHist(gpair_h, row_set_collection_[cleft], gmat, gmatb, feat_set, hist_[cleft]); SubtractionTrick(hist_[cright], hist_[cleft], hist_[nid]); } else { @@ -222,10 +223,10 @@ class FastHistMaker: public TreeUpdater { time_evaluate_split += dmlc::GetTime() - tstart; qexpand_->push(ExpandEntry(cleft, p_tree->GetDepth(cleft), - snode[cleft].best.loss_chg, + snode_[cleft].best.loss_chg, timestamp++)); qexpand_->push(ExpandEntry(cright, p_tree->GetDepth(cright), - snode[cright].best.loss_chg, + snode_[cright].best.loss_chg, timestamp++)); ++num_leaves; // give two and take one, as parent is no longer a leaf @@ -238,19 +239,19 @@ class FastHistMaker: public TreeUpdater { while (!qexpand_->empty()) { const int nid = qexpand_->top().nid; qexpand_->pop(); - (*p_tree)[nid].set_leaf(snode[nid].weight * param.learning_rate); + (*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate); } // remember auxiliary statistics in the tree node for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) { - p_tree->stat(nid).loss_chg = snode[nid].best.loss_chg; - p_tree->stat(nid).base_weight = snode[nid].weight; - p_tree->stat(nid).sum_hess = static_cast(snode[nid].stats.sum_hess); - snode[nid].stats.SetLeafVec(param, p_tree->leafvec(nid)); + p_tree->Stat(nid).loss_chg = snode_[nid].best.loss_chg; + p_tree->Stat(nid).base_weight = snode_[nid].weight; + p_tree->Stat(nid).sum_hess = static_cast(snode_[nid].stats.sum_hess); + snode_[nid].stats.SetLeafVec(param_, p_tree->Leafvec(nid)); } pruner_->Update(gpair, p_fmat, std::vector{p_tree}); - if (param.debug_verbose > 0) { + if (param_.debug_verbose > 0) { double total_time = dmlc::GetTime() - gstart; LOG(INFO) << "\nInitData: " << std::fixed << std::setw(6) << std::setprecision(4) << time_init_data @@ -278,13 +279,13 @@ class FastHistMaker: public TreeUpdater { } } - inline void BuildHist(const std::vector& gpair, + inline void BuildHist(const std::vector& gpair, const RowSetCollection::Elem row_indices, const GHistIndexMatrix& gmat, const 
GHistIndexBlockMatrix& gmatb, const std::vector& feat_set, GHistRow hist) { - if (fhparam.enable_feature_grouping > 0) { + if (fhparam_.enable_feature_grouping > 0) { hist_builder_.BuildBlockHist(gpair, row_indices, gmatb, feat_set, hist); } else { hist_builder_.BuildHist(gpair, row_indices, gmat, feat_set, hist); @@ -297,7 +298,7 @@ class FastHistMaker: public TreeUpdater { inline bool UpdatePredictionCache(const DMatrix* data, HostDeviceVector* p_out_preds) { - std::vector& out_preds = p_out_preds->data_h(); + std::vector& out_preds = p_out_preds->HostVector(); // p_last_fmat_ is a valid pointer as long as UpdatePredictionCache() is called in // conjunction with Update(). @@ -318,13 +319,13 @@ class FastHistMaker: public TreeUpdater { bst_float leaf_value; // if a node is marked as deleted by the pruner, traverse upward to locate // a non-deleted leaf. - if ((*p_last_tree_)[nid].is_deleted()) { - while ((*p_last_tree_)[nid].is_deleted()) { - nid = (*p_last_tree_)[nid].parent(); + if ((*p_last_tree_)[nid].IsDeleted()) { + while ((*p_last_tree_)[nid].IsDeleted()) { + nid = (*p_last_tree_)[nid].Parent(); } - CHECK((*p_last_tree_)[nid].is_leaf()); + CHECK((*p_last_tree_)[nid].IsLeaf()); } - leaf_value = (*p_last_tree_)[nid].leaf_value(); + leaf_value = (*p_last_tree_)[nid].LeafValue(); for (const size_t* it = rowset.begin; it < rowset.end; ++it) { out_preds[*it] += leaf_value; @@ -338,19 +339,19 @@ class FastHistMaker: public TreeUpdater { protected: // initialize temp data structure inline void InitData(const GHistIndexMatrix& gmat, - const std::vector& gpair, + const std::vector& gpair, const DMatrix& fmat, const RegTree& tree) { CHECK_EQ(tree.param.num_nodes, tree.param.num_roots) << "ColMakerHist: can only grow new tree"; - CHECK((param.max_depth > 0 || param.max_leaves > 0)) + CHECK((param_.max_depth > 0 || param_.max_leaves > 0)) << "max_depth or max_leaves cannot be both 0 (unlimited); " << "at least one should be a positive quantity."; - if (param.grow_policy == TrainParam::kDepthWise) { - CHECK(param.max_depth > 0) << "max_depth cannot be 0 (unlimited) " + if (param_.grow_policy == TrainParam::kDepthWise) { + CHECK(param_.max_depth > 0) << "max_depth cannot be 0 (unlimited) " << "when grow_policy is depthwise."; } - const auto& info = fmat.info(); + const auto& info = fmat.Info(); { // initialize the row set @@ -364,23 +365,23 @@ class FastHistMaker: public TreeUpdater { // initialize histogram builder #pragma omp parallel { - this->nthread = omp_get_num_threads(); + this->nthread_ = omp_get_num_threads(); } - hist_builder_.Init(this->nthread, nbins); + hist_builder_.Init(this->nthread_, nbins); - CHECK_EQ(info.root_index.size(), 0U); + CHECK_EQ(info.root_index_.size(), 0U); std::vector& row_indices = row_set_collection_.row_indices_; // mark subsample and build list of member rows - if (param.subsample < 1.0f) { - std::bernoulli_distribution coin_flip(param.subsample); + if (param_.subsample < 1.0f) { + std::bernoulli_distribution coin_flip(param_.subsample); auto& rnd = common::GlobalRandom(); - for (size_t i = 0; i < info.num_row; ++i) { + for (size_t i = 0; i < info.num_row_; ++i) { if (gpair[i].GetHess() >= 0.0f && coin_flip(rnd)) { row_indices.push_back(i); } } } else { - for (size_t i = 0; i < info.num_row; ++i) { + for (size_t i = 0; i < info.num_row_; ++i) { if (gpair[i].GetHess() >= 0.0f) { row_indices.push_back(i); } @@ -391,9 +392,9 @@ class FastHistMaker: public TreeUpdater { { /* determine layout of data */ - const size_t nrow = info.num_row; - const size_t ncol = 
info.num_col; - const size_t nnz = info.num_nonzero; + const size_t nrow = info.num_row_; + const size_t ncol = info.num_col_; + const size_t nnz = info.num_nonzero_; // number of discrete bins for feature 0 const uint32_t nbins_f0 = gmat.cut->row_ptr[1] - gmat.cut->row_ptr[0]; if (nrow * ncol == nnz) { @@ -413,23 +414,23 @@ class FastHistMaker: public TreeUpdater { // store a pointer to training data p_last_fmat_ = &fmat; // initialize feature index - bst_uint ncol = static_cast(info.num_col); - feat_index.clear(); + auto ncol = static_cast(info.num_col_); + feat_index_.clear(); if (data_layout_ == kDenseDataOneBased) { for (bst_uint i = 1; i < ncol; ++i) { - feat_index.push_back(i); + feat_index_.push_back(i); } } else { for (bst_uint i = 0; i < ncol; ++i) { - feat_index.push_back(i); + feat_index_.push_back(i); } } bst_uint n = std::max(static_cast(1), - static_cast(param.colsample_bytree * feat_index.size())); - std::shuffle(feat_index.begin(), feat_index.end(), common::GlobalRandom()); - CHECK_GT(param.colsample_bytree, 0U) + static_cast(param_.colsample_bytree * feat_index_.size())); + std::shuffle(feat_index_.begin(), feat_index_.end(), common::GlobalRandom()); + CHECK_GT(param_.colsample_bytree, 0U) << "colsample_bytree cannot be zero."; - feat_index.resize(n); + feat_index_.resize(n); } if (data_layout_ == kDenseDataZeroBased || data_layout_ == kDenseDataOneBased) { /* specialized code for dense data: @@ -437,7 +438,7 @@ class FastHistMaker: public TreeUpdater { For dense data (with no missing value), the sum of gradient histogram is equal to snode[nid] */ const std::vector& row_ptr = gmat.cut->row_ptr; - const bst_uint nfeature = static_cast(row_ptr.size() - 1); + const auto nfeature = static_cast(row_ptr.size() - 1); uint32_t min_nbins_per_feature = 0; for (bst_uint i = 0; i < nfeature; ++i) { const uint32_t nbins = row_ptr[i + 1] - row_ptr[i]; @@ -451,14 +452,14 @@ class FastHistMaker: public TreeUpdater { CHECK_GT(min_nbins_per_feature, 0U); } { - snode.reserve(256); - snode.clear(); + snode_.reserve(256); + snode_.clear(); } { - if (param.grow_policy == TrainParam::kLossGuide) { - qexpand_.reset(new ExpandQueue(loss_guide)); + if (param_.grow_policy == TrainParam::kLossGuide) { + qexpand_.reset(new ExpandQueue(LossGuide)); } else { - qexpand_.reset(new ExpandQueue(depth_wise)); + qexpand_.reset(new ExpandQueue(DepthWise)); } } } @@ -470,25 +471,25 @@ class FastHistMaker: public TreeUpdater { const RegTree& tree, const std::vector& feat_set) { // start enumeration - const MetaInfo& info = fmat.info(); - const bst_uint nfeature = static_cast(feat_set.size()); - const bst_omp_uint nthread = static_cast(this->nthread); + const MetaInfo& info = fmat.Info(); + const auto nfeature = static_cast(feat_set.size()); + const auto nthread = static_cast(this->nthread_); best_split_tloc_.resize(nthread); #pragma omp parallel for schedule(static) num_threads(nthread) for (bst_omp_uint tid = 0; tid < nthread; ++tid) { - best_split_tloc_[tid] = snode[nid].best; + best_split_tloc_[tid] = snode_[nid].best; } #pragma omp parallel for schedule(dynamic) num_threads(nthread) for (bst_omp_uint i = 0; i < nfeature; ++i) { const bst_uint fid = feat_set[i]; const unsigned tid = omp_get_thread_num(); - this->EnumerateSplit(-1, gmat, hist[nid], snode[nid], constraints_[nid], info, + this->EnumerateSplit(-1, gmat, hist[nid], snode_[nid], constraints_[nid], info, &best_split_tloc_[tid], fid); - this->EnumerateSplit(+1, gmat, hist[nid], snode[nid], constraints_[nid], info, + this->EnumerateSplit(+1, gmat, 
hist[nid], snode_[nid], constraints_[nid], info, &best_split_tloc_[tid], fid); } for (unsigned tid = 0; tid < nthread; ++tid) { - snode[nid].best.Update(best_split_tloc_[tid]); + snode_[nid].best.Update(best_split_tloc_[tid]); } } @@ -499,12 +500,13 @@ class FastHistMaker: public TreeUpdater { const DMatrix& fmat, RegTree* p_tree) { XGBOOST_TYPE_SWITCH(column_matrix.dtype, { - ApplySplit_(nid, gmat, column_matrix, hist, fmat, p_tree); + ApplySplitSpecialize(nid, gmat, column_matrix, hist, fmat, + p_tree); }); } template - inline void ApplySplit_(int nid, + inline void ApplySplitSpecialize(int nid, const GHistIndexMatrix& gmat, const ColumnMatrix& column_matrix, const HistCollection& hist, @@ -513,26 +515,26 @@ class FastHistMaker: public TreeUpdater { // TODO(hcho3): support feature sampling by levels /* 1. Create child nodes */ - NodeEntry& e = snode[nid]; + NodeEntry& e = snode_[nid]; p_tree->AddChilds(nid); - (*p_tree)[nid].set_split(e.best.split_index(), e.best.split_value, e.best.default_left()); + (*p_tree)[nid].SetSplit(e.best.SplitIndex(), e.best.split_value, e.best.DefaultLeft()); // mark right child as 0, to indicate fresh leaf - int cleft = (*p_tree)[nid].cleft(); - int cright = (*p_tree)[nid].cright(); - (*p_tree)[cleft].set_leaf(0.0f, 0); - (*p_tree)[cright].set_leaf(0.0f, 0); + int cleft = (*p_tree)[nid].LeftChild(); + int cright = (*p_tree)[nid].RightChild(); + (*p_tree)[cleft].SetLeaf(0.0f, 0); + (*p_tree)[cright].SetLeaf(0.0f, 0); /* 2. Categorize member rows */ - const bst_omp_uint nthread = static_cast(this->nthread); + const auto nthread = static_cast(this->nthread_); row_split_tloc_.resize(nthread); for (bst_omp_uint i = 0; i < nthread; ++i) { row_split_tloc_[i].left.clear(); row_split_tloc_[i].right.clear(); } - const bool default_left = (*p_tree)[nid].default_left(); - const bst_uint fid = (*p_tree)[nid].split_index(); - const bst_float split_pt = (*p_tree)[nid].split_cond(); + const bool default_left = (*p_tree)[nid].DefaultLeft(); + const bst_uint fid = (*p_tree)[nid].SplitIndex(); + const bst_float split_pt = (*p_tree)[nid].SplitCond(); const uint32_t lower_bound = gmat.cut->row_ptr[fid]; const uint32_t upper_bound = gmat.cut->row_ptr[fid + 1]; int32_t split_cond = -1; @@ -558,7 +560,7 @@ class FastHistMaker: public TreeUpdater { } row_set_collection_.AddSplit( - nid, row_split_tloc_, (*p_tree)[nid].cleft(), (*p_tree)[nid].cright()); + nid, row_split_tloc_, (*p_tree)[nid].LeftChild(), (*p_tree)[nid].RightChild()); } template @@ -569,24 +571,24 @@ class FastHistMaker: public TreeUpdater { bst_int split_cond, bool default_left) { std::vector& row_split_tloc = *p_row_split_tloc; - const int K = 8; // loop unrolling factor + constexpr int kUnroll = 8; // loop unrolling factor const size_t nrows = rowset.end - rowset.begin; - const size_t rest = nrows % K; + const size_t rest = nrows % kUnroll; - #pragma omp parallel for num_threads(nthread) schedule(static) - for (bst_omp_uint i = 0; i < nrows - rest; i += K) { + #pragma omp parallel for num_threads(nthread_) schedule(static) + for (bst_omp_uint i = 0; i < nrows - rest; i += kUnroll) { const bst_uint tid = omp_get_thread_num(); auto& left = row_split_tloc[tid].left; auto& right = row_split_tloc[tid].right; - size_t rid[K]; - T rbin[K]; - for (int k = 0; k < K; ++k) { + size_t rid[kUnroll]; + T rbin[kUnroll]; + for (int k = 0; k < kUnroll; ++k) { rid[k] = rowset.begin[i + k]; } - for (int k = 0; k < K; ++k) { + for (int k = 0; k < kUnroll; ++k) { rbin[k] = column.index[rid[k]]; } - for (int k = 0; k < K; ++k) { + for 
(int k = 0; k < kUnroll; ++k) { // NOLINT if (rbin[k] == std::numeric_limits::max()) { // missing value if (default_left) { left.push_back(rid[k]); @@ -605,8 +607,8 @@ class FastHistMaker: public TreeUpdater { } } for (size_t i = nrows - rest; i < nrows; ++i) { - auto& left = row_split_tloc[nthread-1].left; - auto& right = row_split_tloc[nthread-1].right; + auto& left = row_split_tloc[nthread_-1].left; + auto& right = row_split_tloc[nthread_-1].right; const size_t rid = rowset.begin[i]; const T rbin = column.index[rid]; if (rbin == std::numeric_limits::max()) { // missing value @@ -635,27 +637,27 @@ class FastHistMaker: public TreeUpdater { bst_int split_cond, bool default_left) { std::vector& row_split_tloc = *p_row_split_tloc; - const int K = 8; // loop unrolling factor + constexpr int kUnroll = 8; // loop unrolling factor const size_t nrows = rowset.end - rowset.begin; - const size_t rest = nrows % K; - #pragma omp parallel for num_threads(nthread) schedule(static) - for (bst_omp_uint i = 0; i < nrows - rest; i += K) { - size_t rid[K]; - GHistIndexRow row[K]; - const uint32_t* p[K]; + const size_t rest = nrows % kUnroll; + #pragma omp parallel for num_threads(nthread_) schedule(static) + for (bst_omp_uint i = 0; i < nrows - rest; i += kUnroll) { + size_t rid[kUnroll]; + GHistIndexRow row[kUnroll]; + const uint32_t* p[kUnroll]; bst_uint tid = omp_get_thread_num(); auto& left = row_split_tloc[tid].left; auto& right = row_split_tloc[tid].right; - for (int k = 0; k < K; ++k) { + for (int k = 0; k < kUnroll; ++k) { rid[k] = rowset.begin[i + k]; } - for (int k = 0; k < K; ++k) { + for (int k = 0; k < kUnroll; ++k) { row[k] = gmat[rid[k]]; } - for (int k = 0; k < K; ++k) { + for (int k = 0; k < kUnroll; ++k) { p[k] = std::lower_bound(row[k].index, row[k].index + row[k].size, lower_bound); } - for (int k = 0; k < K; ++k) { + for (int k = 0; k < kUnroll; ++k) { if (p[k] != row[k].index + row[k].size && *p[k] < upper_bound) { CHECK_LT(*p[k], static_cast(std::numeric_limits::max())); @@ -708,11 +710,11 @@ class FastHistMaker: public TreeUpdater { std::vector& row_split_tloc = *p_row_split_tloc; const size_t nrows = rowset.end - rowset.begin; - #pragma omp parallel num_threads(nthread) + #pragma omp parallel num_threads(nthread_) { - const size_t tid = static_cast(omp_get_thread_num()); - const size_t ibegin = tid * nrows / nthread; - const size_t iend = (tid + 1) * nrows / nthread; + const auto tid = static_cast(omp_get_thread_num()); + const size_t ibegin = tid * nrows / nthread_; + const size_t iend = (tid + 1) * nrows / nthread_; if (ibegin < iend) { // ensure that [ibegin, iend) is nonempty range // search first nonzero row with index >= rowset[ibegin] const size_t* p = std::lower_bound(column.row_ind, @@ -769,17 +771,17 @@ class FastHistMaker: public TreeUpdater { inline void InitNewNode(int nid, const GHistIndexMatrix& gmat, - const std::vector& gpair, + const std::vector& gpair, const DMatrix& fmat, const RegTree& tree) { { - snode.resize(tree.param.num_nodes, NodeEntry(param)); + snode_.resize(tree.param.num_nodes, NodeEntry(param_)); constraints_.resize(tree.param.num_nodes); } // setup constraints before calculating the weight { - auto& stats = snode[nid].stats; + auto& stats = snode_[nid].stats; if (data_layout_ == kDenseDataZeroBased || data_layout_ == kDenseDataOneBased) { /* specialized code for dense data For dense data (with no missing value), @@ -799,22 +801,22 @@ class FastHistMaker: public TreeUpdater { stats.Add(gpair[*it]); } } - if (!tree[nid].is_root()) { - const int pid = 
tree[nid].parent(); - constraints_[pid].SetChild(param, tree[pid].split_index(), - snode[tree[pid].cleft()].stats, - snode[tree[pid].cright()].stats, - &constraints_[tree[pid].cleft()], - &constraints_[tree[pid].cright()]); + if (!tree[nid].IsRoot()) { + const int pid = tree[nid].Parent(); + constraints_[pid].SetChild(param_, tree[pid].SplitIndex(), + snode_[tree[pid].LeftChild()].stats, + snode_[tree[pid].RightChild()].stats, + &constraints_[tree[pid].LeftChild()], + &constraints_[tree[pid].RightChild()]); } } // calculating the weights { - snode[nid].root_gain = static_cast( - constraints_[nid].CalcGain(param, snode[nid].stats)); - snode[nid].weight = static_cast( - constraints_[nid].CalcWeight(param, snode[nid].stats)); + snode_[nid].root_gain = static_cast( + constraints_[nid].CalcGain(param_, snode_[nid].stats)); + snode_[nid].weight = static_cast( + constraints_[nid].CalcWeight(param_, snode_[nid].stats)); } } @@ -834,8 +836,8 @@ class FastHistMaker: public TreeUpdater { const std::vector& cut_val = gmat.cut->cut; // statistics on both sides of split - TStats c(param); - TStats e(param); + TStats c(param_); + TStats e(param_); // best split so far SplitEntry best; @@ -846,7 +848,7 @@ class FastHistMaker: public TreeUpdater { static_cast(std::numeric_limits::max())); // imin: index (offset) of the minimum value for feature fid // need this for backward enumeration - const int32_t imin = static_cast(cut_ptr[fid]); + const auto imin = static_cast(cut_ptr[fid]); // ibegin, iend: smallest/largest cut points for feature fid // use int to allow for value -1 int32_t ibegin, iend; @@ -862,21 +864,21 @@ class FastHistMaker: public TreeUpdater { // start working // try to find a split e.Add(hist.begin[i].sum_grad, hist.begin[i].sum_hess); - if (e.sum_hess >= param.min_child_weight) { + if (e.sum_hess >= param_.min_child_weight) { c.SetSubstract(snode.stats, e); - if (c.sum_hess >= param.min_child_weight) { + if (c.sum_hess >= param_.min_child_weight) { bst_float loss_chg; bst_float split_pt; if (d_step > 0) { // forward enumeration: split at right bound of each bin loss_chg = static_cast( - constraint.CalcSplitGain(param, param.monotone_constraints[fid], e, c) - + constraint.CalcSplitGain(param_, param_.monotone_constraints[fid], e, c) - snode.root_gain); split_pt = cut_val[i]; } else { // backward enumeration: split at left bound of each bin loss_chg = static_cast( - constraint.CalcSplitGain(param, param.monotone_constraints[fid], c, e) - + constraint.CalcSplitGain(param_, param_.monotone_constraints[fid], c, e) - snode.root_gain); if (i == imin) { // for leftmost bin, left bound is the smallest feature value @@ -901,14 +903,14 @@ class FastHistMaker: public TreeUpdater { ExpandEntry(int nid, int depth, bst_float loss_chg, unsigned tstmp) : nid(nid), depth(depth), loss_chg(loss_chg), timestamp(tstmp) {} }; - inline static bool depth_wise(ExpandEntry lhs, ExpandEntry rhs) { + inline static bool DepthWise(ExpandEntry lhs, ExpandEntry rhs) { if (lhs.depth == rhs.depth) { return lhs.timestamp > rhs.timestamp; // favor small timestamp } else { return lhs.depth > rhs.depth; // favor small depth } } - inline static bool loss_guide(ExpandEntry lhs, ExpandEntry rhs) { + inline static bool LossGuide(ExpandEntry lhs, ExpandEntry rhs) { if (lhs.loss_chg == rhs.loss_chg) { return lhs.timestamp > rhs.timestamp; // favor small timestamp } else { @@ -917,19 +919,19 @@ class FastHistMaker: public TreeUpdater { } // --data fields-- - const TrainParam& param; - const FastHistParam& fhparam; + const TrainParam& 
param_; + const FastHistParam& fhparam_; // number of omp thread used during training - int nthread; + int nthread_; // Per feature: shuffle index of each feature index - std::vector feat_index; + std::vector feat_index_; // the internal row sets RowSetCollection row_set_collection_; // the temp space for split std::vector row_split_tloc_; std::vector best_split_tloc_; /*! \brief TreeNode Data: statistics for each constructed node */ - std::vector snode; + std::vector snode_; /*! \brief cumulative histogram of gradients. */ HistCollection hist_; /*! \brief feature with least # of bins. to be used for dense specialization @@ -948,9 +950,9 @@ class FastHistMaker: public TreeUpdater { // constraint value std::vector constraints_; - typedef std::priority_queue, - std::function> ExpandQueue; + using ExpandQueue = + std::priority_queue, + std::function>; std::unique_ptr qexpand_; enum DataLayout { kDenseDataZeroBased, kDenseDataOneBased, kSparseData }; @@ -964,14 +966,14 @@ // simple switch to defer implementation. class FastHistTreeUpdaterSwitch : public TreeUpdater { public: - FastHistTreeUpdaterSwitch() : monotone_(false) {} + FastHistTreeUpdaterSwitch() = default; void Init(const std::vector >& args) override { for (auto &kv : args) { if (kv.first == "monotone_constraints" && kv.second.length() != 0) { monotone_ = true; } } - if (inner_.get() == nullptr) { + if (inner_ == nullptr) { if (monotone_) { inner_.reset(new FastHistMaker()); } else { @@ -982,7 +984,7 @@ class FastHistTreeUpdaterSwitch : public TreeUpdater { inner_->Init(args); } - void Update(HostDeviceVector* gpair, + void Update(HostDeviceVector* gpair, DMatrix* data, const std::vector& trees) override { CHECK(inner_ != nullptr); @@ -991,7 +993,7 @@ class FastHistTreeUpdaterSwitch : public TreeUpdater { private: // monotone constraints - bool monotone_; + bool monotone_{false}; // internal implementation std::unique_ptr inner_; }; diff --git a/src/tree/updater_gpu.cu b/src/tree/updater_gpu.cu index 77eba9d72..8ff0ed8fe 100644 --- a/src/tree/updater_gpu.cu +++ b/src/tree/updater_gpu.cu @@ -22,25 +22,25 @@ DMLC_REGISTRY_FILE_TAG(updater_gpu); * @return the uniq key */ -static HOST_DEV_INLINE node_id_t abs2uniqKey(int tid, const node_id_t* abs, +static HOST_DEV_INLINE NodeIdT abs2uniqKey(int tid, const NodeIdT* abs, const int* colIds, - node_id_t nodeStart, int nKeys) { + NodeIdT nodeStart, int nKeys) { int a = abs[tid]; - if (a == UNUSED_NODE) return a; + if (a == kUnusedNode) return a; return ((a - nodeStart) + (colIds[tid] * nKeys)); } /** * @struct Pair - * @brief Pair used for key based scan operations on bst_gpair + * @brief Pair used for key based scan operations on GradientPair */ struct Pair { int key; - bst_gpair value; + GradientPair value; }; /** define a key that's not used at all in the entire boosting process */ -static const int NONE_KEY = -100; +static const int kNoneKey = -100; /** * @brief Allocate temporary buffers needed for scan operations @@ -49,9 +49,9 @@ static const int NONE_KEY = -100; * @param size number of elements that will be scanned */ template -int scanTempBufferSize(int size) { - int nBlks = dh::div_round_up(size, BLKDIM_L1L3); - return nBlks; +int ScanTempBufferSize(int size) { + int num_blocks = dh::DivRoundUp(size, BLKDIM_L1L3); + return num_blocks; } struct AddByKey { @@ -76,21 +76,21 @@ struct AddByKey { * @param instIds instance index buffer * @return the expected gradient value */ -HOST_DEV_INLINE bst_gpair get(int id, const bst_gpair* vals, 
+HOST_DEV_INLINE GradientPair get(int id, const GradientPair* vals, const int* instIds) { id = instIds[id]; return vals[id]; } template -__global__ void cubScanByKeyL1(bst_gpair* scans, const bst_gpair* vals, - const int* instIds, bst_gpair* mScans, - int* mKeys, const node_id_t* keys, int nUniqKeys, - const int* colIds, node_id_t nodeStart, +__global__ void cubScanByKeyL1(GradientPair* scans, const GradientPair* vals, + const int* instIds, GradientPair* mScans, + int* mKeys, const NodeIdT* keys, int nUniqKeys, + const int* colIds, NodeIdT nodeStart, const int size) { - Pair rootPair = {NONE_KEY, bst_gpair(0.f, 0.f)}; + Pair rootPair = {kNoneKey, GradientPair(0.f, 0.f)}; int myKey; - bst_gpair myValue; + GradientPair myValue; typedef cub::BlockScan BlockScan; __shared__ typename BlockScan::TempStorage temp_storage; Pair threadData; @@ -99,8 +99,8 @@ __global__ void cubScanByKeyL1(bst_gpair* scans, const bst_gpair* vals, myKey = abs2uniqKey(tid, keys, colIds, nodeStart, nUniqKeys); myValue = get(tid, vals, instIds); } else { - myKey = NONE_KEY; - myValue = 0.f; + myKey = kNoneKey; + myValue = {}; } threadData.key = myKey; threadData.value = myValue; @@ -119,14 +119,14 @@ __global__ void cubScanByKeyL1(bst_gpair* scans, const bst_gpair* vals, } if (threadIdx.x == BLKDIM_L1L3 - 1) { threadData.value = - (myKey == previousKey) ? threadData.value : bst_gpair(0.0f, 0.0f); + (myKey == previousKey) ? threadData.value : GradientPair(0.0f, 0.0f); mKeys[blockIdx.x] = myKey; mScans[blockIdx.x] = threadData.value + myValue; } } template -__global__ void cubScanByKeyL2(bst_gpair* mScans, int* mKeys, int mLength) { +__global__ void cubScanByKeyL2(GradientPair* mScans, int* mKeys, int mLength) { typedef cub::BlockScan BlockScan; Pair threadData; __shared__ typename BlockScan::TempStorage temp_storage; @@ -140,31 +140,31 @@ __global__ void cubScanByKeyL2(bst_gpair* mScans, int* mKeys, int mLength) { } template -__global__ void cubScanByKeyL3(bst_gpair* sums, bst_gpair* scans, - const bst_gpair* vals, const int* instIds, - const bst_gpair* mScans, const int* mKeys, - const node_id_t* keys, int nUniqKeys, - const int* colIds, node_id_t nodeStart, +__global__ void cubScanByKeyL3(GradientPair* sums, GradientPair* scans, + const GradientPair* vals, const int* instIds, + const GradientPair* mScans, const int* mKeys, + const NodeIdT* keys, int nUniqKeys, + const int* colIds, NodeIdT nodeStart, const int size) { int relId = threadIdx.x; int tid = (blockIdx.x * BLKDIM_L1L3) + relId; // to avoid the following warning from nvcc: // __shared__ memory variable with non-empty constructor or destructor // (potential race between threads) - __shared__ char gradBuff[sizeof(bst_gpair)]; + __shared__ char gradBuff[sizeof(GradientPair)]; __shared__ int s_mKeys; - bst_gpair* s_mScans = reinterpret_cast(gradBuff); + GradientPair* s_mScans = reinterpret_cast(gradBuff); if (tid >= size) return; // cache block-wide partial scan info if (relId == 0) { - s_mKeys = (blockIdx.x > 0) ? mKeys[blockIdx.x - 1] : NONE_KEY; - s_mScans[0] = (blockIdx.x > 0) ? mScans[blockIdx.x - 1] : bst_gpair(); + s_mKeys = (blockIdx.x > 0) ? mKeys[blockIdx.x - 1] : kNoneKey; + s_mScans[0] = (blockIdx.x > 0) ? mScans[blockIdx.x - 1] : GradientPair(); } int myKey = abs2uniqKey(tid, keys, colIds, nodeStart, nUniqKeys); int previousKey = - tid == 0 ? NONE_KEY + tid == 0 ? 
kNoneKey : abs2uniqKey(tid - 1, keys, colIds, nodeStart, nUniqKeys); - bst_gpair myValue = scans[tid]; + GradientPair myValue = scans[tid]; __syncthreads(); if (blockIdx.x > 0 && s_mKeys == previousKey) { myValue += s_mScans[0]; @@ -174,7 +174,7 @@ __global__ void cubScanByKeyL3(bst_gpair* sums, bst_gpair* scans, } if ((previousKey != myKey) && (previousKey >= 0)) { sums[previousKey] = myValue; - myValue = bst_gpair(0.0f, 0.0f); + myValue = GradientPair(0.0f, 0.0f); } scans[tid] = myValue; } @@ -200,12 +200,12 @@ __global__ void cubScanByKeyL3(bst_gpair* sums, bst_gpair* scans, * @param nodeStart index of the leftmost node in the current level */ template -void reduceScanByKey(bst_gpair* sums, bst_gpair* scans, const bst_gpair* vals, - const int* instIds, const node_id_t* keys, int size, - int nUniqKeys, int nCols, bst_gpair* tmpScans, - int* tmpKeys, const int* colIds, node_id_t nodeStart) { - int nBlks = dh::div_round_up(size, BLKDIM_L1L3); - cudaMemset(sums, 0, nUniqKeys * nCols * sizeof(bst_gpair)); +void reduceScanByKey(GradientPair* sums, GradientPair* scans, const GradientPair* vals, + const int* instIds, const NodeIdT* keys, int size, + int nUniqKeys, int nCols, GradientPair* tmpScans, + int* tmpKeys, const int* colIds, NodeIdT nodeStart) { + int nBlks = dh::DivRoundUp(size, BLKDIM_L1L3); + cudaMemset(sums, 0, nUniqKeys * nCols * sizeof(GradientPair)); cubScanByKeyL1 <<>>(scans, vals, instIds, tmpScans, tmpKeys, keys, nUniqKeys, colIds, nodeStart, size); @@ -243,13 +243,13 @@ struct ExactSplitCandidate { */ enum ArgMaxByKeyAlgo { /** simplest, use gmem-atomics for all updates */ - ABK_GMEM = 0, + kAbkGmem = 0, /** use smem-atomics for updates (when number of keys are less) */ - ABK_SMEM + kAbkSmem }; /** max depth until which to use shared mem based atomics for argmax */ -static const int MAX_ABK_LEVELS = 3; +static const int kMaxAbkLevels = 3; HOST_DEV_INLINE ExactSplitCandidate maxSplit(ExactSplitCandidate a, ExactSplitCandidate b) { @@ -281,27 +281,27 @@ DEV_INLINE void atomicArgMax(ExactSplitCandidate* address, } DEV_INLINE void argMaxWithAtomics( - int id, ExactSplitCandidate* nodeSplits, const bst_gpair* gradScans, - const bst_gpair* gradSums, const float* vals, const int* colIds, - const node_id_t* nodeAssigns, const DeviceNodeStats* nodes, int nUniqKeys, - node_id_t nodeStart, int len, const GPUTrainingParam& param) { + int id, ExactSplitCandidate* nodeSplits, const GradientPair* gradScans, + const GradientPair* gradSums, const float* vals, const int* colIds, + const NodeIdT* nodeAssigns, const DeviceNodeStats* nodes, int nUniqKeys, + NodeIdT nodeStart, int len, const GPUTrainingParam& param) { int nodeId = nodeAssigns[id]; // @todo: this is really a bad check! 
but will be fixed when we move // to key-based reduction if ((id == 0) || !((nodeId == nodeAssigns[id - 1]) && (colIds[id] == colIds[id - 1]) && (vals[id] == vals[id - 1]))) { - if (nodeId != UNUSED_NODE) { + if (nodeId != kUnusedNode) { int sumId = abs2uniqKey(id, nodeAssigns, colIds, nodeStart, nUniqKeys); - bst_gpair colSum = gradSums[sumId]; + GradientPair colSum = gradSums[sumId]; int uid = nodeId - nodeStart; DeviceNodeStats n = nodes[nodeId]; - bst_gpair parentSum = n.sum_gradients; + GradientPair parentSum = n.sum_gradients; float parentGain = n.root_gain; bool tmp; ExactSplitCandidate s; - bst_gpair missing = parentSum - colSum; - s.score = loss_chg_missing(gradScans[id], missing, parentSum, parentGain, + GradientPair missing = parentSum - colSum; + s.score = LossChangeMissing(gradScans[id], missing, parentSum, parentGain, param, tmp); s.index = id; atomicArgMax(nodeSplits + uid, s); @@ -310,10 +310,10 @@ DEV_INLINE void argMaxWithAtomics( } __global__ void atomicArgMaxByKeyGmem( - ExactSplitCandidate* nodeSplits, const bst_gpair* gradScans, - const bst_gpair* gradSums, const float* vals, const int* colIds, - const node_id_t* nodeAssigns, const DeviceNodeStats* nodes, int nUniqKeys, - node_id_t nodeStart, int len, const TrainParam param) { + ExactSplitCandidate* nodeSplits, const GradientPair* gradScans, + const GradientPair* gradSums, const float* vals, const int* colIds, + const NodeIdT* nodeAssigns, const DeviceNodeStats* nodes, int nUniqKeys, + NodeIdT nodeStart, int len, const TrainParam param) { int id = threadIdx.x + (blockIdx.x * blockDim.x); const int stride = blockDim.x * gridDim.x; for (; id < len; id += stride) { @@ -324,10 +324,10 @@ __global__ void atomicArgMaxByKeyGmem( } __global__ void atomicArgMaxByKeySmem( - ExactSplitCandidate* nodeSplits, const bst_gpair* gradScans, - const bst_gpair* gradSums, const float* vals, const int* colIds, - const node_id_t* nodeAssigns, const DeviceNodeStats* nodes, int nUniqKeys, - node_id_t nodeStart, int len, const TrainParam param) { + ExactSplitCandidate* nodeSplits, const GradientPair* gradScans, + const GradientPair* gradSums, const float* vals, const int* colIds, + const NodeIdT* nodeAssigns, const DeviceNodeStats* nodes, int nUniqKeys, + NodeIdT nodeStart, int len, const GPUTrainingParam param) { extern __shared__ char sArr[]; ExactSplitCandidate* sNodeSplits = reinterpret_cast(sArr); @@ -368,27 +368,27 @@ __global__ void atomicArgMaxByKeySmem( * @param algo which algorithm to use for argmax_by_key */ template -void argMaxByKey(ExactSplitCandidate* nodeSplits, const bst_gpair* gradScans, - const bst_gpair* gradSums, const float* vals, - const int* colIds, const node_id_t* nodeAssigns, +void argMaxByKey(ExactSplitCandidate* nodeSplits, const GradientPair* gradScans, + const GradientPair* gradSums, const float* vals, + const int* colIds, const NodeIdT* nodeAssigns, const DeviceNodeStats* nodes, int nUniqKeys, - node_id_t nodeStart, int len, const TrainParam param, + NodeIdT nodeStart, int len, const TrainParam param, ArgMaxByKeyAlgo algo) { - dh::fillConst( - dh::get_device_idx(param.gpu_id), nodeSplits, nUniqKeys, + dh::FillConst( + dh::GetDeviceIdx(param.gpu_id), nodeSplits, nUniqKeys, ExactSplitCandidate()); - int nBlks = dh::div_round_up(len, ITEMS_PER_THREAD * BLKDIM); + int nBlks = dh::DivRoundUp(len, ITEMS_PER_THREAD * BLKDIM); switch (algo) { - case ABK_GMEM: + case kAbkGmem: atomicArgMaxByKeyGmem<<>>( nodeSplits, gradScans, gradSums, vals, colIds, nodeAssigns, nodes, nUniqKeys, nodeStart, len, param); break; - case 
ABK_SMEM: + case kAbkSmem: atomicArgMaxByKeySmem<<>>( nodeSplits, gradScans, gradSums, vals, colIds, nodeAssigns, nodes, - nUniqKeys, nodeStart, len, param); + nUniqKeys, nodeStart, len, GPUTrainingParam(param)); break; default: throw std::runtime_error("argMaxByKey: Bad algo passed!"); @@ -404,22 +404,22 @@ __global__ void assignColIds(int* colIds, const int* colOffsets) { } } -__global__ void fillDefaultNodeIds(node_id_t* nodeIdsPerInst, +__global__ void fillDefaultNodeIds(NodeIdT* nodeIdsPerInst, const DeviceNodeStats* nodes, int nRows) { int id = threadIdx.x + (blockIdx.x * blockDim.x); if (id >= nRows) { return; } // if this element belongs to none of the currently active node-id's - node_id_t nId = nodeIdsPerInst[id]; - if (nId == UNUSED_NODE) { + NodeIdT nId = nodeIdsPerInst[id]; + if (nId == kUnusedNode) { return; } const DeviceNodeStats n = nodes[nId]; - node_id_t result; + NodeIdT result; if (n.IsLeaf() || n.IsUnused()) { - result = UNUSED_NODE; - } else if (n.dir == LeftDir) { + result = kUnusedNode; + } else if (n.dir == kLeftDir) { result = (2 * n.idx) + 1; } else { result = (2 * n.idx) + 2; @@ -427,8 +427,8 @@ __global__ void fillDefaultNodeIds(node_id_t* nodeIdsPerInst, nodeIdsPerInst[id] = result; } -__global__ void assignNodeIds(node_id_t* nodeIdsPerInst, int* nodeLocations, - const node_id_t* nodeIds, const int* instId, +__global__ void assignNodeIds(NodeIdT* nodeIdsPerInst, int* nodeLocations, + const NodeIdT* nodeIds, const int* instId, const DeviceNodeStats* nodes, const int* colOffsets, const float* vals, int nVals, int nCols) { @@ -441,7 +441,7 @@ __global__ void assignNodeIds(node_id_t* nodeIdsPerInst, int* nodeLocations, // the nodeIdsPerInst with all default assignments int nId = nodeIds[id]; // if this element belongs to none of the currently active node-id's - if (nId != UNUSED_NODE) { + if (nId != kUnusedNode) { const DeviceNodeStats n = nodes[nId]; int colId = n.fidx; // printf("nid=%d colId=%d id=%d\n", nId, colId, id); @@ -449,7 +449,7 @@ __global__ void assignNodeIds(node_id_t* nodeIdsPerInst, int* nodeLocations, int end = colOffsets[colId + 1]; // @todo: too much wasteful threads!! if ((id >= start) && (id < end) && !(n.IsLeaf() || n.IsUnused())) { - node_id_t result = (2 * n.idx) + 1 + (vals[id] >= n.fvalue); + NodeIdT result = (2 * n.idx) + 1 + (vals[id] >= n.fvalue); nodeIdsPerInst[instId[id]] = result; } } @@ -475,31 +475,31 @@ class GPUMaker : public TreeUpdater { /** whether we have initialized memory already (so as not to repeat!) 
*/ bool allocated; /** feature values stored in column-major compressed format */ - dh::dvec2 vals; - dh::dvec vals_cached; + dh::DVec2 vals; + dh::DVec vals_cached; /** corresponding instance IDs of these feature values */ - dh::dvec2 instIds; - dh::dvec instIds_cached; + dh::DVec2 instIds; + dh::DVec instIds_cached; /** column offsets for these feature values */ - dh::dvec colOffsets; - dh::dvec gradsInst; - dh::dvec2 nodeAssigns; - dh::dvec2 nodeLocations; - dh::dvec nodes; - dh::dvec nodeAssignsPerInst; - dh::dvec gradSums; - dh::dvec gradScans; - dh::dvec nodeSplits; + dh::DVec colOffsets; + dh::DVec gradsInst; + dh::DVec2 nodeAssigns; + dh::DVec2 nodeLocations; + dh::DVec nodes; + dh::DVec nodeAssignsPerInst; + dh::DVec gradSums; + dh::DVec gradScans; + dh::DVec nodeSplits; int nVals; int nRows; int nCols; int maxNodes; int maxLeaves; dh::CubMemory tmp_mem; - dh::dvec tmpScanGradBuff; - dh::dvec tmpScanKeyBuff; - dh::dvec colIds; - dh::bulk_allocator ba; + dh::DVec tmpScanGradBuff; + dh::DVec tmpScanKeyBuff; + dh::DVec colIds; + dh::BulkAllocator ba; public: GPUMaker() : allocated(false) {} @@ -512,9 +512,9 @@ class GPUMaker : public TreeUpdater { maxLeaves = 1 << param.max_depth; } - void Update(HostDeviceVector* gpair, DMatrix* dmat, + void Update(HostDeviceVector* gpair, DMatrix* dmat, const std::vector& trees) override { - GradStats::CheckInfo(dmat->info()); + GradStats::CheckInfo(dmat->Info()); // rescale learning rate according to size of trees float lr = param.learning_rate; param.learning_rate = lr / trees.size(); @@ -530,7 +530,7 @@ class GPUMaker : public TreeUpdater { param.learning_rate = lr; } /// @note: Update should be only after Init!! - void UpdateTree(HostDeviceVector* gpair, DMatrix* dmat, + void UpdateTree(HostDeviceVector* gpair, DMatrix* dmat, RegTree* hTree) { if (!allocated) { setupOneTimeData(dmat); @@ -538,33 +538,33 @@ for (int i = 0; i < param.max_depth; ++i) { if (i == 0) { // make sure to start on a fresh tree with sorted values! 
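// (vals_cached/instIds_cached are the copies captured right after the segmented sort, so each new tree restarts from the same sorted layout)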
- vals.current_dvec() = vals_cached; - instIds.current_dvec() = instIds_cached; + vals.CurrentDVec() = vals_cached; + instIds.CurrentDVec() = instIds_cached; transferGrads(gpair); } int nNodes = 1 << i; - node_id_t nodeStart = nNodes - 1; + NodeIdT nodeStart = nNodes - 1; initNodeData(i, nodeStart, nNodes); findSplit(i, nodeStart, nNodes); } // mark all the used nodes with unused children as leaf nodes markLeaves(); - dense2sparse_tree(hTree, nodes, param); + Dense2SparseTree(hTree, nodes, param); } - void split2node(int nNodes, node_id_t nodeStart) { - auto d_nodes = nodes.data(); - auto d_gradScans = gradScans.data(); - auto d_gradSums = gradSums.data(); - auto d_nodeAssigns = nodeAssigns.current(); - auto d_colIds = colIds.data(); - auto d_vals = vals.current(); - auto d_nodeSplits = nodeSplits.data(); + void split2node(int nNodes, NodeIdT nodeStart) { + auto d_nodes = nodes.Data(); + auto d_gradScans = gradScans.Data(); + auto d_gradSums = gradSums.Data(); + auto d_nodeAssigns = nodeAssigns.Current(); + auto d_colIds = colIds.Data(); + auto d_vals = vals.Current(); + auto d_nodeSplits = nodeSplits.Data(); int nUniqKeys = nNodes; float min_split_loss = param.min_split_loss; auto gpu_param = GPUTrainingParam(param); - dh::launch_n(param.gpu_id, nNodes, [=] __device__(int uid) { + dh::LaunchN(param.gpu_id, nNodes, [=] __device__(int uid) { int absNodeId = uid + nodeStart; ExactSplitCandidate s = d_nodeSplits[uid]; if (s.isSplittable(min_split_loss)) { @@ -573,26 +573,26 @@ class GPUMaker : public TreeUpdater { abs2uniqKey(idx, d_nodeAssigns, d_colIds, nodeStart, nUniqKeys); bool missingLeft = true; const DeviceNodeStats& n = d_nodes[absNodeId]; - bst_gpair gradScan = d_gradScans[idx]; - bst_gpair gradSum = d_gradSums[nodeInstId]; + GradientPair gradScan = d_gradScans[idx]; + GradientPair gradSum = d_gradSums[nodeInstId]; float thresh = d_vals[idx]; int colId = d_colIds[idx]; // get the default direction for the current node - bst_gpair missing = n.sum_gradients - gradSum; - loss_chg_missing(gradScan, missing, n.sum_gradients, n.root_gain, + GradientPair missing = n.sum_gradients - gradSum; + LossChangeMissing(gradScan, missing, n.sum_gradients, n.root_gain, gpu_param, missingLeft); // get the score/weight/id/gradSum for left and right child nodes - bst_gpair lGradSum = missingLeft ? gradScan + missing : gradScan; - bst_gpair rGradSum = n.sum_gradients - lGradSum; + GradientPair lGradSum = missingLeft ? gradScan + missing : gradScan; + GradientPair rGradSum = n.sum_gradients - lGradSum; // Create children - d_nodes[left_child_nidx(absNodeId)] = - DeviceNodeStats(lGradSum, left_child_nidx(absNodeId), gpu_param); - d_nodes[right_child_nidx(absNodeId)] = - DeviceNodeStats(rGradSum, right_child_nidx(absNodeId), gpu_param); + d_nodes[LeftChildNodeIdx(absNodeId)] = + DeviceNodeStats(lGradSum, LeftChildNodeIdx(absNodeId), gpu_param); + d_nodes[RightChildNodeIdx(absNodeId)] = + DeviceNodeStats(rGradSum, RightChildNodeIdx(absNodeId), gpu_param); // Set split for parent d_nodes[absNodeId].SetSplit(thresh, colId, - missingLeft ? LeftDir : RightDir, lGradSum, + missingLeft ? kLeftDir : kRightDir, lGradSum, rGradSum); } else { // cannot be split further, so this node is a leaf! 
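In split2node above, the prefix scan (gradScan) covers only a feature's non-missing values, so the gradient mass of missing values is the parent sum minus the feature total; LossChangeMissing picks the side that receives it, and both child sums then follow by subtraction. A simplified host sketch (plain float pair, not the real GradientPair API):

struct GPair {
  float g, h;
  GPair operator+(const GPair& o) const { return GPair{g + o.g, h + o.h}; }
  GPair operator-(const GPair& o) const { return GPair{g - o.g, h - o.h}; }
};

GPair LeftSum(GPair scan, GPair missing, bool missing_left) {
  return missing_left ? scan + missing : scan;  // missing mass joins the left child
}

GPair RightSum(GPair parent, GPair left) {
  return parent - left;  // remainder, including missing mass if it went right
}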
@@ -601,21 +601,21 @@ class GPUMaker : public TreeUpdater { }); } - void findSplit(int level, node_id_t nodeStart, int nNodes) { - reduceScanByKey(gradSums.data(), gradScans.data(), gradsInst.data(), - instIds.current(), nodeAssigns.current(), nVals, nNodes, - nCols, tmpScanGradBuff.data(), tmpScanKeyBuff.data(), - colIds.data(), nodeStart); - argMaxByKey(nodeSplits.data(), gradScans.data(), gradSums.data(), - vals.current(), colIds.data(), nodeAssigns.current(), - nodes.data(), nNodes, nodeStart, nVals, param, - level <= MAX_ABK_LEVELS ? ABK_SMEM : ABK_GMEM); + void findSplit(int level, NodeIdT nodeStart, int nNodes) { + reduceScanByKey(gradSums.Data(), gradScans.Data(), gradsInst.Data(), + instIds.Current(), nodeAssigns.Current(), nVals, nNodes, + nCols, tmpScanGradBuff.Data(), tmpScanKeyBuff.Data(), + colIds.Data(), nodeStart); + argMaxByKey(nodeSplits.Data(), gradScans.Data(), gradSums.Data(), + vals.Current(), colIds.Data(), nodeAssigns.Current(), + nodes.Data(), nNodes, nodeStart, nVals, param, + level <= kMaxAbkLevels ? kAbkSmem : kAbkGmem); split2node(nNodes, nodeStart); } void allocateAllData(int offsetSize) { - int tmpBuffSize = scanTempBufferSize(nVals); - ba.allocate(dh::get_device_idx(param.gpu_id), param.silent, &vals, nVals, + int tmpBuffSize = ScanTempBufferSize(nVals); + ba.Allocate(dh::GetDeviceIdx(param.gpu_id), param.silent, &vals, nVals, &vals_cached, nVals, &instIds, nVals, &instIds_cached, nVals, &colOffsets, offsetSize, &gradsInst, nRows, &nodeAssigns, nVals, &nodeLocations, nVals, &nodes, maxNodes, &nodeAssignsPerInst, @@ -625,7 +625,7 @@ class GPUMaker : public TreeUpdater { } void setupOneTimeData(DMatrix* dmat) { - size_t free_memory = dh::available_memory(dh::get_device_idx(param.gpu_id)); + size_t free_memory = dh::AvailableMemory(dh::GetDeviceIdx(param.gpu_id)); if (!dmat->SingleColBlock()) { throw std::runtime_error("exact::GPUBuilder - must have 1 column block"); } @@ -640,11 +640,11 @@ class GPUMaker : public TreeUpdater { void convertToCsc(DMatrix* dmat, std::vector* fval, std::vector* fId, std::vector* offset) { - MetaInfo info = dmat->info(); - CHECK(info.num_col < std::numeric_limits::max()); - CHECK(info.num_row < std::numeric_limits::max()); - nRows = static_cast(info.num_row); - nCols = static_cast(info.num_col); + MetaInfo info = dmat->Info(); + CHECK(info.num_col_ < std::numeric_limits::max()); + CHECK(info.num_row_ < std::numeric_limits::max()); + nRows = static_cast(info.num_row_); + nCols = static_cast(info.num_col_); offset->reserve(nCols + 1); offset->push_back(0); fval->reserve(nCols * nRows); @@ -677,56 +677,56 @@ class GPUMaker : public TreeUpdater { void transferAndSortData(const std::vector& fval, const std::vector& fId, const std::vector& offset) { - vals.current_dvec() = fval; - instIds.current_dvec() = fId; + vals.CurrentDVec() = fval; + instIds.CurrentDVec() = fId; colOffsets = offset; - dh::segmentedSort(&tmp_mem, &vals, &instIds, nVals, nCols, + dh::SegmentedSort(&tmp_mem, &vals, &instIds, nVals, nCols, colOffsets); - vals_cached = vals.current_dvec(); - instIds_cached = instIds.current_dvec(); - assignColIds<<>>(colIds.data(), colOffsets.data()); + vals_cached = vals.CurrentDVec(); + instIds_cached = instIds.CurrentDVec(); + assignColIds<<>>(colIds.Data(), colOffsets.Data()); } - void transferGrads(HostDeviceVector* gpair) { + void transferGrads(HostDeviceVector* gpair) { // HACK - dh::safe_cuda(cudaMemcpy(gradsInst.data(), gpair->ptr_d(param.gpu_id), - sizeof(bst_gpair) * nRows, + dh::safe_cuda(cudaMemcpy(gradsInst.Data(), 
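allocateAllData above hands every buffer to a single ba.Allocate call: the bulk allocator grabs one contiguous device allocation and carves it into sub-spans, avoiding many small cudaMalloc round trips. A CPU-only sketch of that carving idea (hypothetical Arena type; the real allocator must also align each span):

#include <cstddef>
#include <vector>

class Arena {
 public:
  explicit Arena(std::size_t bytes) : buf_(bytes), used_(0) {}
  template <typename T>
  T* Carve(std::size_t n) {  // alignment handling omitted for brevity
    T* p = reinterpret_cast<T*>(buf_.data() + used_);
    used_ += n * sizeof(T);
    return p;
  }

 private:
  std::vector<char> buf_;
  std::size_t used_;
};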
gpair->DevicePointer(param.gpu_id), + sizeof(GradientPair) * nRows, cudaMemcpyDefault)); // evaluate the full-grad reduction for the root node - dh::sumReduction(tmp_mem, gradsInst, gradSums, nRows); + dh::SumReduction(tmp_mem, gradsInst, gradSums, nRows); } - void initNodeData(int level, node_id_t nodeStart, int nNodes) { + void initNodeData(int level, NodeIdT nodeStart, int nNodes) { // all instances belong to root node at the beginning! if (level == 0) { - nodes.fill(DeviceNodeStats()); - nodeAssigns.current_dvec().fill(0); - nodeAssignsPerInst.fill(0); + nodes.Fill(DeviceNodeStats()); + nodeAssigns.CurrentDVec().Fill(0); + nodeAssignsPerInst.Fill(0); // for root node, just update the gradient/score/weight/id info // before splitting it! Currently all data is on GPU, hence this // stupid little kernel - auto d_nodes = nodes.data(); - auto d_sums = gradSums.data(); + auto d_nodes = nodes.Data(); + auto d_sums = gradSums.Data(); auto gpu_params = GPUTrainingParam(param); - dh::launch_n(param.gpu_id, 1, [=] __device__(int idx) { + dh::LaunchN(param.gpu_id, 1, [=] __device__(int idx) { d_nodes[0] = DeviceNodeStats(d_sums[0], 0, gpu_params); }); } else { const int BlkDim = 256; const int ItemsPerThread = 4; // assign default node ids first - int nBlks = dh::div_round_up(nRows, BlkDim); - fillDefaultNodeIds<<>>(nodeAssignsPerInst.data(), - nodes.data(), nRows); + int nBlks = dh::DivRoundUp(nRows, BlkDim); + fillDefaultNodeIds<<>>(nodeAssignsPerInst.Data(), + nodes.Data(), nRows); // evaluate the correct child indices of non-missing values next - nBlks = dh::div_round_up(nVals, BlkDim * ItemsPerThread); + nBlks = dh::DivRoundUp(nVals, BlkDim * ItemsPerThread); assignNodeIds<<>>( - nodeAssignsPerInst.data(), nodeLocations.current(), - nodeAssigns.current(), instIds.current(), nodes.data(), - colOffsets.data(), vals.current(), nVals, nCols); + nodeAssignsPerInst.Data(), nodeLocations.Current(), + nodeAssigns.Current(), instIds.Current(), nodes.Data(), + colOffsets.Data(), vals.Current(), nVals, nCols); // gather the node assignments across all other columns too - dh::gather(dh::get_device_idx(param.gpu_id), nodeAssigns.current(), - nodeAssignsPerInst.data(), instIds.current(), nVals); + dh::Gather(dh::GetDeviceIdx(param.gpu_id), nodeAssigns.Current(), + nodeAssignsPerInst.Data(), instIds.Current(), nVals); sortKeys(level); } } @@ -734,19 +734,19 @@ class GPUMaker : public TreeUpdater { void sortKeys(int level) { // segmented-sort the arrays based on node-id's // but we don't need more than level+1 bits for sorting! 
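Why level + 1 sort bits suffice, as the comment above says: at tree level L the active node ids are at most 2^(L+1) - 2, so every key fits in L + 1 bits and the radix sort can skip all higher bit passes. A quick host check of that bound (illustration only):

#include <cassert>

int main() {
  for (int level = 0; level < 20; ++level) {
    int last = (1 << (level + 1)) - 2;  // largest node id at this level
    assert(last < (1 << (level + 1)));  // fits in (level + 1) bits
  }
  return 0;
}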
- segmentedSort(&tmp_mem, &nodeAssigns, &nodeLocations, nVals, nCols, + SegmentedSort(&tmp_mem, &nodeAssigns, &nodeLocations, nVals, nCols, colOffsets, 0, level + 1); - dh::gather(dh::get_device_idx(param.gpu_id), vals.other(), - vals.current(), instIds.other(), instIds.current(), - nodeLocations.current(), nVals); + dh::Gather(dh::GetDeviceIdx(param.gpu_id), vals.other(), + vals.Current(), instIds.other(), instIds.Current(), + nodeLocations.Current(), nVals); vals.buff().selector ^= 1; instIds.buff().selector ^= 1; } void markLeaves() { const int BlkDim = 128; - int nBlks = dh::div_round_up(maxNodes, BlkDim); - markLeavesKernel<<>>(nodes.data(), maxNodes); + int nBlks = dh::DivRoundUp(maxNodes, BlkDim); + markLeavesKernel<<>>(nodes.Data(), maxNodes); } }; diff --git a/src/tree/updater_gpu_common.cuh b/src/tree/updater_gpu_common.cuh index 63d5f98ef..9cf490ac7 100644 --- a/src/tree/updater_gpu_common.cuh +++ b/src/tree/updater_gpu_common.cuh @@ -15,7 +15,7 @@ #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 #else -__device__ __forceinline__ double atomicAdd(double* address, double val) { +XGBOOST_DEVICE __forceinline__ double atomicAdd(double* address, double val) { unsigned long long int* address_as_ull = (unsigned long long int*)address; // NOLINT unsigned long long int old = *address_as_ull, assumed; // NOLINT @@ -37,8 +37,8 @@ namespace xgboost { namespace tree { // Atomic add function for double precision gradients -__device__ __forceinline__ void AtomicAddGpair(bst_gpair_precise* dest, - const bst_gpair& gpair) { +__device__ __forceinline__ void AtomicAddGpair(GradientPairPrecise* dest, + const GradientPair& gpair) { auto dst_ptr = reinterpret_cast(dest); atomicAdd(dst_ptr, static_cast(gpair.GetGrad())); @@ -46,11 +46,11 @@ __device__ __forceinline__ void AtomicAddGpair(bst_gpair_precise* dest, } // For integer gradients -__device__ __forceinline__ void AtomicAddGpair(bst_gpair_integer* dest, - const bst_gpair& gpair) { +__device__ __forceinline__ void AtomicAddGpair(GradientPairInteger* dest, + const GradientPair& gpair) { auto dst_ptr = reinterpret_cast(dest); // NOLINT - bst_gpair_integer tmp(gpair.GetGrad(), gpair.GetHess()); - auto src_ptr = reinterpret_cast(&tmp); + GradientPairInteger tmp(gpair.GetGrad(), gpair.GetHess()); + auto src_ptr = reinterpret_cast(&tmp); atomicAdd(dst_ptr, static_cast(*src_ptr)); // NOLINT @@ -59,13 +59,11 @@ __device__ __forceinline__ void AtomicAddGpair(bst_gpair_integer* dest, } /** - * \fn void CheckGradientMax(const dh::dvec& gpair) - * * \brief Check maximum gradient value is below 2^16. This is to prevent * overflow when using integer gradient summation. 
*/ -inline void CheckGradientMax(const std::vector& gpair) { +inline void CheckGradientMax(const std::vector& gpair) { auto* ptr = reinterpret_cast(gpair.data()); float abs_max = std::accumulate(ptr, ptr + (gpair.size() * 2), 0.f, @@ -87,19 +85,19 @@ struct GPUTrainingParam { // default=0 means no constraint on weight delta float max_delta_step; - __host__ __device__ GPUTrainingParam() {} + GPUTrainingParam() = default; - __host__ __device__ GPUTrainingParam(const TrainParam& param) + XGBOOST_DEVICE explicit GPUTrainingParam(const TrainParam& param) : min_child_weight(param.min_child_weight), reg_lambda(param.reg_lambda), reg_alpha(param.reg_alpha), max_delta_step(param.max_delta_step) {} }; -typedef int node_id_t; +using NodeIdT = int; /** used to assign default id to a Node */ -static const int UNUSED_NODE = -1; +static const int kUnusedNode = -1; /** * @enum DefaultDirection node.cuh @@ -107,9 +105,9 @@ static const int UNUSED_NODE = -1; */ enum DefaultDirection { /** move to left child */ - LeftDir = 0, + kLeftDir = 0, /** move to right child */ - RightDir + kRightDir }; struct DeviceSplitCandidate { @@ -117,15 +115,15 @@ struct DeviceSplitCandidate { DefaultDirection dir; float fvalue; int findex; - bst_gpair_integer left_sum; - bst_gpair_integer right_sum; + GradientPair left_sum; + GradientPair right_sum; - __host__ __device__ DeviceSplitCandidate() - : loss_chg(-FLT_MAX), dir(LeftDir), fvalue(0), findex(-1) {} + XGBOOST_DEVICE DeviceSplitCandidate() + : loss_chg(-FLT_MAX), dir(kLeftDir), fvalue(0), findex(-1) {} - template - __host__ __device__ void Update(const DeviceSplitCandidate& other, - const param_t& param) { + template + XGBOOST_DEVICE void Update(const DeviceSplitCandidate& other, + const ParamT& param) { if (other.loss_chg > loss_chg && other.left_sum.GetHess() >= param.min_child_weight && other.right_sum.GetHess() >= param.min_child_weight) { @@ -133,10 +131,10 @@ struct DeviceSplitCandidate { } } - __device__ void Update(float loss_chg_in, DefaultDirection dir_in, + XGBOOST_DEVICE void Update(float loss_chg_in, DefaultDirection dir_in, float fvalue_in, int findex_in, - bst_gpair_integer left_sum_in, - bst_gpair_integer right_sum_in, + GradientPair left_sum_in, + GradientPair right_sum_in, const GPUTrainingParam& param) { if (loss_chg_in > loss_chg && left_sum_in.GetHess() >= param.min_child_weight && @@ -149,11 +147,11 @@ struct DeviceSplitCandidate { findex = findex_in; } } - __device__ bool IsValid() const { return loss_chg > 0.0f; } + XGBOOST_DEVICE bool IsValid() const { return loss_chg > 0.0f; } }; struct DeviceNodeStats { - bst_gpair sum_gradients; + GradientPair sum_gradients; float root_gain; float weight; @@ -161,31 +159,31 @@ struct DeviceNodeStats { DefaultDirection dir; /** threshold value for comparison */ float fvalue; - bst_gpair left_sum; - bst_gpair right_sum; + GradientPair left_sum; + GradientPair right_sum; /** \brief The feature index. 
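CheckGradientMax above reads the (grad, hess) array as a flat float sequence to find the absolute maximum in a single std::accumulate pass. A self-contained sketch of that idiom (simplified Pair type; assumes two packed floats with no padding):

#include <algorithm>
#include <cmath>
#include <numeric>
#include <vector>

struct Pair { float g, h; };
static_assert(sizeof(Pair) == 2 * sizeof(float), "expect no padding");

float AbsMax(const std::vector<Pair>& v) {
  const float* p = reinterpret_cast<const float*>(v.data());
  return std::accumulate(p, p + v.size() * 2, 0.f, [](float a, float b) {
    return std::max(a, std::abs(b));
  });
}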
*/ int fidx; /** node id (used as key for reduce/scan) */ - node_id_t idx; + NodeIdT idx; HOST_DEV_INLINE DeviceNodeStats() : sum_gradients(), root_gain(-FLT_MAX), weight(-FLT_MAX), - dir(LeftDir), + dir(kLeftDir), fvalue(0.f), left_sum(), right_sum(), - fidx(UNUSED_NODE), - idx(UNUSED_NODE) {} + fidx(kUnusedNode), + idx(kUnusedNode) {} - template - HOST_DEV_INLINE DeviceNodeStats(bst_gpair sum_gradients, node_id_t nidx, - const param_t& param) + template + HOST_DEV_INLINE DeviceNodeStats(GradientPair sum_gradients, NodeIdT nidx, + const ParamT& param) : sum_gradients(sum_gradients), - dir(LeftDir), + dir(kLeftDir), fvalue(0.f), - fidx(UNUSED_NODE), + fidx(kUnusedNode), idx(nidx) { this->root_gain = CalcGain(param, sum_gradients.GetGrad(), sum_gradients.GetHess()); @@ -194,7 +192,7 @@ struct DeviceNodeStats { } HOST_DEV_INLINE void SetSplit(float fvalue, int fidx, DefaultDirection dir, - bst_gpair left_sum, bst_gpair right_sum) { + GradientPair left_sum, GradientPair right_sum) { this->fvalue = fvalue; this->fidx = fidx; this->dir = dir; @@ -208,11 +206,11 @@ struct DeviceNodeStats { } /** Tells whether this node is part of the decision tree */ - HOST_DEV_INLINE bool IsUnused() const { return (idx == UNUSED_NODE); } + HOST_DEV_INLINE bool IsUnused() const { return (idx == kUnusedNode); } /** Tells whether this node is a leaf of the decision tree */ HOST_DEV_INLINE bool IsLeaf() const { - return (!IsUnused() && (fidx == UNUSED_NODE)); + return (!IsUnused() && (fidx == kUnusedNode)); } }; @@ -221,37 +219,37 @@ struct SumCallbackOp { // Running prefix T running_total; // Constructor - __device__ SumCallbackOp() : running_total(T()) {} - __device__ T operator()(T block_aggregate) { + XGBOOST_DEVICE SumCallbackOp() : running_total(T()) {} + XGBOOST_DEVICE T operator()(T block_aggregate) { T old_prefix = running_total; running_total += block_aggregate; return old_prefix; } }; -template -__device__ inline float device_calc_loss_chg(const GPUTrainingParam& param, - const gpair_t& left, - const gpair_t& parent_sum, +template +XGBOOST_DEVICE inline float DeviceCalcLossChange(const GPUTrainingParam& param, + const GradientPairT& left, + const GradientPairT& parent_sum, const float& parent_gain) { - gpair_t right = parent_sum - left; + GradientPairT right = parent_sum - left; float left_gain = CalcGain(param, left.GetGrad(), left.GetHess()); float right_gain = CalcGain(param, right.GetGrad(), right.GetHess()); return left_gain + right_gain - parent_gain; } // Without constraints -template -__device__ float inline loss_chg_missing(const gpair_t& scan, - const gpair_t& missing, - const gpair_t& parent_sum, +template +XGBOOST_DEVICE float inline LossChangeMissing(const GradientPairT& scan, + const GradientPairT& missing, + const GradientPairT& parent_sum, const float& parent_gain, const GPUTrainingParam& param, bool& missing_left_out) { // NOLINT float missing_left_loss = - device_calc_loss_chg(param, scan + missing, parent_sum, parent_gain); + DeviceCalcLossChange(param, scan + missing, parent_sum, parent_gain); float missing_right_loss = - device_calc_loss_chg(param, scan, parent_sum, parent_gain); + DeviceCalcLossChange(param, scan, parent_sum, parent_gain); if (missing_left_loss >= missing_right_loss) { missing_left_out = true; @@ -263,9 +261,9 @@ __device__ float inline loss_chg_missing(const gpair_t& scan, } // With constraints -template -__device__ float inline loss_chg_missing( - const gpair_t& scan, const gpair_t& missing, const gpair_t& parent_sum, +template +XGBOOST_DEVICE float inline 
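DeviceCalcLossChange above expresses the split gain additively: the candidate's score is the sum of the two child gains minus the parent's, with each gain the usual regularized ratio of squared gradient sum to hessian sum. A host sketch of that formula (simplified: L2 regularization only, no min_child_weight or max_delta_step handling):

#include <cmath>

float Gain(float sum_grad, float sum_hess, float lambda) {
  return (sum_grad * sum_grad) / (sum_hess + lambda);
}

float LossChange(float gl, float hl, float gp, float hp, float lambda) {
  const float gr = gp - gl, hr = hp - hl;  // right child = parent - left
  return Gain(gl, hl, lambda) + Gain(gr, hr, lambda) - Gain(gp, hp, lambda);
}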
LossChangeMissing( + const GradientPairT& scan, const GradientPairT& missing, const GradientPairT& parent_sum, const float& parent_gain, const GPUTrainingParam& param, int constraint, const ValueConstraint& value_constraint, bool& missing_left_out) { // NOLINT @@ -285,54 +283,54 @@ __device__ float inline loss_chg_missing( } // Total number of nodes in tree, given depth -__host__ __device__ inline int n_nodes(int depth) { +XGBOOST_DEVICE inline int MaxNodesDepth(int depth) { return (1 << (depth + 1)) - 1; } // Number of nodes at this level of the tree -__host__ __device__ inline int n_nodes_level(int depth) { return 1 << depth; } +XGBOOST_DEVICE inline int MaxNodesLevel(int depth) { return 1 << depth; } // Whether a node is currently being processed at current depth -__host__ __device__ inline bool is_active(int nidx, int depth) { - return nidx >= n_nodes(depth - 1); +XGBOOST_DEVICE inline bool IsNodeActive(int nidx, int depth) { + return nidx >= MaxNodesDepth(depth - 1); } -__host__ __device__ inline int parent_nidx(int nidx) { return (nidx - 1) / 2; } +XGBOOST_DEVICE inline int ParentNodeIdx(int nidx) { return (nidx - 1) / 2; } -__host__ __device__ inline int left_child_nidx(int nidx) { +XGBOOST_DEVICE inline int LeftChildNodeIdx(int nidx) { return nidx * 2 + 1; } -__host__ __device__ inline int right_child_nidx(int nidx) { +XGBOOST_DEVICE inline int RightChildNodeIdx(int nidx) { return nidx * 2 + 2; } -__host__ __device__ inline bool is_left_child(int nidx) { +XGBOOST_DEVICE inline bool IsLeftChild(int nidx) { return nidx % 2 == 1; } // Copy gpu dense representation of tree to xgboost sparse representation -inline void dense2sparse_tree(RegTree* p_tree, - const dh::dvec& nodes, +inline void Dense2SparseTree(RegTree* p_tree, + const dh::DVec& nodes, const TrainParam& param) { RegTree& tree = *p_tree; - std::vector h_nodes = nodes.as_vector(); + std::vector h_nodes = nodes.AsVector(); int nid = 0; for (int gpu_nid = 0; gpu_nid < h_nodes.size(); gpu_nid++) { const DeviceNodeStats& n = h_nodes[gpu_nid]; if (!n.IsUnused() && !n.IsLeaf()) { tree.AddChilds(nid); - tree[nid].set_split(n.fidx, n.fvalue, n.dir == LeftDir); - tree.stat(nid).loss_chg = n.root_gain; - tree.stat(nid).base_weight = n.weight; - tree.stat(nid).sum_hess = n.sum_gradients.GetHess(); - tree[tree[nid].cleft()].set_leaf(0); - tree[tree[nid].cright()].set_leaf(0); + tree[nid].SetSplit(n.fidx, n.fvalue, n.dir == kLeftDir); + tree.Stat(nid).loss_chg = n.root_gain; + tree.Stat(nid).base_weight = n.weight; + tree.Stat(nid).sum_hess = n.sum_gradients.GetHess(); + tree[tree[nid].LeftChild()].SetLeaf(0); + tree[tree[nid].RightChild()].SetLeaf(0); nid++; } else if (n.IsLeaf()) { - tree[nid].set_leaf(n.weight * param.learning_rate); - tree.stat(nid).sum_hess = n.sum_gradients.GetHess(); + tree[nid].SetLeaf(n.weight * param.learning_rate); + tree.Stat(nid).sum_hess = n.sum_gradients.GetHess(); nid++; } } @@ -346,11 +344,11 @@ struct BernoulliRng { float p; uint32_t seed; - __host__ __device__ BernoulliRng(float p, size_t seed_) : p(p) { + XGBOOST_DEVICE BernoulliRng(float p, size_t seed_) : p(p) { seed = static_cast(seed_); } - __host__ __device__ bool operator()(const int i) const { + XGBOOST_DEVICE bool operator()(const int i) const { thrust::default_random_engine rng(seed); thrust::uniform_real_distribution dist; rng.discard(i); @@ -359,25 +357,25 @@ struct BernoulliRng { }; // Set gradient pair to 0 with p = 1 - subsample -inline void subsample_gpair(dh::dvec* p_gpair, float subsample, +inline void SubsampleGradientPair(dh::DVec* 
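MaxNodesDepth and MaxNodesLevel above encode the size of a complete binary tree: 2^d nodes at level d and 2^(d+1) - 1 nodes in total up to depth d. A quick consistency check (illustration only):

#include <cassert>

int main() {
  int total = 0;
  for (int d = 0; d <= 20; ++d) {
    total += 1 << d;                      // MaxNodesLevel(d)
    assert(total == (1 << (d + 1)) - 1);  // MaxNodesDepth(d)
  }
  return 0;
}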
p_gpair, float subsample, int offset = 0) { if (subsample == 1.0) { return; } - dh::dvec& gpair = *p_gpair; + dh::DVec& gpair = *p_gpair; - auto d_gpair = gpair.data(); + auto d_gpair = gpair.Data(); BernoulliRng rng(subsample, common::GlobalRandom()()); - dh::launch_n(gpair.device_idx(), gpair.size(), [=] __device__(int i) { + dh::LaunchN(gpair.DeviceIdx(), gpair.Size(), [=] XGBOOST_DEVICE(int i) { if (!rng(i + offset)) { - d_gpair[i] = bst_gpair(); + d_gpair[i] = GradientPair(); } }); } -inline std::vector col_sample(std::vector features, float colsample) { +inline std::vector ColSample(std::vector features, float colsample) { CHECK_GT(features.size(), 0); int n = std::max(1, static_cast(colsample * features.size())); @@ -397,9 +395,9 @@ inline std::vector col_sample(std::vector features, float colsample) { */ class ColumnSampler { - std::vector feature_set_tree; - std::map> feature_set_level; - TrainParam param; + std::vector feature_set_tree_; + std::map> feature_set_level_; + TrainParam param_; public: /** @@ -413,10 +411,10 @@ class ColumnSampler { void Init(int64_t num_col, const TrainParam& param) { this->Reset(); - this->param = param; - feature_set_tree.resize(num_col); - std::iota(feature_set_tree.begin(), feature_set_tree.end(), 0); - feature_set_tree = col_sample(feature_set_tree, param.colsample_bytree); + this->param_ = param; + feature_set_tree_.resize(num_col); + std::iota(feature_set_tree_.begin(), feature_set_tree_.end(), 0); + feature_set_tree_ = ColSample(feature_set_tree_, param.colsample_bytree); } /** @@ -426,8 +424,8 @@ class ColumnSampler { */ void Reset() { - feature_set_tree.clear(); - feature_set_level.clear(); + feature_set_tree_.clear(); + feature_set_level_.clear(); } /** @@ -442,13 +440,13 @@ class ColumnSampler { */ bool ColumnUsed(int column, int depth) { - if (feature_set_level.count(depth) == 0) { - feature_set_level[depth] = - col_sample(feature_set_tree, param.colsample_bylevel); + if (feature_set_level_.count(depth) == 0) { + feature_set_level_[depth] = + ColSample(feature_set_tree_, param_.colsample_bylevel); } - return std::binary_search(feature_set_level[depth].begin(), - feature_set_level[depth].end(), column); + return std::binary_search(feature_set_level_[depth].begin(), + feature_set_level_[depth].end(), column); } }; diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu index c3ff507a7..93309f3db 100644 --- a/src/tree/updater_gpu_hist.cu +++ b/src/tree/updater_gpu_hist.cu @@ -23,22 +23,22 @@ namespace tree { DMLC_REGISTRY_FILE_TAG(updater_gpu_hist); -typedef bst_gpair_precise gpair_sum_t; +using GradientPairSumT = GradientPairPrecise; -template -__device__ gpair_sum_t ReduceFeature(const gpair_sum_t* begin, - const gpair_sum_t* end, - temp_storage_t* temp_storage) { - __shared__ cub::Uninitialized uninitialized_sum; - gpair_sum_t& shared_sum = uninitialized_sum.Alias(); +template +__device__ GradientPairSumT ReduceFeature(const GradientPairSumT* begin, + const GradientPairSumT* end, + TempStorageT* temp_storage) { + __shared__ cub::Uninitialized uninitialized_sum; + GradientPairSumT& shared_sum = uninitialized_sum.Alias(); - gpair_sum_t local_sum = gpair_sum_t(); + GradientPairSumT local_sum = GradientPairSumT(); for (auto itr = begin; itr < end; itr += BLOCK_THREADS) { bool thread_active = itr + threadIdx.x < end; // Scan histogram - gpair_sum_t bin = thread_active ? *(itr + threadIdx.x) : gpair_sum_t(); + GradientPairSumT bin = thread_active ? 
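BernoulliRng above makes each subsampling decision a pure function of (seed, index): the engine is re-seeded and discarded to position i on every call, so a row's coin flip is reproducible regardless of which thread evaluates it. A host-side sketch of the same structure using <random> (note: mt19937's discard is linear-time, whereas thrust's default engine supports cheap discard):

#include <random>

struct Bernoulli {
  float p;
  unsigned seed;
  bool operator()(int i) const {
    std::mt19937 rng(seed);
    rng.discard(i);  // jump to this index's position in the stream
    std::uniform_real_distribution<float> dist(0.f, 1.f);
    return dist(rng) <= p;  // true with probability ~p: keep the row
  }
};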
*(itr + threadIdx.x) : GradientPairSumT(); - local_sum += reduce_t(temp_storage->sum_reduce).Reduce(bin, cub::Sum()); + local_sum += ReduceT(temp_storage->sum_reduce).Reduce(bin, cub::Sum()); } if (threadIdx.x == 0) { @@ -49,41 +49,41 @@ __device__ gpair_sum_t ReduceFeature(const gpair_sum_t* begin, return shared_sum; } -template -__device__ void EvaluateFeature(int fidx, const gpair_sum_t* hist, +template +__device__ void EvaluateFeature(int fidx, const GradientPairSumT* hist, const int* feature_segments, float min_fvalue, const float* gidx_fvalue_map, DeviceSplitCandidate* best_split, const DeviceNodeStats& node, const GPUTrainingParam& param, - temp_storage_t* temp_storage, int constraint, + TempStorageT* temp_storage, int constraint, const ValueConstraint& value_constraint) { int gidx_begin = feature_segments[fidx]; int gidx_end = feature_segments[fidx + 1]; - gpair_sum_t feature_sum = ReduceFeature( + GradientPairSumT feature_sum = ReduceFeature( hist + gidx_begin, hist + gidx_end, temp_storage); - auto prefix_op = SumCallbackOp(); + auto prefix_op = SumCallbackOp(); for (int scan_begin = gidx_begin; scan_begin < gidx_end; scan_begin += BLOCK_THREADS) { bool thread_active = scan_begin + threadIdx.x < gidx_end; - gpair_sum_t bin = - thread_active ? hist[scan_begin + threadIdx.x] : gpair_sum_t(); + GradientPairSumT bin = + thread_active ? hist[scan_begin + threadIdx.x] : GradientPairSumT(); scan_t(temp_storage->scan).ExclusiveScan(bin, bin, cub::Sum(), prefix_op); // Calculate gain - gpair_sum_t parent_sum = gpair_sum_t(node.sum_gradients); + GradientPairSumT parent_sum = GradientPairSumT(node.sum_gradients); - gpair_sum_t missing = parent_sum - feature_sum; + GradientPairSumT missing = parent_sum - feature_sum; bool missing_left = true; const float null_gain = -FLT_MAX; float gain = null_gain; if (thread_active) { - gain = loss_chg_missing(bin, missing, parent_sum, node.root_gain, param, + gain = LossChangeMissing(bin, missing, parent_sum, node.root_gain, param, constraint, value_constraint, missing_left); } @@ -92,7 +92,7 @@ __device__ void EvaluateFeature(int fidx, const gpair_sum_t* hist, // Find thread with best gain cub::KeyValuePair tuple(threadIdx.x, gain); cub::KeyValuePair best = - max_reduce_t(temp_storage->max_reduce).Reduce(tuple, cub::ArgMax()); + max_ReduceT(temp_storage->max_reduce).Reduce(tuple, cub::ArgMax()); __shared__ cub::KeyValuePair block_max; if (threadIdx.x == 0) { @@ -107,11 +107,11 @@ __device__ void EvaluateFeature(int fidx, const gpair_sum_t* hist, float fvalue = gidx == gidx_begin ? min_fvalue : gidx_fvalue_map[gidx - 1]; - gpair_sum_t left = missing_left ? bin + missing : bin; - gpair_sum_t right = parent_sum - left; + GradientPairSumT left = missing_left ? bin + missing : bin; + GradientPairSumT right = parent_sum - left; - best_split->Update(gain, missing_left ? LeftDir : RightDir, fvalue, fidx, - left, right, param); + best_split->Update(gain, missing_left ? 
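ReduceFeature above walks a feature's bin range in BLOCK_THREADS-sized tiles, reducing each tile with cub::BlockReduce and accumulating a running sum. The shape of that loop in isolation (hypothetical SumRange kernel, float payload instead of gradient pairs):

#include <cub/cub.cuh>

template <int BLOCK_THREADS>
__global__ void SumRange(const float* begin, const float* end, float* out) {
  typedef cub::BlockReduce<float, BLOCK_THREADS> BlockReduceT;
  __shared__ typename BlockReduceT::TempStorage temp;
  float local = 0.f;
  for (const float* it = begin; it < end; it += BLOCK_THREADS) {
    bool active = it + threadIdx.x < end;
    float v = active ? it[threadIdx.x] : 0.f;
    local += BlockReduceT(temp).Sum(v);  // aggregate is valid in thread 0 only
    __syncthreads();                     // temp storage is reused next tile
  }
  if (threadIdx.x == 0) *out = local;
}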
kLeftDir : kRightDir, fvalue, fidx, + GradientPair(left), GradientPair(right), param); } __syncthreads(); } @@ -119,17 +119,17 @@ __device__ void EvaluateFeature(int fidx, const gpair_sum_t* hist, template __global__ void evaluate_split_kernel( - const gpair_sum_t* d_hist, int nidx, uint64_t n_features, + const GradientPairSumT* d_hist, int nidx, uint64_t n_features, DeviceNodeStats nodes, const int* d_feature_segments, const float* d_fidx_min_map, const float* d_gidx_fvalue_map, GPUTrainingParam gpu_param, DeviceSplitCandidate* d_split, ValueConstraint value_constraint, int* d_monotonic_constraints) { typedef cub::KeyValuePair ArgMaxT; - typedef cub::BlockScan + typedef cub::BlockScan BlockScanT; typedef cub::BlockReduce MaxReduceT; - typedef cub::BlockReduce SumReduceT; + typedef cub::BlockReduce SumReduceT; union TempStorage { typename BlockScanT::TempStorage scan; @@ -163,8 +163,8 @@ __global__ void evaluate_split_kernel( } // Find a gidx value for a given feature otherwise return -1 if not found -template -__device__ int BinarySearchRow(bst_uint begin, bst_uint end, gidx_iter_t data, +template +__device__ int BinarySearchRow(bst_uint begin, bst_uint end, GidxIterT data, int fidx_begin, int fidx_end) { bst_uint previous_middle = UINT32_MAX; while (end != begin) { @@ -189,19 +189,19 @@ __device__ int BinarySearchRow(bst_uint begin, bst_uint end, gidx_iter_t data, } struct DeviceHistogram { - dh::bulk_allocator ba; - dh::dvec data; + dh::BulkAllocator ba; + dh::DVec data; int n_bins; void Init(int device_idx, int max_nodes, int n_bins, bool silent) { this->n_bins = n_bins; - ba.allocate(device_idx, silent, &data, size_t(max_nodes) * size_t(n_bins)); + ba.Allocate(device_idx, silent, &data, size_t(max_nodes) * size_t(n_bins)); } - void Reset() { data.fill(gpair_sum_t()); } - gpair_sum_t* GetHistPtr(int nidx) { return data.data() + nidx * n_bins; } + void Reset() { data.Fill(GradientPairSumT()); } + GradientPairSumT* GetHistPtr(int nidx) { return data.Data() + nidx * n_bins; } void PrintNidx(int nidx) const { - auto h_data = data.as_vector(); + auto h_data = data.AsVector(); std::cout << "nidx " << nidx << ":\n"; for (int i = n_bins * nidx; i < n_bins * (nidx + 1); i++) { std::cout << h_data[i] << " "; @@ -216,7 +216,7 @@ struct CalcWeightTrainParam { float reg_lambda; float max_delta_step; float learning_rate; - __host__ __device__ CalcWeightTrainParam(const TrainParam& p) + XGBOOST_DEVICE explicit CalcWeightTrainParam(const TrainParam& p) : min_child_weight(p.min_child_weight), reg_alpha(p.reg_alpha), reg_lambda(p.reg_lambda), @@ -240,19 +240,19 @@ struct DeviceShard { int device_idx; int normalised_device_idx; // Device index counting from param.gpu_id - dh::bulk_allocator ba; - dh::dvec gidx_buffer; - dh::dvec gpair; - dh::dvec2 ridx; // Row index relative to this shard - dh::dvec2 position; + dh::BulkAllocator ba; + dh::DVec gidx_buffer; + dh::DVec gpair; + dh::DVec2 ridx; // Row index relative to this shard + dh::DVec2 position; std::vector ridx_segments; - dh::dvec feature_segments; - dh::dvec gidx_fvalue_map; - dh::dvec min_fvalue; - dh::dvec monotone_constraints; - dh::dvec prediction_cache; - std::vector node_sum_gradients; - dh::dvec node_sum_gradients_d; + dh::DVec feature_segments; + dh::DVec gidx_fvalue_map; + dh::DVec min_fvalue; + dh::DVec monotone_constraints; + dh::DVec prediction_cache; + std::vector node_sum_gradients; + dh::DVec node_sum_gradients_d; common::CompressedIterator gidx; int row_stride; bst_uint row_begin_idx; // The row offset for this shard @@ -311,8 
+311,8 @@ struct DeviceShard { << "Max leaves and max depth cannot both be unconstrained for " "gpu_hist."; int max_nodes = - param.max_leaves > 0 ? param.max_leaves * 2 : n_nodes(param.max_depth); - ba.allocate(device_idx, param.silent, &gidx_buffer, compressed_size_bytes, + param.max_leaves > 0 ? param.max_leaves * 2 : MaxNodesDepth(param.max_depth); + ba.Allocate(device_idx, param.silent, &gidx_buffer, compressed_size_bytes, &gpair, n_rows, &ridx, n_rows, &position, n_rows, &prediction_cache, n_rows, &node_sum_gradients_d, max_nodes, &feature_segments, gmat.cut->row_ptr.size(), &gidx_fvalue_map, @@ -328,11 +328,11 @@ struct DeviceShard { // Compress gidx common::CompressedBufferWriter cbw(num_symbols); - std::vector host_buffer(gidx_buffer.size()); + std::vector host_buffer(gidx_buffer.Size()); cbw.Write(host_buffer.data(), ellpack_matrix.begin(), ellpack_matrix.end()); gidx_buffer = host_buffer; gidx = - common::CompressedIterator(gidx_buffer.data(), num_symbols); + common::CompressedIterator(gidx_buffer.Data(), num_symbols); common::CompressedIterator ci_host(host_buffer.data(), num_symbols); @@ -369,19 +369,19 @@ struct DeviceShard { } // Reset values for each update iteration - void Reset(HostDeviceVector* dh_gpair, int device) { + void Reset(HostDeviceVector* dh_gpair, int device) { auto begin = dh_gpair->tbegin(device); dh::safe_cuda(cudaSetDevice(device_idx)); - position.current_dvec().fill(0); + position.CurrentDVec().Fill(0); std::fill(node_sum_gradients.begin(), node_sum_gradients.end(), - bst_gpair()); + GradientPair()); - thrust::sequence(ridx.current_dvec().tbegin(), ridx.current_dvec().tend()); + thrust::sequence(ridx.CurrentDVec().tbegin(), ridx.CurrentDVec().tend()); std::fill(ridx_segments.begin(), ridx_segments.end(), Segment(0, 0)); - ridx_segments.front() = Segment(0, ridx.size()); + ridx_segments.front() = Segment(0, ridx.Size()); this->gpair.copy(begin + row_begin_idx, begin + row_end_idx); - subsample_gpair(&gpair, param.subsample, row_begin_idx); + SubsampleGradientPair(&gpair, param.subsample, row_begin_idx); hist.Reset(); } @@ -389,13 +389,13 @@ struct DeviceShard { auto segment = ridx_segments[nidx]; auto d_node_hist = hist.GetHistPtr(nidx); auto d_gidx = gidx; - auto d_ridx = ridx.current(); - auto d_gpair = gpair.data(); + auto d_ridx = ridx.Current(); + auto d_gpair = gpair.Data(); auto row_stride = this->row_stride; auto null_gidx_value = this->null_gidx_value; auto n_elements = segment.Size() * row_stride; - dh::launch_n(device_idx, n_elements, [=] __device__(size_t idx) { + dh::LaunchN(device_idx, n_elements, [=] __device__(size_t idx) { int ridx = d_ridx[(idx / row_stride) + segment.begin]; int gidx = d_gidx[ridx * row_stride + idx % row_stride]; @@ -410,7 +410,7 @@ struct DeviceShard { auto d_node_hist_histogram = hist.GetHistPtr(nidx_histogram); auto d_node_hist_subtraction = hist.GetHistPtr(nidx_subtraction); - dh::launch_n(device_idx, hist.n_bins, [=] __device__(size_t idx) { + dh::LaunchN(device_idx, hist.n_bins, [=] __device__(size_t idx) { d_node_hist_subtraction[idx] = d_node_hist_parent[idx] - d_node_hist_histogram[idx]; }); @@ -432,11 +432,11 @@ struct DeviceShard { auto d_left_count = temp_memory.Pointer(); dh::safe_cuda(cudaMemset(d_left_count, 0, sizeof(int64_t))); auto segment = ridx_segments[nidx]; - auto d_ridx = ridx.current(); - auto d_position = position.current(); + auto d_ridx = ridx.Current(); + auto d_position = position.Current(); auto d_gidx = gidx; auto row_stride = this->row_stride; - dh::launch_n<1, 512>( + dh::LaunchN<1, 
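BuildHist and SubtractionTrick above are the two halves of the histogram strategy: scatter each (row, bin) gradient with atomics into the node's slice of one flat buffer, then obtain the sibling's histogram as parent minus built child instead of a second scatter pass. A stripped-down CUDA sketch with float bins (hypothetical kernels; the real code accumulates gradient pairs):

__global__ void BuildHistSketch(const int* bin_idx, const float* grad,
                                float* hist, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) atomicAdd(&hist[bin_idx[i]], grad[i]);  // scatter into bins
}

__global__ void SubtractionTrickSketch(const float* parent, const float* built,
                                       float* sibling, int n_bins) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n_bins) sibling[i] = parent[i] - built[i];  // sibling for free
}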
512>( device_idx, segment.Size(), [=] __device__(bst_uint idx) { idx += segment.begin; auto ridx = d_ridx[idx]; @@ -482,22 +482,22 @@ struct DeviceShard { size_t temp_storage_bytes = 0; cub::DeviceRadixSort::SortPairs( - nullptr, temp_storage_bytes, position.current() + segment.begin, - position.other() + segment.begin, ridx.current() + segment.begin, + nullptr, temp_storage_bytes, position.Current() + segment.begin, + position.other() + segment.begin, ridx.Current() + segment.begin, ridx.other() + segment.begin, segment.Size(), min_bits, max_bits); temp_memory.LazyAllocate(temp_storage_bytes); cub::DeviceRadixSort::SortPairs( temp_memory.d_temp_storage, temp_memory.temp_storage_bytes, - position.current() + segment.begin, position.other() + segment.begin, - ridx.current() + segment.begin, ridx.other() + segment.begin, + position.Current() + segment.begin, position.other() + segment.begin, + ridx.Current() + segment.begin, ridx.other() + segment.begin, segment.Size(), min_bits, max_bits); dh::safe_cuda(cudaMemcpy( - position.current() + segment.begin, position.other() + segment.begin, + position.Current() + segment.begin, position.other() + segment.begin, segment.Size() * sizeof(int), cudaMemcpyDeviceToDevice)); dh::safe_cuda(cudaMemcpy( - ridx.current() + segment.begin, ridx.other() + segment.begin, + ridx.Current() + segment.begin, ridx.other() + segment.begin, segment.Size() * sizeof(bst_uint), cudaMemcpyDeviceToDevice)); } @@ -505,8 +505,8 @@ struct DeviceShard { dh::safe_cuda(cudaSetDevice(device_idx)); if (!prediction_cache_initialised) { dh::safe_cuda(cudaMemcpy( - prediction_cache.data(), &out_preds_d[row_begin_idx], - prediction_cache.size() * sizeof(bst_float), cudaMemcpyDefault)); + prediction_cache.Data(), &out_preds_d[row_begin_idx], + prediction_cache.Size() * sizeof(bst_float), cudaMemcpyDefault)); } prediction_cache_initialised = true; @@ -514,13 +514,13 @@ struct DeviceShard { thrust::copy(node_sum_gradients.begin(), node_sum_gradients.end(), node_sum_gradients_d.tbegin()); - auto d_position = position.current(); - auto d_ridx = ridx.current(); - auto d_node_sum_gradients = node_sum_gradients_d.data(); - auto d_prediction_cache = prediction_cache.data(); + auto d_position = position.Current(); + auto d_ridx = ridx.Current(); + auto d_node_sum_gradients = node_sum_gradients_d.Data(); + auto d_prediction_cache = prediction_cache.Data(); - dh::launch_n( - device_idx, prediction_cache.size(), [=] __device__(int local_idx) { + dh::LaunchN( + device_idx, prediction_cache.Size(), [=] __device__(int local_idx) { int pos = d_position[local_idx]; bst_float weight = CalcWeight(param_d, d_node_sum_gradients[pos]); d_prediction_cache[d_ridx[local_idx]] += @@ -528,8 +528,8 @@ struct DeviceShard { }); dh::safe_cuda(cudaMemcpy( - &out_preds_d[row_begin_idx], prediction_cache.data(), - prediction_cache.size() * sizeof(bst_float), cudaMemcpyDefault)); + &out_preds_d[row_begin_idx], prediction_cache.Data(), + prediction_cache.Size() * sizeof(bst_float), cudaMemcpyDefault)); } }; @@ -537,33 +537,32 @@ class GPUHistMaker : public TreeUpdater { public: struct ExpandEntry; - GPUHistMaker() : initialised(false), p_last_fmat_(nullptr) {} - ~GPUHistMaker() {} + GPUHistMaker() : initialised_(false), p_last_fmat_(nullptr) {} void Init( const std::vector>& args) override { - param.InitAllowUnknown(args); - CHECK(param.n_gpus != 0) << "Must have at least one device"; - n_devices = param.n_gpus; + param_.InitAllowUnknown(args); + CHECK(param_.n_gpus != 0) << "Must have at least one device"; + n_devices_ 
= param_.n_gpus; - dh::check_compute_capability(); + dh::CheckComputeCapability(); - if (param.grow_policy == TrainParam::kLossGuide) { - qexpand_.reset(new ExpandQueue(loss_guide)); + if (param_.grow_policy == TrainParam::kLossGuide) { + qexpand_.reset(new ExpandQueue(LossGuide)); } else { - qexpand_.reset(new ExpandQueue(depth_wise)); + qexpand_.reset(new ExpandQueue(DepthWise)); } - monitor.Init("updater_gpu_hist", param.debug_verbose); + monitor_.Init("updater_gpu_hist", param_.debug_verbose); } - void Update(HostDeviceVector* gpair, DMatrix* dmat, + void Update(HostDeviceVector* gpair, DMatrix* dmat, const std::vector& trees) override { - monitor.Start("Update", dList); - GradStats::CheckInfo(dmat->info()); + monitor_.Start("Update", device_list_); + GradStats::CheckInfo(dmat->Info()); // rescale learning rate according to size of trees - float lr = param.learning_rate; - param.learning_rate = lr / trees.size(); - ValueConstraint::Init(¶m, dmat->info().num_col); + float lr = param_.learning_rate; + param_.learning_rate = lr / trees.size(); + ValueConstraint::Init(¶m_, dmat->Info().num_col_); // build tree try { for (size_t i = 0; i < trees.size(); ++i) { @@ -572,97 +571,97 @@ class GPUHistMaker : public TreeUpdater { } catch (const std::exception& e) { LOG(FATAL) << "GPU plugin exception: " << e.what() << std::endl; } - param.learning_rate = lr; - monitor.Stop("Update", dList); + param_.learning_rate = lr; + monitor_.Stop("Update", device_list_); } void InitDataOnce(DMatrix* dmat) { - info = &dmat->info(); - monitor.Start("Quantiles", dList); - hmat_.Init(dmat, param.max_bin); + info_ = &dmat->Info(); + monitor_.Start("Quantiles", device_list_); + hmat_.Init(dmat, param_.max_bin); gmat_.cut = &hmat_; gmat_.Init(dmat); - monitor.Stop("Quantiles", dList); - n_bins = hmat_.row_ptr.back(); + monitor_.Stop("Quantiles", device_list_); + n_bins_ = hmat_.row_ptr.back(); - int n_devices = dh::n_devices(param.n_gpus, info->num_row); + int n_devices = dh::NDevices(param_.n_gpus, info_->num_row_); bst_uint row_begin = 0; bst_uint shard_size = - std::ceil(static_cast(info->num_row) / n_devices); + std::ceil(static_cast(info_->num_row_) / n_devices); - dList.resize(n_devices); + device_list_.resize(n_devices); for (int d_idx = 0; d_idx < n_devices; ++d_idx) { - int device_idx = (param.gpu_id + d_idx) % dh::n_visible_devices(); - dList[d_idx] = device_idx; + int device_idx = (param_.gpu_id + d_idx) % dh::NVisibleDevices(); + device_list_[d_idx] = device_idx; } - reducer.Init(dList); + reducer_.Init(device_list_); // Partition input matrix into row segments std::vector row_segments; - shards.resize(n_devices); + shards_.resize(n_devices); row_segments.push_back(0); for (int d_idx = 0; d_idx < n_devices; ++d_idx) { bst_uint row_end = - std::min(static_cast(row_begin + shard_size), info->num_row); + std::min(static_cast(row_begin + shard_size), info_->num_row_); row_segments.push_back(row_end); row_begin = row_end; } // Create device shards - omp_set_num_threads(shards.size()); + omp_set_num_threads(shards_.size()); #pragma omp parallel { auto cpu_thread_id = omp_get_thread_num(); - shards[cpu_thread_id] = std::unique_ptr( - new DeviceShard(dList[cpu_thread_id], cpu_thread_id, gmat_, + shards_[cpu_thread_id] = std::unique_ptr( + new DeviceShard(device_list_[cpu_thread_id], cpu_thread_id, gmat_, row_segments[cpu_thread_id], - row_segments[cpu_thread_id + 1], n_bins, param)); + row_segments[cpu_thread_id + 1], n_bins_, param_)); } p_last_fmat_ = dmat; - initialised = true; + initialised_ = true; } - void 
InitData(HostDeviceVector* gpair, DMatrix* dmat, + void InitData(HostDeviceVector* gpair, DMatrix* dmat, const RegTree& tree) { - monitor.Start("InitDataOnce", dList); - if (!initialised) { + monitor_.Start("InitDataOnce", device_list_); + if (!initialised_) { this->InitDataOnce(dmat); } - monitor.Stop("InitDataOnce", dList); + monitor_.Stop("InitDataOnce", device_list_); - column_sampler.Init(info->num_col, param); + column_sampler_.Init(info_->num_col_, param_); // Copy gpair & reset memory - monitor.Start("InitDataReset", dList); - omp_set_num_threads(shards.size()); + monitor_.Start("InitDataReset", device_list_); + omp_set_num_threads(shards_.size()); // TODO(canonizer): make it parallel again once HostDeviceVector is // thread-safe - for (int shard = 0; shard < shards.size(); ++shard) - shards[shard]->Reset(gpair, param.gpu_id); - monitor.Stop("InitDataReset", dList); + for (int shard = 0; shard < shards_.size(); ++shard) + shards_[shard]->Reset(gpair, param_.gpu_id); + monitor_.Stop("InitDataReset", device_list_); } void AllReduceHist(int nidx) { - for (auto& shard : shards) { + for (auto& shard : shards_) { auto d_node_hist = shard->hist.GetHistPtr(nidx); - reducer.AllReduceSum( + reducer_.AllReduceSum( shard->normalised_device_idx, - reinterpret_cast(d_node_hist), - reinterpret_cast(d_node_hist), - n_bins * (sizeof(gpair_sum_t) / sizeof(gpair_sum_t::value_t))); + reinterpret_cast(d_node_hist), + reinterpret_cast(d_node_hist), + n_bins_ * (sizeof(GradientPairSumT) / sizeof(GradientPairSumT::ValueT))); } - reducer.Synchronize(); + reducer_.Synchronize(); } void BuildHistLeftRight(int nidx_parent, int nidx_left, int nidx_right) { size_t left_node_max_elements = 0; size_t right_node_max_elements = 0; - for (auto& shard : shards) { + for (auto& shard : shards_) { left_node_max_elements = (std::max)( left_node_max_elements, shard->ridx_segments[nidx_left].Size()); right_node_max_elements = (std::max)( @@ -677,13 +676,13 @@ class GPUHistMaker : public TreeUpdater { subtraction_trick_nidx = nidx_left; } - for (auto& shard : shards) { + for (auto& shard : shards_) { shard->BuildHist(build_hist_nidx); } this->AllReduceHist(build_hist_nidx); - for (auto& shard : shards) { + for (auto& shard : shards_) { shard->SubtractionTrick(nidx_parent, build_hist_nidx, subtraction_trick_nidx); } @@ -692,12 +691,12 @@ class GPUHistMaker : public TreeUpdater { // Returns best loss std::vector EvaluateSplits( const std::vector& nidx_set, RegTree* p_tree) { - auto columns = info->num_col; + auto columns = info_->num_col_; std::vector best_splits(nidx_set.size()); std::vector candidate_splits(nidx_set.size() * columns); // Use first device - auto& shard = shards.front(); + auto& shard = shards_.front(); dh::safe_cuda(cudaSetDevice(shard->device_idx)); shard->temp_memory.LazyAllocate(sizeof(DeviceSplitCandidate) * columns * nidx_set.size()); @@ -708,16 +707,16 @@ class GPUHistMaker : public TreeUpdater { // Use streams to process nodes concurrently for (auto i = 0; i < nidx_set.size(); i++) { auto nidx = nidx_set[i]; - DeviceNodeStats node(shard->node_sum_gradients[nidx], nidx, param); + DeviceNodeStats node(shard->node_sum_gradients[nidx], nidx, param_); const int BLOCK_THREADS = 256; evaluate_split_kernel <<>>( - shard->hist.GetHistPtr(nidx), nidx, info->num_col, node, - shard->feature_segments.data(), shard->min_fvalue.data(), - shard->gidx_fvalue_map.data(), GPUTrainingParam(param), + shard->hist.GetHistPtr(nidx), nidx, info_->num_col_, node, + shard->feature_segments.Data(), shard->min_fvalue.Data(), + 
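InitDataOnce above partitions rows into contiguous, nearly equal shards, one per device: shard_size = ceil(num_row / n_devices), with each boundary clamped to num_row. A host sketch of that segment computation (hypothetical helper):

#include <algorithm>
#include <vector>

std::vector<unsigned> RowSegments(unsigned num_row, unsigned n_devices) {
  std::vector<unsigned> seg = {0};
  unsigned shard = (num_row + n_devices - 1) / n_devices;  // ceil division
  for (unsigned d = 0; d < n_devices; ++d) {
    seg.push_back(std::min(seg.back() + shard, num_row));
  }
  return seg;  // shard d owns rows [seg[d], seg[d + 1])
}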
shard->gidx_fvalue_map.Data(), GPUTrainingParam(param_), d_split + i * columns, node_value_constraints_[nidx], - shard->monotone_constraints.data()); + shard->monotone_constraints.Data()); } dh::safe_cuda( @@ -730,9 +729,9 @@ class GPUHistMaker : public TreeUpdater { DeviceSplitCandidate nidx_best; for (auto fidx = 0; fidx < columns; fidx++) { auto& candidate = candidate_splits[i * columns + fidx]; - if (column_sampler.ColumnUsed(candidate.findex, + if (column_sampler_.ColumnUsed(candidate.findex, p_tree->GetDepth(nidx))) { - nidx_best.Update(candidate_splits[i * columns + fidx], param); + nidx_best.Update(candidate_splits[i * columns + fidx], param_); } } best_splits[i] = nidx_best; @@ -743,34 +742,34 @@ class GPUHistMaker : public TreeUpdater { void InitRoot(RegTree* p_tree) { auto root_nidx = 0; // Sum gradients - std::vector tmp_sums(shards.size()); - omp_set_num_threads(shards.size()); + std::vector tmp_sums(shards_.size()); + omp_set_num_threads(shards_.size()); #pragma omp parallel { auto cpu_thread_id = omp_get_thread_num(); - auto& shard = shards[cpu_thread_id]; + auto& shard = shards_[cpu_thread_id]; dh::safe_cuda(cudaSetDevice(shard->device_idx)); - tmp_sums[cpu_thread_id] = dh::sumReduction( - shard->temp_memory, shard->gpair.data(), shard->gpair.size()); + tmp_sums[cpu_thread_id] = dh::SumReduction( + shard->temp_memory, shard->gpair.Data(), shard->gpair.Size()); } auto sum_gradient = - std::accumulate(tmp_sums.begin(), tmp_sums.end(), bst_gpair_precise()); + std::accumulate(tmp_sums.begin(), tmp_sums.end(), GradientPair()); // Generate root histogram - for (auto& shard : shards) { + for (auto& shard : shards_) { shard->BuildHist(root_nidx); } this->AllReduceHist(root_nidx); // Remember root stats - p_tree->stat(root_nidx).sum_hess = sum_gradient.GetHess(); - auto weight = CalcWeight(param, sum_gradient); - p_tree->stat(root_nidx).base_weight = weight; - (*p_tree)[root_nidx].set_leaf(param.learning_rate * weight); + p_tree->Stat(root_nidx).sum_hess = sum_gradient.GetHess(); + auto weight = CalcWeight(param_, sum_gradient); + p_tree->Stat(root_nidx).base_weight = weight; + (*p_tree)[root_nidx].SetLeaf(param_.learning_rate * weight); // Store sum gradients - for (auto& shard : shards) { + for (auto& shard : shards_) { shard->node_sum_gradients[root_nidx] = sum_gradient; } @@ -785,14 +784,14 @@ class GPUHistMaker : public TreeUpdater { void UpdatePosition(const ExpandEntry& candidate, RegTree* p_tree) { auto nidx = candidate.nid; - auto left_nidx = (*p_tree)[nidx].cleft(); - auto right_nidx = (*p_tree)[nidx].cright(); + auto left_nidx = (*p_tree)[nidx].LeftChild(); + auto right_nidx = (*p_tree)[nidx].RightChild(); // convert floating-point split_pt into corresponding bin_id // split_cond = -1 indicates that split_pt is less than all known cut points auto split_gidx = -1; auto fidx = candidate.split.findex; - auto default_dir_left = candidate.split.dir == LeftDir; + auto default_dir_left = candidate.split.dir == kLeftDir; auto fidx_begin = hmat_.row_ptr[fidx]; auto fidx_end = hmat_.row_ptr[fidx + 1]; for (auto i = fidx_begin; i < fidx_end; ++i) { @@ -801,13 +800,13 @@ class GPUHistMaker : public TreeUpdater { } } - auto is_dense = info->num_nonzero == info->num_row * info->num_col; + auto is_dense = info_->num_nonzero_ == info_->num_row_ * info_->num_col_; - omp_set_num_threads(shards.size()); + omp_set_num_threads(shards_.size()); #pragma omp parallel { auto cpu_thread_id = omp_get_thread_num(); - shards[cpu_thread_id]->UpdatePosition(nidx, left_nidx, right_nidx, fidx, + 
shards_[cpu_thread_id]->UpdatePosition(nidx, left_nidx, right_nidx, fidx, split_gidx, default_dir_left, is_dense, fidx_begin, fidx_end); } @@ -818,55 +817,55 @@ class GPUHistMaker : public TreeUpdater { RegTree& tree = *p_tree; tree.AddChilds(candidate.nid); auto& parent = tree[candidate.nid]; - parent.set_split(candidate.split.findex, candidate.split.fvalue, - candidate.split.dir == LeftDir); - tree.stat(candidate.nid).loss_chg = candidate.split.loss_chg; + parent.SetSplit(candidate.split.findex, candidate.split.fvalue, + candidate.split.dir == kLeftDir); + tree.Stat(candidate.nid).loss_chg = candidate.split.loss_chg; // Set up child constraints node_value_constraints_.resize(tree.GetNodes().size()); - GradStats left_stats(param); + GradStats left_stats(param_); left_stats.Add(candidate.split.left_sum); - GradStats right_stats(param); + GradStats right_stats(param_); right_stats.Add(candidate.split.right_sum); node_value_constraints_[candidate.nid].SetChild( - param, parent.split_index(), left_stats, right_stats, - &node_value_constraints_[parent.cleft()], - &node_value_constraints_[parent.cright()]); + param_, parent.SplitIndex(), left_stats, right_stats, + &node_value_constraints_[parent.LeftChild()], + &node_value_constraints_[parent.RightChild()]); // Configure left child auto left_weight = - node_value_constraints_[parent.cleft()].CalcWeight(param, left_stats); - tree[parent.cleft()].set_leaf(left_weight * param.learning_rate, 0); - tree.stat(parent.cleft()).base_weight = left_weight; - tree.stat(parent.cleft()).sum_hess = candidate.split.left_sum.GetHess(); + node_value_constraints_[parent.LeftChild()].CalcWeight(param_, left_stats); + tree[parent.LeftChild()].SetLeaf(left_weight * param_.learning_rate, 0); + tree.Stat(parent.LeftChild()).base_weight = left_weight; + tree.Stat(parent.LeftChild()).sum_hess = candidate.split.left_sum.GetHess(); // Configure right child auto right_weight = - node_value_constraints_[parent.cright()].CalcWeight(param, right_stats); - tree[parent.cright()].set_leaf(right_weight * param.learning_rate, 0); - tree.stat(parent.cright()).base_weight = right_weight; - tree.stat(parent.cright()).sum_hess = candidate.split.right_sum.GetHess(); + node_value_constraints_[parent.RightChild()].CalcWeight(param_, right_stats); + tree[parent.RightChild()].SetLeaf(right_weight * param_.learning_rate, 0); + tree.Stat(parent.RightChild()).base_weight = right_weight; + tree.Stat(parent.RightChild()).sum_hess = candidate.split.right_sum.GetHess(); // Store sum gradients - for (auto& shard : shards) { - shard->node_sum_gradients[parent.cleft()] = candidate.split.left_sum; - shard->node_sum_gradients[parent.cright()] = candidate.split.right_sum; + for (auto& shard : shards_) { + shard->node_sum_gradients[parent.LeftChild()] = candidate.split.left_sum; + shard->node_sum_gradients[parent.RightChild()] = candidate.split.right_sum; } this->UpdatePosition(candidate, p_tree); } - void UpdateTree(HostDeviceVector* gpair, DMatrix* p_fmat, + void UpdateTree(HostDeviceVector* gpair, DMatrix* p_fmat, RegTree* p_tree) { // Temporarily store number of threads so we can change it back later int nthread = omp_get_max_threads(); auto& tree = *p_tree; - monitor.Start("InitData", dList); + monitor_.Start("InitData", device_list_); this->InitData(gpair, p_fmat, *p_tree); - monitor.Stop("InitData", dList); - monitor.Start("InitRoot", dList); + monitor_.Stop("InitData", device_list_); + monitor_.Start("InitRoot", device_list_); this->InitRoot(p_tree); - monitor.Stop("InitRoot", dList); + 
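UpdatePosition above converts the winning floating-point threshold back into a bin id by scanning the feature's cut values; rows are then routed by comparing their compressed gidx against split_gidx, with -1 meaning the split point lies below every known cut. One plausible realization of that conversion loop in isolation (host sketch):

#include <vector>

int SplitGidx(const std::vector<float>& cuts, int fidx_begin, int fidx_end,
              float fvalue) {
  int split_gidx = -1;  // -1: split point below all known cut points
  for (int i = fidx_begin; i < fidx_end; ++i) {
    if (cuts[i] == fvalue) split_gidx = i;  // fvalue was taken from cuts, so == is safe
  }
  return split_gidx;
}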
monitor_.Stop("InitRoot", device_list_); auto timestamp = qexpand_->size(); auto num_leaves = 1; @@ -874,25 +873,25 @@ class GPUHistMaker : public TreeUpdater { while (!qexpand_->empty()) { auto candidate = qexpand_->top(); qexpand_->pop(); - if (!candidate.IsValid(param, num_leaves)) continue; + if (!candidate.IsValid(param_, num_leaves)) continue; // std::cout << candidate; - monitor.Start("ApplySplit", dList); + monitor_.Start("ApplySplit", device_list_); this->ApplySplit(candidate, p_tree); - monitor.Stop("ApplySplit", dList); + monitor_.Stop("ApplySplit", device_list_); num_leaves++; - auto left_child_nidx = tree[candidate.nid].cleft(); - auto right_child_nidx = tree[candidate.nid].cright(); + auto left_child_nidx = tree[candidate.nid].LeftChild(); + auto right_child_nidx = tree[candidate.nid].RightChild(); // Only create child entries if needed - if (ExpandEntry::ChildIsValid(param, tree.GetDepth(left_child_nidx), + if (ExpandEntry::ChildIsValid(param_, tree.GetDepth(left_child_nidx), num_leaves)) { - monitor.Start("BuildHist", dList); + monitor_.Start("BuildHist", device_list_); this->BuildHistLeftRight(candidate.nid, left_child_nidx, right_child_nidx); - monitor.Stop("BuildHist", dList); + monitor_.Stop("BuildHist", device_list_); - monitor.Start("EvaluateSplits", dList); + monitor_.Start("EvaluateSplits", device_list_); auto splits = this->EvaluateSplits({left_child_nidx, right_child_nidx}, p_tree); qexpand_->push(ExpandEntry(left_child_nidx, @@ -901,7 +900,7 @@ class GPUHistMaker : public TreeUpdater { qexpand_->push(ExpandEntry(right_child_nidx, tree.GetDepth(right_child_nidx), splits[1], timestamp++)); - monitor.Stop("EvaluateSplits", dList); + monitor_.Stop("EvaluateSplits", device_list_); } } // Reset omp num threads @@ -910,17 +909,17 @@ class GPUHistMaker : public TreeUpdater { bool UpdatePredictionCache( const DMatrix* data, HostDeviceVector* p_out_preds) override { - monitor.Start("UpdatePredictionCache", dList); - if (shards.empty() || p_last_fmat_ == nullptr || p_last_fmat_ != data) + monitor_.Start("UpdatePredictionCache", device_list_); + if (shards_.empty() || p_last_fmat_ == nullptr || p_last_fmat_ != data) return false; - bst_float* out_preds_d = p_out_preds->ptr_d(param.gpu_id); + bst_float* out_preds_d = p_out_preds->DevicePointer(param_.gpu_id); #pragma omp parallel for schedule(static, 1) - for (int shard = 0; shard < shards.size(); ++shard) { - shards[shard]->UpdatePredictionCache(out_preds_d); + for (int shard = 0; shard < shards_.size(); ++shard) { + shards_[shard]->UpdatePredictionCache(out_preds_d); } - monitor.Stop("UpdatePredictionCache", dList); + monitor_.Stop("UpdatePredictionCache", device_list_); return true; } @@ -933,7 +932,7 @@ class GPUHistMaker : public TreeUpdater { uint64_t timestamp) : nid(nid), depth(depth), split(split), timestamp(timestamp) {} bool IsValid(const TrainParam& param, int num_leaves) const { - if (split.loss_chg <= rt_eps) return false; + if (split.loss_chg <= kRtEps) return false; if (split.left_sum.GetHess() == 0 || split.right_sum.GetHess() == 0) return false; if (param.max_depth > 0 && depth == param.max_depth) return false; @@ -959,38 +958,38 @@ class GPUHistMaker : public TreeUpdater { } }; - inline static bool depth_wise(ExpandEntry lhs, ExpandEntry rhs) { + inline static bool DepthWise(ExpandEntry lhs, ExpandEntry rhs) { if (lhs.depth == rhs.depth) { return lhs.timestamp > rhs.timestamp; // favor small timestamp } else { return lhs.depth > rhs.depth; // favor small depth } } - inline static bool loss_guide(ExpandEntry 
lhs, ExpandEntry rhs) { + inline static bool LossGuide(ExpandEntry lhs, ExpandEntry rhs) { if (lhs.split.loss_chg == rhs.split.loss_chg) { return lhs.timestamp > rhs.timestamp; // favor small timestamp } else { return lhs.split.loss_chg < rhs.split.loss_chg; // favor large loss_chg } } - TrainParam param; + TrainParam param_; common::HistCutMatrix hmat_; common::GHistIndexMatrix gmat_; - MetaInfo* info; - bool initialised; - int n_devices; - int n_bins; + MetaInfo* info_; + bool initialised_; + int n_devices_; + int n_bins_; - std::vector> shards; - ColumnSampler column_sampler; + std::vector> shards_; + ColumnSampler column_sampler_; typedef std::priority_queue, std::function> ExpandQueue; std::unique_ptr qexpand_; - common::Monitor monitor; - dh::AllReducer reducer; + common::Monitor monitor_; + dh::AllReducer reducer_; std::vector node_value_constraints_; - std::vector dList; + std::vector device_list_; DMatrix* p_last_fmat_; }; diff --git a/src/tree/updater_histmaker.cc b/src/tree/updater_histmaker.cc index 04012f4b8..cc01d9976 100644 --- a/src/tree/updater_histmaker.cc +++ b/src/tree/updater_histmaker.cc @@ -21,18 +21,18 @@ DMLC_REGISTRY_FILE_TAG(updater_histmaker); template class HistMaker: public BaseMaker { public: - void Update(HostDeviceVector *gpair, + void Update(HostDeviceVector *gpair, DMatrix *p_fmat, const std::vector &trees) override { - TStats::CheckInfo(p_fmat->info()); + TStats::CheckInfo(p_fmat->Info()); // rescale learning rate according to size of trees - float lr = param.learning_rate; - param.learning_rate = lr / trees.size(); + float lr = param_.learning_rate; + param_.learning_rate = lr / trees.size(); // build tree - for (size_t i = 0; i < trees.size(); ++i) { - this->Update(gpair->data_h(), p_fmat, trees[i]); + for (auto tree : trees) { + this->Update(gpair->HostVector(), p_fmat, tree); } - param.learning_rate = lr; + param_.learning_rate = lr; } protected: @@ -45,13 +45,13 @@ class HistMaker: public BaseMaker { /*! \brief size of histogram */ unsigned size; // default constructor - HistUnit() {} + HistUnit() = default; // constructor HistUnit(const bst_float *cut, TStats *data, unsigned size) : cut(cut), data(data), size(size) {} /*! \brief add a histogram to data */ inline void Add(bst_float fv, - const std::vector &gpair, + const std::vector &gpair, const MetaInfo &info, const bst_uint ridx) { unsigned i = std::upper_bound(cut, cut + size, fv) - cut; @@ -116,44 +116,44 @@ class HistMaker: public BaseMaker { } }; // workspace of thread - ThreadWSpace wspace; + ThreadWSpace wspace_; // reducer for histogram - rabit::Reducer histred; + rabit::Reducer histred_; // set of working features - std::vector fwork_set; + std::vector fwork_set_; // update function implementation - virtual void Update(const std::vector &gpair, + virtual void Update(const std::vector &gpair, DMatrix *p_fmat, RegTree *p_tree) { this->InitData(gpair, *p_fmat, *p_tree); - this->InitWorkSet(p_fmat, *p_tree, &fwork_set); + this->InitWorkSet(p_fmat, *p_tree, &fwork_set_); // mark root node as fresh. 
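DepthWise and LossGuide above show why qexpand_ is built with a std::function comparator: switching the growth policy only swaps the priority order of one queue. A minimal sketch of the pattern (simplified Entry type, illustration only):

#include <functional>
#include <queue>
#include <vector>

struct Entry { int depth; float loss_chg; unsigned timestamp; };

using ExpandQ = std::priority_queue<Entry, std::vector<Entry>,
                                    std::function<bool(Entry, Entry)>>;

bool ByDepth(Entry a, Entry b) {  // depth-wise: shallow nodes first
  return a.depth == b.depth ? a.timestamp > b.timestamp : a.depth > b.depth;
}

bool ByLoss(Entry a, Entry b) {  // loss-guided: largest loss_chg first
  return a.loss_chg == b.loss_chg ? a.timestamp > b.timestamp
                                  : a.loss_chg < b.loss_chg;
}

// ExpandQ q(ByDepth);  // or: ExpandQ q(ByLoss);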
for (int i = 0; i < p_tree->param.num_roots; ++i) { - (*p_tree)[i].set_leaf(0.0f, 0); + (*p_tree)[i].SetLeaf(0.0f, 0); } - for (int depth = 0; depth < param.max_depth; ++depth) { + for (int depth = 0; depth < param_.max_depth; ++depth) { // reset and propose candidate split - this->ResetPosAndPropose(gpair, p_fmat, fwork_set, *p_tree); + this->ResetPosAndPropose(gpair, p_fmat, fwork_set_, *p_tree); // create histogram - this->CreateHist(gpair, p_fmat, fwork_set, *p_tree); + this->CreateHist(gpair, p_fmat, fwork_set_, *p_tree); // find split based on histogram statistics - this->FindSplit(depth, gpair, p_fmat, fwork_set, p_tree); + this->FindSplit(depth, gpair, p_fmat, fwork_set_, p_tree); // reset position after split this->ResetPositionAfterSplit(p_fmat, *p_tree); this->UpdateQueueExpand(*p_tree); // if nothing left to be expand, break - if (qexpand.size() == 0) break; + if (qexpand_.size() == 0) break; } - for (size_t i = 0; i < qexpand.size(); ++i) { - const int nid = qexpand[i]; - (*p_tree)[nid].set_leaf(p_tree->stat(nid).base_weight * param.learning_rate); + for (size_t i = 0; i < qexpand_.size(); ++i) { + const int nid = qexpand_[i]; + (*p_tree)[nid].SetLeaf(p_tree->Stat(nid).base_weight * param_.learning_rate); } } // this function does two jobs // (1) reset the position in array position, to be the latest leaf id // (2) propose a set of candidate cuts and set wspace.rptr wspace.cut correctly - virtual void ResetPosAndPropose(const std::vector &gpair, + virtual void ResetPosAndPropose(const std::vector &gpair, DMatrix *p_fmat, const std::vector &fset, const RegTree &tree) = 0; @@ -170,7 +170,7 @@ class HistMaker: public BaseMaker { virtual void ResetPositionAfterSplit(DMatrix *p_fmat, const RegTree &tree) { } - virtual void CreateHist(const std::vector &gpair, + virtual void CreateHist(const std::vector &gpair, DMatrix *p_fmat, const std::vector &fset, const RegTree &tree) = 0; @@ -183,14 +183,14 @@ class HistMaker: public BaseMaker { TStats *left_sum) { if (hist.size == 0) return; - double root_gain = node_sum.CalcGain(param); - TStats s(param), c(param); + double root_gain = node_sum.CalcGain(param_); + TStats s(param_), c(param_); for (bst_uint i = 0; i < hist.size; ++i) { s.Add(hist.data[i]); - if (s.sum_hess >= param.min_child_weight) { + if (s.sum_hess >= param_.min_child_weight) { c.SetSubstract(node_sum, s); - if (c.sum_hess >= param.min_child_weight) { - double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; + if (c.sum_hess >= param_.min_child_weight) { + double loss_chg = s.CalcGain(param_) + c.CalcGain(param_) - root_gain; if (best->Update(static_cast(loss_chg), fid, hist.cut[i], false)) { *left_sum = s; } @@ -200,10 +200,10 @@ class HistMaker: public BaseMaker { s.Clear(); for (bst_uint i = hist.size - 1; i != 0; --i) { s.Add(hist.data[i]); - if (s.sum_hess >= param.min_child_weight) { + if (s.sum_hess >= param_.min_child_weight) { c.SetSubstract(node_sum, s); - if (c.sum_hess >= param.min_child_weight) { - double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; + if (c.sum_hess >= param_.min_child_weight) { + double loss_chg = s.CalcGain(param_) + c.CalcGain(param_) - root_gain; if (best->Update(static_cast(loss_chg), fid, hist.cut[i-1], true)) { *left_sum = c; } @@ -212,65 +212,64 @@ class HistMaker: public BaseMaker { } } inline void FindSplit(int depth, - const std::vector &gpair, + const std::vector &gpair, DMatrix *p_fmat, const std::vector &fset, RegTree *p_tree) { const size_t num_feature = fset.size(); // get the best split condition 
for each node - std::vector sol(qexpand.size()); - std::vector left_sum(qexpand.size()); - bst_omp_uint nexpand = static_cast(qexpand.size()); + std::vector sol(qexpand_.size()); + std::vector left_sum(qexpand_.size()); + auto nexpand = static_cast(qexpand_.size()); #pragma omp parallel for schedule(dynamic, 1) for (bst_omp_uint wid = 0; wid < nexpand; ++wid) { - const int nid = qexpand[wid]; - CHECK_EQ(node2workindex[nid], static_cast(wid)); + const int nid = qexpand_[wid]; + CHECK_EQ(node2workindex_[nid], static_cast(wid)); SplitEntry &best = sol[wid]; - TStats &node_sum = wspace.hset[0][num_feature + wid * (num_feature + 1)].data[0]; + TStats &node_sum = wspace_.hset[0][num_feature + wid * (num_feature + 1)].data[0]; for (size_t i = 0; i < fset.size(); ++i) { - EnumerateSplit(this->wspace.hset[0][i + wid * (num_feature+1)], + EnumerateSplit(this->wspace_.hset[0][i + wid * (num_feature+1)], node_sum, fset[i], &best, &left_sum[wid]); } } // get the best result, we can synchronize the solution for (bst_omp_uint wid = 0; wid < nexpand; ++wid) { - const int nid = qexpand[wid]; + const int nid = qexpand_[wid]; const SplitEntry &best = sol[wid]; - const TStats &node_sum = wspace.hset[0][num_feature + wid * (num_feature + 1)].data[0]; + const TStats &node_sum = wspace_.hset[0][num_feature + wid * (num_feature + 1)].data[0]; this->SetStats(p_tree, nid, node_sum); // set up the values - p_tree->stat(nid).loss_chg = best.loss_chg; + p_tree->Stat(nid).loss_chg = best.loss_chg; // now we know the solution in snode[nid], set split - if (best.loss_chg > rt_eps) { + if (best.loss_chg > kRtEps) { p_tree->AddChilds(nid); - (*p_tree)[nid].set_split(best.split_index(), - best.split_value, best.default_left()); + (*p_tree)[nid].SetSplit(best.SplitIndex(), + best.split_value, best.DefaultLeft()); // mark right child as 0, to indicate fresh leaf - (*p_tree)[(*p_tree)[nid].cleft()].set_leaf(0.0f, 0); - (*p_tree)[(*p_tree)[nid].cright()].set_leaf(0.0f, 0); + (*p_tree)[(*p_tree)[nid].LeftChild()].SetLeaf(0.0f, 0); + (*p_tree)[(*p_tree)[nid].RightChild()].SetLeaf(0.0f, 0); // right side sum TStats right_sum; right_sum.SetSubstract(node_sum, left_sum[wid]); - this->SetStats(p_tree, (*p_tree)[nid].cleft(), left_sum[wid]); - this->SetStats(p_tree, (*p_tree)[nid].cright(), right_sum); + this->SetStats(p_tree, (*p_tree)[nid].LeftChild(), left_sum[wid]); + this->SetStats(p_tree, (*p_tree)[nid].RightChild(), right_sum); } else { - (*p_tree)[nid].set_leaf(p_tree->stat(nid).base_weight * param.learning_rate); + (*p_tree)[nid].SetLeaf(p_tree->Stat(nid).base_weight * param_.learning_rate); } } } inline void SetStats(RegTree *p_tree, int nid, const TStats &node_sum) { - p_tree->stat(nid).base_weight = static_cast(node_sum.CalcWeight(param)); - p_tree->stat(nid).sum_hess = static_cast(node_sum.sum_hess); - node_sum.SetLeafVec(param, p_tree->leafvec(nid)); + p_tree->Stat(nid).base_weight = static_cast(node_sum.CalcWeight(param_)); + p_tree->Stat(nid).sum_hess = static_cast(node_sum.sum_hess); + node_sum.SetLeafVec(param_, p_tree->Leafvec(nid)); } }; template class CQHistMaker: public HistMaker { public: - CQHistMaker() : cache_dmatrix_(nullptr) { - } + CQHistMaker() = default; protected: struct HistEntry { @@ -281,7 +280,7 @@ class CQHistMaker: public HistMaker { * do linear scan, start from istart */ inline void Add(bst_float fv, - const std::vector &gpair, + const std::vector &gpair, const MetaInfo &info, const bst_uint ridx) { while (istart < hist.size && !(fv < hist.cut[istart])) ++istart; @@ -293,7 +292,7 @@ class 
CQHistMaker: public HistMaker { * do linear scan, start from istart */ inline void Add(bst_float fv, - bst_gpair gstats) { + GradientPair gstats) { if (fv < hist.cut[istart]) { hist.data[istart].Add(gstats); } else { @@ -311,190 +310,190 @@ class CQHistMaker: public HistMaker { } }; // sketch type used for this - typedef common::WXQuantileSketch WXQSketch; + using WXQSketch = common::WXQuantileSketch; // initialize the work set of tree void InitWorkSet(DMatrix *p_fmat, const RegTree &tree, std::vector *p_fset) override { if (p_fmat != cache_dmatrix_) { - feat_helper.InitByCol(p_fmat, tree); + feat_helper_.InitByCol(p_fmat, tree); cache_dmatrix_ = p_fmat; } - feat_helper.SyncInfo(); - feat_helper.SampleCol(this->param.colsample_bytree, p_fset); + feat_helper_.SyncInfo(); + feat_helper_.SampleCol(this->param_.colsample_bytree, p_fset); } // code to create histogram - void CreateHist(const std::vector &gpair, + void CreateHist(const std::vector &gpair, DMatrix *p_fmat, const std::vector &fset, const RegTree &tree) override { - const MetaInfo &info = p_fmat->info(); + const MetaInfo &info = p_fmat->Info(); // fill in reverse map - feat2workindex.resize(tree.param.num_feature); - std::fill(feat2workindex.begin(), feat2workindex.end(), -1); + feat2workindex_.resize(tree.param.num_feature); + std::fill(feat2workindex_.begin(), feat2workindex_.end(), -1); for (size_t i = 0; i < fset.size(); ++i) { - feat2workindex[fset[i]] = static_cast(i); + feat2workindex_[fset[i]] = static_cast(i); } // start to work - this->wspace.Init(this->param, 1); + this->wspace_.Init(this->param_, 1); // if it is C++11, use lazy evaluation for Allreduce, // to gain speedup in recovery #if __cplusplus >= 201103L auto lazy_get_hist = [&]() #endif { - thread_hist.resize(omp_get_max_threads()); + thread_hist_.resize(omp_get_max_threads()); // start accumulating statistics dmlc::DataIter *iter = p_fmat->ColIterator(fset); iter->BeforeFirst(); while (iter->Next()) { const ColBatch &batch = iter->Value(); // start enumeration - const bst_omp_uint nsize = static_cast(batch.size); + const auto nsize = static_cast(batch.size); #pragma omp parallel for schedule(dynamic, 1) for (bst_omp_uint i = 0; i < nsize; ++i) { - int offset = feat2workindex[batch.col_index[i]]; + int offset = feat2workindex_[batch.col_index[i]]; if (offset >= 0) { this->UpdateHistCol(gpair, batch[i], info, tree, fset, offset, - &thread_hist[omp_get_thread_num()]); + &thread_hist_[omp_get_thread_num()]); } } } // update node statistics. 
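// ---------------------------------------------------------------------------
// [Editor's aside — illustrative sketch, not part of this patch.]
// CreateHist above sizes thread_hist_ to omp_get_max_threads() and runs a
// `#pragma omp parallel for` over column blocks, so each thread accumulates
// into private scratch and no atomics are needed on the hot path. The sketch
// below is a *simplified row-parallel* variant of that idea (the updater
// itself parallelizes over feature columns): per-thread bins, merged
// serially afterwards. GradStat and BuildHist are hypothetical names.
#include <omp.h>
#include <cstddef>
#include <vector>

struct GradStat { double grad = 0.0, hess = 0.0; };

// bin[i] is the histogram bin of row i; gpair[i] is its gradient pair.
std::vector<GradStat> BuildHist(const std::vector<int>& bin,
                                const std::vector<GradStat>& gpair,
                                int num_bins) {
  std::vector<std::vector<GradStat>> scratch(
      omp_get_max_threads(), std::vector<GradStat>(num_bins));
  #pragma omp parallel for schedule(static)
  for (int i = 0; i < static_cast<int>(bin.size()); ++i) {
    GradStat& s = scratch[omp_get_thread_num()][bin[i]];  // thread-private
    s.grad += gpair[i].grad;
    s.hess += gpair[i].hess;
  }
  std::vector<GradStat> out(num_bins);  // serial merge of thread copies
  for (const auto& t : scratch)
    for (int b = 0; b < num_bins; ++b) {
      out[b].grad += t[b].grad;
      out[b].hess += t[b].hess;
    }
  return out;
}
// [End of aside; the node-statistics update of the patch continues below.]
// ---------------------------------------------------------------------------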
this->GetNodeStats(gpair, *p_fmat, tree, - &thread_stats, &node_stats); - for (size_t i = 0; i < this->qexpand.size(); ++i) { - const int nid = this->qexpand[i]; - const int wid = this->node2workindex[nid]; - this->wspace.hset[0][fset.size() + wid * (fset.size()+1)] - .data[0] = node_stats[nid]; + &thread_stats_, &node_stats_); + for (size_t i = 0; i < this->qexpand_.size(); ++i) { + const int nid = this->qexpand_[i]; + const int wid = this->node2workindex_[nid]; + this->wspace_.hset[0][fset.size() + wid * (fset.size()+1)] + .data[0] = node_stats_[nid]; } }; // sync the histogram // if it is C++11, use lazy evaluation for Allreduce #if __cplusplus >= 201103L - this->histred.Allreduce(dmlc::BeginPtr(this->wspace.hset[0].data), - this->wspace.hset[0].data.size(), lazy_get_hist); + this->histred_.Allreduce(dmlc::BeginPtr(this->wspace_.hset[0].data), + this->wspace_.hset[0].data.size(), lazy_get_hist); #else - this->histred.Allreduce(dmlc::BeginPtr(this->wspace.hset[0].data), - this->wspace.hset[0].data.size()); + this->histred_.Allreduce(dmlc::BeginPtr(this->wspace_.hset[0].data), + this->wspace_.hset[0].data.size()); #endif } void ResetPositionAfterSplit(DMatrix *p_fmat, const RegTree &tree) override { - this->GetSplitSet(this->qexpand, tree, &fsplit_set); + this->GetSplitSet(this->qexpand_, tree, &fsplit_set_); } - void ResetPosAndPropose(const std::vector &gpair, + void ResetPosAndPropose(const std::vector &gpair, DMatrix *p_fmat, const std::vector &fset, const RegTree &tree) override { - const MetaInfo &info = p_fmat->info(); + const MetaInfo &info = p_fmat->Info(); // fill in reverse map - feat2workindex.resize(tree.param.num_feature); - std::fill(feat2workindex.begin(), feat2workindex.end(), -1); - work_set.clear(); - for (size_t i = 0; i < fset.size(); ++i) { - if (feat_helper.Type(fset[i]) == 2) { - feat2workindex[fset[i]] = static_cast(work_set.size()); - work_set.push_back(fset[i]); + feat2workindex_.resize(tree.param.num_feature); + std::fill(feat2workindex_.begin(), feat2workindex_.end(), -1); + work_set_.clear(); + for (auto fidx : fset) { + if (feat_helper_.Type(fidx) == 2) { + feat2workindex_[fidx] = static_cast(work_set_.size()); + work_set_.push_back(fidx); } else { - feat2workindex[fset[i]] = -2; + feat2workindex_[fidx] = -2; } } - const size_t work_set_size = work_set.size(); + const size_t work_set_size = work_set_.size(); - sketchs.resize(this->qexpand.size() * work_set_size); - for (size_t i = 0; i < sketchs.size(); ++i) { - sketchs[i].Init(info.num_row, this->param.sketch_eps); + sketchs_.resize(this->qexpand_.size() * work_set_size); + for (size_t i = 0; i < sketchs_.size(); ++i) { + sketchs_[i].Init(info.num_row_, this->param_.sketch_eps); } // intitialize the summary array - summary_array.resize(sketchs.size()); + summary_array_.resize(sketchs_.size()); // setup maximum size - unsigned max_size = this->param.max_sketch_size(); - for (size_t i = 0; i < sketchs.size(); ++i) { - summary_array[i].Reserve(max_size); + unsigned max_size = this->param_.MaxSketchSize(); + for (size_t i = 0; i < sketchs_.size(); ++i) { + summary_array_[i].Reserve(max_size); } { // get smmary - thread_sketch.resize(omp_get_max_threads()); + thread_sketch_.resize(omp_get_max_threads()); // TWOPASS: use the real set + split set in the column iteration. 
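// ---------------------------------------------------------------------------
// [Editor's aside — illustrative sketch, not part of this patch.]
// The histred_.Allreduce(..., lazy_get_hist) call above hands the local
// histogram construction to the reducer as a callback instead of running it
// eagerly. Under rabit's fault-tolerance model a restarted worker can restore
// the already-reduced buffer from cache and skip the expensive local pass.
// A hypothetical single-process stand-in (rabit's real API differs):
#include <functional>
#include <vector>

template <typename T>
void AllreduceSum(std::vector<T>* sendrecv,
                  const std::function<void()>& prepare,
                  bool result_available_from_cache) {
  if (result_available_from_cache) {
    // Recovery path: a real implementation would load the cached reduced
    // buffer into *sendrecv here and skip `prepare` entirely.
    return;
  }
  prepare();  // normal path: build the local input lazily
  // ...cross-worker summation over *sendrecv would happen here...
}

// Usage mirrors the call above:
//   AllreduceSum(&hist, [&] { BuildLocalHist(&hist); }, recovered);
// [End of aside; the TWOPASS column scan of the patch continues below.]
// ---------------------------------------------------------------------------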
this->SetDefaultPostion(p_fmat, tree); - work_set.insert(work_set.end(), fsplit_set.begin(), fsplit_set.end()); - std::sort(work_set.begin(), work_set.end()); - work_set.resize(std::unique(work_set.begin(), work_set.end()) - work_set.begin()); + work_set_.insert(work_set_.end(), fsplit_set_.begin(), fsplit_set_.end()); + std::sort(work_set_.begin(), work_set_.end()); + work_set_.resize(std::unique(work_set_.begin(), work_set_.end()) - work_set_.begin()); // start accumulating statistics - dmlc::DataIter *iter = p_fmat->ColIterator(work_set); + dmlc::DataIter *iter = p_fmat->ColIterator(work_set_); iter->BeforeFirst(); while (iter->Next()) { const ColBatch &batch = iter->Value(); // TWOPASS: use the real set + split set in the column iteration. - this->CorrectNonDefaultPositionByBatch(batch, fsplit_set, tree); + this->CorrectNonDefaultPositionByBatch(batch, fsplit_set_, tree); // start enumeration - const bst_omp_uint nsize = static_cast(batch.size); + const auto nsize = static_cast(batch.size); #pragma omp parallel for schedule(dynamic, 1) for (bst_omp_uint i = 0; i < nsize; ++i) { - int offset = feat2workindex[batch.col_index[i]]; + int offset = feat2workindex_[batch.col_index[i]]; if (offset >= 0) { this->UpdateSketchCol(gpair, batch[i], tree, work_set_size, offset, - &thread_sketch[omp_get_thread_num()]); + &thread_sketch_[omp_get_thread_num()]); } } } - for (size_t i = 0; i < sketchs.size(); ++i) { + for (size_t i = 0; i < sketchs_.size(); ++i) { common::WXQuantileSketch::SummaryContainer out; - sketchs[i].GetSummary(&out); - summary_array[i].SetPrune(out, max_size); + sketchs_[i].GetSummary(&out); + summary_array_[i].SetPrune(out, max_size); } - CHECK_EQ(summary_array.size(), sketchs.size()); + CHECK_EQ(summary_array_.size(), sketchs_.size()); } - if (summary_array.size() != 0) { + if (summary_array_.size() != 0) { size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size); - sreducer.Allreduce(dmlc::BeginPtr(summary_array), nbytes, summary_array.size()); + sreducer_.Allreduce(dmlc::BeginPtr(summary_array_), nbytes, summary_array_.size()); } // now we get the final result of sketch, setup the cut - this->wspace.cut.clear(); - this->wspace.rptr.clear(); - this->wspace.rptr.push_back(0); - for (size_t wid = 0; wid < this->qexpand.size(); ++wid) { - for (size_t i = 0; i < fset.size(); ++i) { - int offset = feat2workindex[fset[i]]; + this->wspace_.cut.clear(); + this->wspace_.rptr.clear(); + this->wspace_.rptr.push_back(0); + for (size_t wid = 0; wid < this->qexpand_.size(); ++wid) { + for (unsigned int i : fset) { + int offset = feat2workindex_[i]; if (offset >= 0) { - const WXQSketch::Summary &a = summary_array[wid * work_set_size + offset]; + const WXQSketch::Summary &a = summary_array_[wid * work_set_size + offset]; for (size_t i = 1; i < a.size; ++i) { - bst_float cpt = a.data[i].value - rt_eps; - if (i == 1 || cpt > this->wspace.cut.back()) { - this->wspace.cut.push_back(cpt); + bst_float cpt = a.data[i].value - kRtEps; + if (i == 1 || cpt > this->wspace_.cut.back()) { + this->wspace_.cut.push_back(cpt); } } // push a value that is greater than anything if (a.size != 0) { bst_float cpt = a.data[a.size - 1].value; // this must be bigger than last value in a scale - bst_float last = cpt + fabs(cpt) + rt_eps; - this->wspace.cut.push_back(last); + bst_float last = cpt + fabs(cpt) + kRtEps; + this->wspace_.cut.push_back(last); } - this->wspace.rptr.push_back(static_cast(this->wspace.cut.size())); + this->wspace_.rptr.push_back(static_cast(this->wspace_.cut.size())); } else { 
CHECK_EQ(offset, -2); - bst_float cpt = feat_helper.MaxValue(fset[i]); - this->wspace.cut.push_back(cpt + fabs(cpt) + rt_eps); - this->wspace.rptr.push_back(static_cast(this->wspace.cut.size())); + bst_float cpt = feat_helper_.MaxValue(i); + this->wspace_.cut.push_back(cpt + fabs(cpt) + kRtEps); + this->wspace_.rptr.push_back(static_cast(this->wspace_.cut.size())); } } // reserve last value for global statistics - this->wspace.cut.push_back(0.0f); - this->wspace.rptr.push_back(static_cast(this->wspace.cut.size())); + this->wspace_.cut.push_back(0.0f); + this->wspace_.rptr.push_back(static_cast(this->wspace_.cut.size())); } - CHECK_EQ(this->wspace.rptr.size(), - (fset.size() + 1) * this->qexpand.size() + 1); + CHECK_EQ(this->wspace_.rptr.size(), + (fset.size() + 1) * this->qexpand_.size() + 1); } - inline void UpdateHistCol(const std::vector &gpair, + inline void UpdateHistCol(const std::vector &gpair, const ColBatch::Inst &c, const MetaInfo &info, const RegTree &tree, @@ -505,21 +504,21 @@ class CQHistMaker: public HistMaker { // initialize sbuilder for use std::vector &hbuilder = *p_temp; hbuilder.resize(tree.param.num_nodes); - for (size_t i = 0; i < this->qexpand.size(); ++i) { - const unsigned nid = this->qexpand[i]; - const unsigned wid = this->node2workindex[nid]; + for (size_t i = 0; i < this->qexpand_.size(); ++i) { + const unsigned nid = this->qexpand_[i]; + const unsigned wid = this->node2workindex_[nid]; hbuilder[nid].istart = 0; - hbuilder[nid].hist = this->wspace.hset[0][fid_offset + wid * (fset.size()+1)]; + hbuilder[nid].hist = this->wspace_.hset[0][fid_offset + wid * (fset.size()+1)]; } - if (TStats::kSimpleStats != 0 && this->param.cache_opt != 0) { - const bst_uint kBuffer = 32; + if (TStats::kSimpleStats != 0 && this->param_.cache_opt != 0) { + constexpr bst_uint kBuffer = 32; bst_uint align_length = c.length / kBuffer * kBuffer; int buf_position[kBuffer]; - bst_gpair buf_gpair[kBuffer]; + GradientPair buf_gpair[kBuffer]; for (bst_uint j = 0; j < align_length; j += kBuffer) { for (bst_uint i = 0; i < kBuffer; ++i) { bst_uint ridx = c[j + i].index; - buf_position[i] = this->position[ridx]; + buf_position[i] = this->position_[ridx]; buf_gpair[i] = gpair[ridx]; } for (bst_uint i = 0; i < kBuffer; ++i) { @@ -531,7 +530,7 @@ class CQHistMaker: public HistMaker { } for (bst_uint j = align_length; j < c.length; ++j) { const bst_uint ridx = c[j].index; - const int nid = this->position[ridx]; + const int nid = this->position_[ridx]; if (nid >= 0) { hbuilder[nid].Add(c[j].fvalue, gpair[ridx]); } @@ -539,14 +538,14 @@ class CQHistMaker: public HistMaker { } else { for (bst_uint j = 0; j < c.length; ++j) { const bst_uint ridx = c[j].index; - const int nid = this->position[ridx]; + const int nid = this->position_[ridx]; if (nid >= 0) { hbuilder[nid].Add(c[j].fvalue, gpair, info, ridx); } } } } - inline void UpdateSketchCol(const std::vector &gpair, + inline void UpdateSketchCol(const std::vector &gpair, const ColBatch::Inst &c, const RegTree &tree, size_t work_set_size, @@ -556,45 +555,45 @@ class CQHistMaker: public HistMaker { // initialize sbuilder for use std::vector &sbuilder = *p_temp; sbuilder.resize(tree.param.num_nodes); - for (size_t i = 0; i < this->qexpand.size(); ++i) { - const unsigned nid = this->qexpand[i]; - const unsigned wid = this->node2workindex[nid]; + for (size_t i = 0; i < this->qexpand_.size(); ++i) { + const unsigned nid = this->qexpand_[i]; + const unsigned wid = this->node2workindex_[nid]; sbuilder[nid].sum_total = 0.0f; - sbuilder[nid].sketch = 
&sketchs[wid * work_set_size + offset]; + sbuilder[nid].sketch = &sketchs_[wid * work_set_size + offset]; } // first pass, get sum of weight, TODO, optimization to skip first pass for (bst_uint j = 0; j < c.length; ++j) { const bst_uint ridx = c[j].index; - const int nid = this->position[ridx]; + const int nid = this->position_[ridx]; if (nid >= 0) { sbuilder[nid].sum_total += gpair[ridx].GetHess(); } } // if only one value, no need to do second pass if (c[0].fvalue == c[c.length-1].fvalue) { - for (size_t i = 0; i < this->qexpand.size(); ++i) { - const int nid = this->qexpand[i]; + for (size_t i = 0; i < this->qexpand_.size(); ++i) { + const int nid = this->qexpand_[i]; sbuilder[nid].sketch->Push(c[0].fvalue, static_cast(sbuilder[nid].sum_total)); } return; } // two pass scan - unsigned max_size = this->param.max_sketch_size(); - for (size_t i = 0; i < this->qexpand.size(); ++i) { - const int nid = this->qexpand[i]; + unsigned max_size = this->param_.MaxSketchSize(); + for (size_t i = 0; i < this->qexpand_.size(); ++i) { + const int nid = this->qexpand_[i]; sbuilder[nid].Init(max_size); } // second pass, build the sketch - if (TStats::kSimpleStats != 0 && this->param.cache_opt != 0) { - const bst_uint kBuffer = 32; + if (TStats::kSimpleStats != 0 && this->param_.cache_opt != 0) { + constexpr bst_uint kBuffer = 32; bst_uint align_length = c.length / kBuffer * kBuffer; int buf_position[kBuffer]; bst_float buf_hess[kBuffer]; for (bst_uint j = 0; j < align_length; j += kBuffer) { for (bst_uint i = 0; i < kBuffer; ++i) { bst_uint ridx = c[j + i].index; - buf_position[i] = this->position[ridx]; + buf_position[i] = this->position_[ridx]; buf_hess[i] = gpair[ridx].GetHess(); } for (bst_uint i = 0; i < kBuffer; ++i) { @@ -606,7 +605,7 @@ class CQHistMaker: public HistMaker { } for (bst_uint j = align_length; j < c.length; ++j) { const bst_uint ridx = c[j].index; - const int nid = this->position[ridx]; + const int nid = this->position_[ridx]; if (nid >= 0) { sbuilder[nid].Push(c[j].fvalue, gpair[ridx].GetHess(), max_size); } @@ -614,136 +613,137 @@ class CQHistMaker: public HistMaker { } else { for (bst_uint j = 0; j < c.length; ++j) { const bst_uint ridx = c[j].index; - const int nid = this->position[ridx]; + const int nid = this->position_[ridx]; if (nid >= 0) { sbuilder[nid].Push(c[j].fvalue, gpair[ridx].GetHess(), max_size); } } } - for (size_t i = 0; i < this->qexpand.size(); ++i) { - const int nid = this->qexpand[i]; + for (size_t i = 0; i < this->qexpand_.size(); ++i) { + const int nid = this->qexpand_[i]; sbuilder[nid].Finalize(max_size); } } // cached dmatrix where we initialized the feature on. - const DMatrix* cache_dmatrix_; + const DMatrix* cache_dmatrix_{nullptr}; // feature helper - BaseMaker::FMetaHelper feat_helper; + BaseMaker::FMetaHelper feat_helper_; // temp space to map feature id to working index - std::vector feat2workindex; + std::vector feat2workindex_; // set of index from fset that are current work set - std::vector work_set; + std::vector work_set_; // set of index from that are split candidates. 
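// ---------------------------------------------------------------------------
// [Editor's aside — illustrative sketch, not part of this patch.]
// UpdateSketchCol above pushes (feature value, hessian) pairs into per-node
// WXQuantileSketch objects; the proposed cuts approximate evenly spaced
// hessian-weighted quantiles. An exact, in-memory simplification of what the
// streaming sketch approximates (assumes max_bins > 0; names hypothetical):
#include <algorithm>
#include <utility>
#include <vector>

std::vector<float> WeightedQuantileCuts(
    std::vector<std::pair<float, float>> value_hess,  // (value, hessian)
    int max_bins) {
  std::sort(value_hess.begin(), value_hess.end());
  double total = 0.0;
  for (const auto& p : value_hess) total += p.second;
  std::vector<float> cuts;
  double acc = 0.0;
  const double step = total / max_bins;  // weight mass per bin
  double next = step;
  for (const auto& p : value_hess) {
    acc += p.second;
    if (acc >= next && (cuts.empty() || p.first > cuts.back())) {
      cuts.push_back(p.first);  // cut at the weighted-quantile crossing
      next += step;
    }
  }
  return cuts;
}
// [End of aside; the member-variable renames of the patch continue below.]
// ---------------------------------------------------------------------------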
- std::vector fsplit_set; + std::vector fsplit_set_; // thread temp data - std::vector > thread_sketch; + std::vector > thread_sketch_; // used to hold statistics - std::vector > thread_stats; + std::vector > thread_stats_; // used to hold start pointer - std::vector > thread_hist; + std::vector > thread_hist_; // node statistics - std::vector node_stats; + std::vector node_stats_; // summary array - std::vector summary_array; + std::vector summary_array_; // reducer for summary - rabit::SerializeReducer sreducer; + rabit::SerializeReducer sreducer_; // per node, per feature sketch - std::vector > sketchs; + std::vector > sketchs_; }; // global proposal template class GlobalProposalHistMaker: public CQHistMaker { protected: - void ResetPosAndPropose(const std::vector &gpair, + void ResetPosAndPropose(const std::vector &gpair, DMatrix *p_fmat, const std::vector &fset, const RegTree &tree) override { - if (this->qexpand.size() == 1) { + if (this->qexpand_.size() == 1) { cached_rptr_.clear(); cached_cut_.clear(); } if (cached_rptr_.size() == 0) { - CHECK_EQ(this->qexpand.size(), 1U); + CHECK_EQ(this->qexpand_.size(), 1U); CQHistMaker::ResetPosAndPropose(gpair, p_fmat, fset, tree); - cached_rptr_ = this->wspace.rptr; - cached_cut_ = this->wspace.cut; + cached_rptr_ = this->wspace_.rptr; + cached_cut_ = this->wspace_.cut; } else { - this->wspace.cut.clear(); - this->wspace.rptr.clear(); - this->wspace.rptr.push_back(0); - for (size_t i = 0; i < this->qexpand.size(); ++i) { + this->wspace_.cut.clear(); + this->wspace_.rptr.clear(); + this->wspace_.rptr.push_back(0); + for (size_t i = 0; i < this->qexpand_.size(); ++i) { for (size_t j = 0; j < cached_rptr_.size() - 1; ++j) { - this->wspace.rptr.push_back( - this->wspace.rptr.back() + cached_rptr_[j + 1] - cached_rptr_[j]); + this->wspace_.rptr.push_back( + this->wspace_.rptr.back() + cached_rptr_[j + 1] - cached_rptr_[j]); } - this->wspace.cut.insert(this->wspace.cut.end(), cached_cut_.begin(), cached_cut_.end()); + this->wspace_.cut.insert(this->wspace_.cut.end(), cached_cut_.begin(), cached_cut_.end()); } - CHECK_EQ(this->wspace.rptr.size(), - (fset.size() + 1) * this->qexpand.size() + 1); - CHECK_EQ(this->wspace.rptr.back(), this->wspace.cut.size()); + CHECK_EQ(this->wspace_.rptr.size(), + (fset.size() + 1) * this->qexpand_.size() + 1); + CHECK_EQ(this->wspace_.rptr.back(), this->wspace_.cut.size()); } } // code to create histogram - void CreateHist(const std::vector &gpair, + void CreateHist(const std::vector &gpair, DMatrix *p_fmat, const std::vector &fset, const RegTree &tree) override { - const MetaInfo &info = p_fmat->info(); + const MetaInfo &info = p_fmat->Info(); // fill in reverse map - this->feat2workindex.resize(tree.param.num_feature); - this->work_set = fset; - std::fill(this->feat2workindex.begin(), this->feat2workindex.end(), -1); + this->feat2workindex_.resize(tree.param.num_feature); + this->work_set_ = fset; + std::fill(this->feat2workindex_.begin(), this->feat2workindex_.end(), -1); for (size_t i = 0; i < fset.size(); ++i) { - this->feat2workindex[fset[i]] = static_cast(i); + this->feat2workindex_[fset[i]] = static_cast(i); } // start to work - this->wspace.Init(this->param, 1); + this->wspace_.Init(this->param_, 1); // to gain speedup in recovery { - this->thread_hist.resize(omp_get_max_threads()); + this->thread_hist_.resize(omp_get_max_threads()); // TWOPASS: use the real set + split set in the column iteration. 
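// ---------------------------------------------------------------------------
// [Editor's aside — illustrative sketch, not part of this patch.]
// GlobalProposalHistMaker::ResetPosAndPropose above caches the root-level
// proposal in cached_rptr_/cached_cut_ and, for later depths, tiles it once
// per expanded node: row pointers are shifted, cut values repeated verbatim.
// A standalone sketch of that replication (assumes cached_rptr.back() equals
// cached_cut.size(), the invariant the CHECK_EQ above enforces):
#include <cstddef>
#include <utility>
#include <vector>

std::pair<std::vector<unsigned>, std::vector<float>> ReplicateCuts(
    const std::vector<unsigned>& cached_rptr,
    const std::vector<float>& cached_cut, std::size_t num_nodes) {
  std::vector<unsigned> rptr{0};
  std::vector<float> cut;
  for (std::size_t n = 0; n < num_nodes; ++n) {
    // Shift each cached segment length onto the running offset.
    for (std::size_t j = 0; j + 1 < cached_rptr.size(); ++j)
      rptr.push_back(rptr.back() + (cached_rptr[j + 1] - cached_rptr[j]));
    cut.insert(cut.end(), cached_cut.begin(), cached_cut.end());
  }
  return {rptr, cut};  // rptr.back() == cut.size() holds under the assumption
}
// [End of aside; the TWOPASS column scan of the patch continues below.]
// ---------------------------------------------------------------------------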
this->SetDefaultPostion(p_fmat, tree); - this->work_set.insert(this->work_set.end(), this->fsplit_set.begin(), this->fsplit_set.end()); - std::sort(this->work_set.begin(), this->work_set.end()); - this->work_set.resize( - std::unique(this->work_set.begin(), this->work_set.end()) - this->work_set.begin()); + this->work_set_.insert(this->work_set_.end(), this->fsplit_set_.begin(), + this->fsplit_set_.end()); + std::sort(this->work_set_.begin(), this->work_set_.end()); + this->work_set_.resize( + std::unique(this->work_set_.begin(), this->work_set_.end()) - this->work_set_.begin()); // start accumulating statistics - dmlc::DataIter *iter = p_fmat->ColIterator(this->work_set); + dmlc::DataIter *iter = p_fmat->ColIterator(this->work_set_); iter->BeforeFirst(); while (iter->Next()) { const ColBatch &batch = iter->Value(); // TWOPASS: use the real set + split set in the column iteration. - this->CorrectNonDefaultPositionByBatch(batch, this->fsplit_set, tree); + this->CorrectNonDefaultPositionByBatch(batch, this->fsplit_set_, tree); // start enumeration - const bst_omp_uint nsize = static_cast(batch.size); + const auto nsize = static_cast(batch.size); #pragma omp parallel for schedule(dynamic, 1) for (bst_omp_uint i = 0; i < nsize; ++i) { - int offset = this->feat2workindex[batch.col_index[i]]; + int offset = this->feat2workindex_[batch.col_index[i]]; if (offset >= 0) { this->UpdateHistCol(gpair, batch[i], info, tree, fset, offset, - &this->thread_hist[omp_get_thread_num()]); + &this->thread_hist_[omp_get_thread_num()]); } } } // update node statistics. this->GetNodeStats(gpair, *p_fmat, tree, - &(this->thread_stats), &(this->node_stats)); - for (size_t i = 0; i < this->qexpand.size(); ++i) { - const int nid = this->qexpand[i]; - const int wid = this->node2workindex[nid]; - this->wspace.hset[0][fset.size() + wid * (fset.size()+1)] - .data[0] = this->node_stats[nid]; + &(this->thread_stats_), &(this->node_stats_)); + for (size_t i = 0; i < this->qexpand_.size(); ++i) { + const int nid = this->qexpand_[i]; + const int wid = this->node2workindex_[nid]; + this->wspace_.hset[0][fset.size() + wid * (fset.size()+1)] + .data[0] = this->node_stats_[nid]; } } - this->histred.Allreduce(dmlc::BeginPtr(this->wspace.hset[0].data), - this->wspace.hset[0].data.size()); + this->histred_.Allreduce(dmlc::BeginPtr(this->wspace_.hset[0].data), + this->wspace_.hset[0].data.size()); } // cached unit pointer @@ -756,17 +756,17 @@ class GlobalProposalHistMaker: public CQHistMaker { template class QuantileHistMaker: public HistMaker { protected: - typedef common::WXQuantileSketch WXQSketch; - void ResetPosAndPropose(const std::vector &gpair, + using WXQSketch = common::WXQuantileSketch; + void ResetPosAndPropose(const std::vector &gpair, DMatrix *p_fmat, const std::vector &fset, const RegTree &tree) override { - const MetaInfo &info = p_fmat->info(); + const MetaInfo &info = p_fmat->Info(); // initialize the data structure const int nthread = omp_get_max_threads(); - sketchs.resize(this->qexpand.size() * tree.param.num_feature); - for (size_t i = 0; i < sketchs.size(); ++i) { - sketchs[i].Init(info.num_row, this->param.sketch_eps); + sketchs_.resize(this->qexpand_.size() * tree.param.num_feature); + for (size_t i = 0; i < sketchs_.size(); ++i) { + sketchs_[i].Init(info.num_row_, this->param_.sketch_eps); } // start accumulating statistics dmlc::DataIter *iter = p_fmat->RowIterator(); @@ -775,7 +775,7 @@ class QuantileHistMaker: public HistMaker { const RowBatch &batch = iter->Value(); // parallel convert to column major 
format common::ParallelGroupBuilder - builder(&col_ptr, &col_data, &thread_col_ptr); + builder(&col_ptr_, &col_data_, &thread_col_ptr_); builder.InitBudget(tree.param.num_feature, nthread); const bst_omp_uint nbatch = static_cast(batch.size); @@ -783,13 +783,13 @@ class QuantileHistMaker: public HistMaker { for (bst_omp_uint i = 0; i < nbatch; ++i) { RowBatch::Inst inst = batch[i]; const bst_uint ridx = static_cast(batch.base_rowid + i); - int nid = this->position[ridx]; + int nid = this->position_[ridx]; if (nid >= 0) { - if (!tree[nid].is_leaf()) { - this->position[ridx] = nid = HistMaker::NextLevel(inst, tree, nid); + if (!tree[nid].IsLeaf()) { + this->position_[ridx] = nid = HistMaker::NextLevel(inst, tree, nid); } - if (this->node2workindex[nid] < 0) { - this->position[ridx] = ~nid; + if (this->node2workindex_[nid] < 0) { + this->position_[ridx] = ~nid; } else { for (bst_uint j = 0; j < inst.length; ++j) { builder.AddBudget(inst[j].index, omp_get_thread_num()); @@ -802,7 +802,7 @@ class QuantileHistMaker: public HistMaker { for (bst_omp_uint i = 0; i < nbatch; ++i) { RowBatch::Inst inst = batch[i]; const bst_uint ridx = static_cast(batch.base_rowid + i); - const int nid = this->position[ridx]; + const int nid = this->position_[ridx]; if (nid >= 0) { for (bst_uint j = 0; j < inst.length; ++j) { builder.Push(inst[j].index, @@ -812,71 +812,71 @@ class QuantileHistMaker: public HistMaker { } } // start putting things into sketch - const bst_omp_uint nfeat = col_ptr.size() - 1; + const bst_omp_uint nfeat = col_ptr_.size() - 1; #pragma omp parallel for schedule(dynamic, 1) for (bst_omp_uint k = 0; k < nfeat; ++k) { - for (size_t i = col_ptr[k]; i < col_ptr[k+1]; ++i) { - const SparseBatch::Entry &e = col_data[i]; - const int wid = this->node2workindex[e.index]; - sketchs[wid * tree.param.num_feature + k].Push(e.fvalue, gpair[e.index].GetHess()); + for (size_t i = col_ptr_[k]; i < col_ptr_[k+1]; ++i) { + const SparseBatch::Entry &e = col_data_[i]; + const int wid = this->node2workindex_[e.index]; + sketchs_[wid * tree.param.num_feature + k].Push(e.fvalue, gpair[e.index].GetHess()); } } } // setup maximum size - unsigned max_size = this->param.max_sketch_size(); + unsigned max_size = this->param_.MaxSketchSize(); // synchronize sketch - summary_array.resize(sketchs.size()); - for (size_t i = 0; i < sketchs.size(); ++i) { + summary_array_.resize(sketchs_.size()); + for (size_t i = 0; i < sketchs_.size(); ++i) { common::WQuantileSketch::SummaryContainer out; - sketchs[i].GetSummary(&out); - summary_array[i].Reserve(max_size); - summary_array[i].SetPrune(out, max_size); + sketchs_[i].GetSummary(&out); + summary_array_[i].Reserve(max_size); + summary_array_[i].SetPrune(out, max_size); } size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size); - sreducer.Allreduce(dmlc::BeginPtr(summary_array), nbytes, summary_array.size()); + sreducer_.Allreduce(dmlc::BeginPtr(summary_array_), nbytes, summary_array_.size()); // now we get the final result of sketch, setup the cut - this->wspace.cut.clear(); - this->wspace.rptr.clear(); - this->wspace.rptr.push_back(0); - for (size_t wid = 0; wid < this->qexpand.size(); ++wid) { + this->wspace_.cut.clear(); + this->wspace_.rptr.clear(); + this->wspace_.rptr.push_back(0); + for (size_t wid = 0; wid < this->qexpand_.size(); ++wid) { for (int fid = 0; fid < tree.param.num_feature; ++fid) { - const WXQSketch::Summary &a = summary_array[wid * tree.param.num_feature + fid]; + const WXQSketch::Summary &a = summary_array_[wid * tree.param.num_feature + fid]; 
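// ---------------------------------------------------------------------------
// [Editor's aside — illustrative sketch, not part of this patch.]
// The loop just below turns each pruned summary into histogram cuts: interior
// values are shifted down by a tiny epsilon and deduplicated so the cut list
// stays strictly increasing, then one sentinel cut strictly above the
// observed maximum closes the last bin. Standalone sketch (kEps stands in
// for xgboost's kRtEps):
#include <cmath>
#include <cstddef>
#include <vector>

constexpr float kEps = 1e-6f;

void AppendCuts(const std::vector<float>& summary, std::vector<float>* cut) {
  for (std::size_t i = 1; i < summary.size(); ++i) {
    const float cpt = summary[i] - kEps;
    if (i == 1 || cpt > cut->back()) cut->push_back(cpt);  // keep increasing
  }
  if (!summary.empty()) {
    const float last = summary.back();
    // last + |last| + eps is strictly greater than any observed value,
    // even when last is negative.
    cut->push_back(last + std::fabs(last) + kEps);
  }
}
// [End of aside; the patch's cut-construction loop follows below.]
// ---------------------------------------------------------------------------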
for (size_t i = 1; i < a.size; ++i) { - bst_float cpt = a.data[i].value - rt_eps; - if (i == 1 || cpt > this->wspace.cut.back()) { - this->wspace.cut.push_back(cpt); + bst_float cpt = a.data[i].value - kRtEps; + if (i == 1 || cpt > this->wspace_.cut.back()) { + this->wspace_.cut.push_back(cpt); } } // push a value that is greater than anything if (a.size != 0) { bst_float cpt = a.data[a.size - 1].value; // this must be bigger than last value in a scale - bst_float last = cpt + fabs(cpt) + rt_eps; - this->wspace.cut.push_back(last); + bst_float last = cpt + fabs(cpt) + kRtEps; + this->wspace_.cut.push_back(last); } - this->wspace.rptr.push_back(this->wspace.cut.size()); + this->wspace_.rptr.push_back(this->wspace_.cut.size()); } // reserve last value for global statistics - this->wspace.cut.push_back(0.0f); - this->wspace.rptr.push_back(this->wspace.cut.size()); + this->wspace_.cut.push_back(0.0f); + this->wspace_.rptr.push_back(this->wspace_.cut.size()); } - CHECK_EQ(this->wspace.rptr.size(), - (tree.param.num_feature + 1) * this->qexpand.size() + 1); + CHECK_EQ(this->wspace_.rptr.size(), + (tree.param.num_feature + 1) * this->qexpand_.size() + 1); } private: // summary array - std::vector summary_array; + std::vector summary_array_; // reducer for summary - rabit::SerializeReducer sreducer; + rabit::SerializeReducer sreducer_; // local temp column data structure - std::vector col_ptr; + std::vector col_ptr_; // local storage of column data - std::vector col_data; - std::vector > thread_col_ptr; + std::vector col_data_; + std::vector > thread_col_ptr_; // per node, per feature sketch - std::vector > sketchs; + std::vector > sketchs_; }; XGBOOST_REGISTER_TREE_UPDATER(LocalHistMaker, "grow_local_histmaker") diff --git a/src/tree/updater_prune.cc b/src/tree/updater_prune.cc index bbdc155d1..3710cd28b 100644 --- a/src/tree/updater_prune.cc +++ b/src/tree/updater_prune.cc @@ -21,37 +21,37 @@ DMLC_REGISTRY_FILE_TAG(updater_prune); class TreePruner: public TreeUpdater { public: TreePruner() { - syncher.reset(TreeUpdater::Create("sync")); + syncher_.reset(TreeUpdater::Create("sync")); } // set training parameter void Init(const std::vector >& args) override { - param.InitAllowUnknown(args); - syncher->Init(args); + param_.InitAllowUnknown(args); + syncher_->Init(args); } // update the tree, do pruning - void Update(HostDeviceVector *gpair, + void Update(HostDeviceVector *gpair, DMatrix *p_fmat, const std::vector &trees) override { // rescale learning rate according to size of trees - float lr = param.learning_rate; - param.learning_rate = lr / trees.size(); - for (size_t i = 0; i < trees.size(); ++i) { - this->DoPrune(*trees[i]); + float lr = param_.learning_rate; + param_.learning_rate = lr / trees.size(); + for (auto tree : trees) { + this->DoPrune(*tree); } - param.learning_rate = lr; - syncher->Update(gpair, p_fmat, trees); + param_.learning_rate = lr; + syncher_->Update(gpair, p_fmat, trees); } private: // try to prune off current leaf inline int TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) { // NOLINT(*) - if (tree[nid].is_root()) return npruned; - int pid = tree[nid].parent(); - RegTree::NodeStat &s = tree.stat(pid); + if (tree[nid].IsRoot()) return npruned; + int pid = tree[nid].Parent(); + RegTree::NodeStat &s = tree.Stat(pid); ++s.leaf_child_cnt; - if (s.leaf_child_cnt >= 2 && param.need_prune(s.loss_chg, depth - 1)) { + if (s.leaf_child_cnt >= 2 && param_.NeedPrune(s.loss_chg, depth - 1)) { // need to be pruned - tree.ChangeToLeaf(pid, param.learning_rate * 
s.base_weight); + tree.ChangeToLeaf(pid, param_.learning_rate * s.base_weight); // tail recursion return this->TryPruneLeaf(tree, pid, depth - 1, npruned + 2); } else { @@ -63,25 +63,25 @@ class TreePruner: public TreeUpdater { int npruned = 0; // initialize auxiliary statistics for (int nid = 0; nid < tree.param.num_nodes; ++nid) { - tree.stat(nid).leaf_child_cnt = 0; + tree.Stat(nid).leaf_child_cnt = 0; } for (int nid = 0; nid < tree.param.num_nodes; ++nid) { - if (tree[nid].is_leaf()) { + if (tree[nid].IsLeaf()) { npruned = this->TryPruneLeaf(tree, nid, tree.GetDepth(nid), npruned); } } - if (!param.silent) { + if (!param_.silent) { LOG(INFO) << "tree pruning end, " << tree.param.num_roots << " roots, " - << tree.num_extra_nodes() << " extra nodes, " << npruned + << tree.NumExtraNodes() << " extra nodes, " << npruned << " pruned nodes, max_depth=" << tree.MaxDepth(); } } private: // synchronizer - std::unique_ptr syncher; + std::unique_ptr syncher_; // training parameter - TrainParam param; + TrainParam param_; }; XGBOOST_REGISTER_TREE_UPDATER(TreePruner, "prune") diff --git a/src/tree/updater_refresh.cc b/src/tree/updater_refresh.cc index e94a92147..ff179006e 100644 --- a/src/tree/updater_refresh.cc +++ b/src/tree/updater_refresh.cc @@ -22,14 +22,14 @@ template class TreeRefresher: public TreeUpdater { public: void Init(const std::vector >& args) override { - param.InitAllowUnknown(args); + param_.InitAllowUnknown(args); } // update the tree, do pruning - void Update(HostDeviceVector *gpair, + void Update(HostDeviceVector *gpair, DMatrix *p_fmat, const std::vector &trees) override { if (trees.size() == 0) return; - std::vector &gpair_h = gpair->data_h(); + std::vector &gpair_h = gpair->HostVector(); // number of threads // thread temporal space std::vector > stemp; @@ -42,11 +42,11 @@ class TreeRefresher: public TreeUpdater { { int tid = omp_get_thread_num(); int num_nodes = 0; - for (size_t i = 0; i < trees.size(); ++i) { - num_nodes += trees[i]->param.num_nodes; + for (auto tree : trees) { + num_nodes += tree->param.num_nodes; } - stemp[tid].resize(num_nodes, TStats(param)); - std::fill(stemp[tid].begin(), stemp[tid].end(), TStats(param)); + stemp[tid].resize(num_nodes, TStats(param_)); + std::fill(stemp[tid].begin(), stemp[tid].end(), TStats(param_)); fvec_temp[tid].Init(trees[0]->param.num_feature); } // if it is C++11, use lazy evaluation for Allreduce, @@ -55,32 +55,32 @@ class TreeRefresher: public TreeUpdater { auto lazy_get_stats = [&]() #endif { - const MetaInfo &info = p_fmat->info(); + const MetaInfo &info = p_fmat->Info(); // start accumulating statistics dmlc::DataIter *iter = p_fmat->RowIterator(); iter->BeforeFirst(); while (iter->Next()) { const RowBatch &batch = iter->Value(); CHECK_LT(batch.size, std::numeric_limits::max()); - const bst_omp_uint nbatch = static_cast(batch.size); + const auto nbatch = static_cast(batch.size); #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < nbatch; ++i) { RowBatch::Inst inst = batch[i]; const int tid = omp_get_thread_num(); - const bst_uint ridx = static_cast(batch.base_rowid + i); + const auto ridx = static_cast(batch.base_rowid + i); RegTree::FVec &feats = fvec_temp[tid]; feats.Fill(inst); int offset = 0; - for (size_t j = 0; j < trees.size(); ++j) { - AddStats(*trees[j], feats, gpair_h, info, ridx, + for (auto tree : trees) { + AddStats(*tree, feats, gpair_h, info, ridx, dmlc::BeginPtr(stemp[tid]) + offset); - offset += trees[j]->param.num_nodes; + offset += tree->param.num_nodes; } feats.Drop(inst); } } // 
aggregate the statistics - int num_nodes = static_cast(stemp[0].size()); + auto num_nodes = static_cast(stemp[0].size()); #pragma omp parallel for schedule(static) for (int nid = 0; nid < num_nodes; ++nid) { for (int tid = 1; tid < nthread; ++tid) { @@ -89,64 +89,64 @@ class TreeRefresher: public TreeUpdater { } }; #if __cplusplus >= 201103L - reducer.Allreduce(dmlc::BeginPtr(stemp[0]), stemp[0].size(), lazy_get_stats); + reducer_.Allreduce(dmlc::BeginPtr(stemp[0]), stemp[0].size(), lazy_get_stats); #else - reducer.Allreduce(dmlc::BeginPtr(stemp[0]), stemp[0].size()); + reducer_.Allreduce(dmlc::BeginPtr(stemp[0]), stemp[0].size()); #endif // rescale learning rate according to size of trees - float lr = param.learning_rate; - param.learning_rate = lr / trees.size(); + float lr = param_.learning_rate; + param_.learning_rate = lr / trees.size(); int offset = 0; - for (size_t i = 0; i < trees.size(); ++i) { - for (int rid = 0; rid < trees[i]->param.num_roots; ++rid) { - this->Refresh(dmlc::BeginPtr(stemp[0]) + offset, rid, trees[i]); + for (auto tree : trees) { + for (int rid = 0; rid < tree->param.num_roots; ++rid) { + this->Refresh(dmlc::BeginPtr(stemp[0]) + offset, rid, tree); } - offset += trees[i]->param.num_nodes; + offset += tree->param.num_nodes; } // set learning rate back - param.learning_rate = lr; + param_.learning_rate = lr; } private: inline static void AddStats(const RegTree &tree, const RegTree::FVec &feat, - const std::vector &gpair, + const std::vector &gpair, const MetaInfo &info, const bst_uint ridx, TStats *gstats) { // start from groups that belongs to current data - int pid = static_cast(info.GetRoot(ridx)); + auto pid = static_cast(info.GetRoot(ridx)); gstats[pid].Add(gpair, info, ridx); // tranverse tree - while (!tree[pid].is_leaf()) { - unsigned split_index = tree[pid].split_index(); - pid = tree.GetNext(pid, feat.fvalue(split_index), feat.is_missing(split_index)); + while (!tree[pid].IsLeaf()) { + unsigned split_index = tree[pid].SplitIndex(); + pid = tree.GetNext(pid, feat.Fvalue(split_index), feat.IsMissing(split_index)); gstats[pid].Add(gpair, info, ridx); } } inline void Refresh(const TStats *gstats, int nid, RegTree *p_tree) { RegTree &tree = *p_tree; - tree.stat(nid).base_weight = static_cast(gstats[nid].CalcWeight(param)); - tree.stat(nid).sum_hess = static_cast(gstats[nid].sum_hess); - gstats[nid].SetLeafVec(param, tree.leafvec(nid)); - if (tree[nid].is_leaf()) { - if (param.refresh_leaf) { - tree[nid].set_leaf(tree.stat(nid).base_weight * param.learning_rate); + tree.Stat(nid).base_weight = static_cast(gstats[nid].CalcWeight(param_)); + tree.Stat(nid).sum_hess = static_cast(gstats[nid].sum_hess); + gstats[nid].SetLeafVec(param_, tree.Leafvec(nid)); + if (tree[nid].IsLeaf()) { + if (param_.refresh_leaf) { + tree[nid].SetLeaf(tree.Stat(nid).base_weight * param_.learning_rate); } } else { - tree.stat(nid).loss_chg = static_cast( - gstats[tree[nid].cleft()].CalcGain(param) + - gstats[tree[nid].cright()].CalcGain(param) - - gstats[nid].CalcGain(param)); - this->Refresh(gstats, tree[nid].cleft(), p_tree); - this->Refresh(gstats, tree[nid].cright(), p_tree); + tree.Stat(nid).loss_chg = static_cast( + gstats[tree[nid].LeftChild()].CalcGain(param_) + + gstats[tree[nid].RightChild()].CalcGain(param_) - + gstats[nid].CalcGain(param_)); + this->Refresh(gstats, tree[nid].LeftChild(), p_tree); + this->Refresh(gstats, tree[nid].RightChild(), p_tree); } } // training parameter - TrainParam param; + TrainParam param_; // reducer - rabit::Reducer reducer; + rabit::Reducer 
reducer_; }; XGBOOST_REGISTER_TREE_UPDATER(TreeRefresher, "refresh") diff --git a/src/tree/updater_skmaker.cc b/src/tree/updater_skmaker.cc index 688e2026c..ff1c54036 100644 --- a/src/tree/updater_skmaker.cc +++ b/src/tree/updater_skmaker.cc @@ -22,58 +22,57 @@ DMLC_REGISTRY_FILE_TAG(updater_skmaker); class SketchMaker: public BaseMaker { public: - void Update(HostDeviceVector *gpair, + void Update(HostDeviceVector *gpair, DMatrix *p_fmat, const std::vector &trees) override { // rescale learning rate according to size of trees - float lr = param.learning_rate; - param.learning_rate = lr / trees.size(); + float lr = param_.learning_rate; + param_.learning_rate = lr / trees.size(); // build tree - for (size_t i = 0; i < trees.size(); ++i) { - this->Update(gpair->data_h(), p_fmat, trees[i]); + for (auto tree : trees) { + this->Update(gpair->HostVector(), p_fmat, tree); } - param.learning_rate = lr; + param_.learning_rate = lr; } protected: - inline void Update(const std::vector &gpair, + inline void Update(const std::vector &gpair, DMatrix *p_fmat, RegTree *p_tree) { this->InitData(gpair, *p_fmat, *p_tree); - for (int depth = 0; depth < param.max_depth; ++depth) { + for (int depth = 0; depth < param_.max_depth; ++depth) { this->GetNodeStats(gpair, *p_fmat, *p_tree, - &thread_stats, &node_stats); + &thread_stats_, &node_stats_); this->BuildSketch(gpair, p_fmat, *p_tree); this->SyncNodeStats(); this->FindSplit(depth, gpair, p_fmat, p_tree); - this->ResetPositionCol(qexpand, p_fmat, *p_tree); + this->ResetPositionCol(qexpand_, p_fmat, *p_tree); this->UpdateQueueExpand(*p_tree); // if nothing left to be expand, break - if (qexpand.size() == 0) break; + if (qexpand_.size() == 0) break; } - if (qexpand.size() != 0) { + if (qexpand_.size() != 0) { this->GetNodeStats(gpair, *p_fmat, *p_tree, - &thread_stats, &node_stats); + &thread_stats_, &node_stats_); this->SyncNodeStats(); } // set all statistics correctly for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) { - this->SetStats(nid, node_stats[nid], p_tree); - if (!(*p_tree)[nid].is_leaf()) { - p_tree->stat(nid).loss_chg = static_cast( - node_stats[(*p_tree)[nid].cleft()].CalcGain(param) + - node_stats[(*p_tree)[nid].cright()].CalcGain(param) - - node_stats[nid].CalcGain(param)); + this->SetStats(nid, node_stats_[nid], p_tree); + if (!(*p_tree)[nid].IsLeaf()) { + p_tree->Stat(nid).loss_chg = static_cast( + node_stats_[(*p_tree)[nid].LeftChild()].CalcGain(param_) + + node_stats_[(*p_tree)[nid].RightChild()].CalcGain(param_) - + node_stats_[nid].CalcGain(param_)); } } // set left leaves - for (size_t i = 0; i < qexpand.size(); ++i) { - const int nid = qexpand[i]; - (*p_tree)[nid].set_leaf(p_tree->stat(nid).base_weight * param.learning_rate); + for (int nid : qexpand_) { + (*p_tree)[nid].SetLeaf(p_tree->Stat(nid).base_weight * param_.learning_rate); } } // define the sketch we want to use - typedef common::WXQuantileSketch WXQSketch; + using WXQSketch = common::WXQuantileSketch; private: // statistics needed in the gradient calculation @@ -84,20 +83,20 @@ class SketchMaker: public BaseMaker { double neg_grad; /*! \brief sum of hessian statistics */ double sum_hess; - SKStats(void) {} + SKStats() = default; // constructor explicit SKStats(const TrainParam ¶m) { this->Clear(); } /*! 
\brief clear the statistics */ - inline void Clear(void) { + inline void Clear() { neg_grad = pos_grad = sum_hess = 0.0f; } // accumulate statistics - inline void Add(const std::vector &gpair, + inline void Add(const std::vector &gpair, const MetaInfo &info, bst_uint ridx) { - const bst_gpair &b = gpair[ridx]; + const GradientPair &b = gpair[ridx]; if (b.GetGrad() >= 0.0f) { pos_grad += b.GetGrad(); } else { @@ -133,48 +132,48 @@ class SketchMaker: public BaseMaker { inline void SetLeafVec(const TrainParam ¶m, bst_float *vec) const { } }; - inline void BuildSketch(const std::vector &gpair, + inline void BuildSketch(const std::vector &gpair, DMatrix *p_fmat, const RegTree &tree) { - const MetaInfo& info = p_fmat->info(); - sketchs.resize(this->qexpand.size() * tree.param.num_feature * 3); - for (size_t i = 0; i < sketchs.size(); ++i) { - sketchs[i].Init(info.num_row, this->param.sketch_eps); + const MetaInfo& info = p_fmat->Info(); + sketchs_.resize(this->qexpand_.size() * tree.param.num_feature * 3); + for (auto & sketch : sketchs_) { + sketch.Init(info.num_row_, this->param_.sketch_eps); } - thread_sketch.resize(omp_get_max_threads()); + thread_sketch_.resize(omp_get_max_threads()); // number of rows in - const size_t nrows = p_fmat->buffered_rowset().size(); + const size_t nrows = p_fmat->BufferedRowset().Size(); // start accumulating statistics dmlc::DataIter *iter = p_fmat->ColIterator(); iter->BeforeFirst(); while (iter->Next()) { const ColBatch &batch = iter->Value(); // start enumeration - const bst_omp_uint nsize = static_cast(batch.size); + const auto nsize = static_cast(batch.size); #pragma omp parallel for schedule(dynamic, 1) for (bst_omp_uint i = 0; i < nsize; ++i) { this->UpdateSketchCol(gpair, batch[i], tree, - node_stats, + node_stats_, batch.col_index[i], batch[i].length == nrows, - &thread_sketch[omp_get_thread_num()]); + &thread_sketch_[omp_get_thread_num()]); } } // setup maximum size - unsigned max_size = param.max_sketch_size(); + unsigned max_size = param_.MaxSketchSize(); // synchronize sketch - summary_array.resize(sketchs.size()); - for (size_t i = 0; i < sketchs.size(); ++i) { + summary_array_.resize(sketchs_.size()); + for (size_t i = 0; i < sketchs_.size(); ++i) { common::WXQuantileSketch::SummaryContainer out; - sketchs[i].GetSummary(&out); - summary_array[i].Reserve(max_size); - summary_array[i].SetPrune(out, max_size); + sketchs_[i].GetSummary(&out); + summary_array_[i].Reserve(max_size); + summary_array_[i].SetPrune(out, max_size); } size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size); - sketch_reducer.Allreduce(dmlc::BeginPtr(summary_array), nbytes, summary_array.size()); + sketch_reducer_.Allreduce(dmlc::BeginPtr(summary_array_), nbytes, summary_array_.size()); } // update sketch information in column fid - inline void UpdateSketchCol(const std::vector &gpair, + inline void UpdateSketchCol(const std::vector &gpair, const ColBatch::Inst &c, const RegTree &tree, const std::vector &nstats, @@ -185,20 +184,19 @@ class SketchMaker: public BaseMaker { // initialize sbuilder for use std::vector &sbuilder = *p_temp; sbuilder.resize(tree.param.num_nodes * 3); - for (size_t i = 0; i < this->qexpand.size(); ++i) { - const unsigned nid = this->qexpand[i]; - const unsigned wid = this->node2workindex[nid]; + for (unsigned int nid : this->qexpand_) { + const unsigned wid = this->node2workindex_[nid]; for (int k = 0; k < 3; ++k) { sbuilder[3 * nid + k].sum_total = 0.0f; - sbuilder[3 * nid + k].sketch = &sketchs[(wid * tree.param.num_feature + fid) * 3 + 
k]; + sbuilder[3 * nid + k].sketch = &sketchs_[(wid * tree.param.num_feature + fid) * 3 + k]; } } if (!col_full) { for (bst_uint j = 0; j < c.length; ++j) { const bst_uint ridx = c[j].index; - const int nid = this->position[ridx]; + const int nid = this->position_[ridx]; if (nid >= 0) { - const bst_gpair &e = gpair[ridx]; + const GradientPair &e = gpair[ridx]; if (e.GetGrad() >= 0.0f) { sbuilder[3 * nid + 0].sum_total += e.GetGrad(); } else { @@ -208,8 +206,7 @@ class SketchMaker: public BaseMaker { } } } else { - for (size_t i = 0; i < this->qexpand.size(); ++i) { - const unsigned nid = this->qexpand[i]; + for (unsigned int nid : this->qexpand_) { sbuilder[3 * nid + 0].sum_total = static_cast(nstats[nid].pos_grad); sbuilder[3 * nid + 1].sum_total = static_cast(nstats[nid].neg_grad); sbuilder[3 * nid + 2].sum_total = static_cast(nstats[nid].sum_hess); @@ -217,8 +214,7 @@ class SketchMaker: public BaseMaker { } // if only one value, no need to do second pass if (c[0].fvalue == c[c.length-1].fvalue) { - for (size_t i = 0; i < this->qexpand.size(); ++i) { - const int nid = this->qexpand[i]; + for (int nid : this->qexpand_) { for (int k = 0; k < 3; ++k) { sbuilder[3 * nid + k].sketch->Push(c[0].fvalue, static_cast( @@ -228,9 +224,8 @@ class SketchMaker: public BaseMaker { return; } // two pass scan - unsigned max_size = param.max_sketch_size(); - for (size_t i = 0; i < this->qexpand.size(); ++i) { - const int nid = this->qexpand[i]; + unsigned max_size = param_.MaxSketchSize(); + for (int nid : this->qexpand_) { for (int k = 0; k < 3; ++k) { sbuilder[3 * nid + k].Init(max_size); } @@ -238,9 +233,9 @@ class SketchMaker: public BaseMaker { // second pass, build the sketch for (bst_uint j = 0; j < c.length; ++j) { const bst_uint ridx = c[j].index; - const int nid = this->position[ridx]; + const int nid = this->position_[ridx]; if (nid >= 0) { - const bst_gpair &e = gpair[ridx]; + const GradientPair &e = gpair[ridx]; if (e.GetGrad() >= 0.0f) { sbuilder[3 * nid + 0].Push(c[j].fvalue, e.GetGrad(), max_size); } else { @@ -249,70 +244,69 @@ class SketchMaker: public BaseMaker { sbuilder[3 * nid + 2].Push(c[j].fvalue, e.GetHess(), max_size); } } - for (size_t i = 0; i < this->qexpand.size(); ++i) { - const int nid = this->qexpand[i]; + for (int nid : this->qexpand_) { for (int k = 0; k < 3; ++k) { sbuilder[3 * nid + k].Finalize(max_size); } } } - inline void SyncNodeStats(void) { - CHECK_NE(qexpand.size(), 0U); - std::vector tmp(qexpand.size()); - for (size_t i = 0; i < qexpand.size(); ++i) { - tmp[i] = node_stats[qexpand[i]]; + inline void SyncNodeStats() { + CHECK_NE(qexpand_.size(), 0U); + std::vector tmp(qexpand_.size()); + for (size_t i = 0; i < qexpand_.size(); ++i) { + tmp[i] = node_stats_[qexpand_[i]]; } - stats_reducer.Allreduce(dmlc::BeginPtr(tmp), tmp.size()); - for (size_t i = 0; i < qexpand.size(); ++i) { - node_stats[qexpand[i]] = tmp[i]; + stats_reducer_.Allreduce(dmlc::BeginPtr(tmp), tmp.size()); + for (size_t i = 0; i < qexpand_.size(); ++i) { + node_stats_[qexpand_[i]] = tmp[i]; } } inline void FindSplit(int depth, - const std::vector &gpair, + const std::vector &gpair, DMatrix *p_fmat, RegTree *p_tree) { const bst_uint num_feature = p_tree->param.num_feature; // get the best split condition for each node - std::vector sol(qexpand.size()); - bst_omp_uint nexpand = static_cast(qexpand.size()); + std::vector sol(qexpand_.size()); + auto nexpand = static_cast(qexpand_.size()); #pragma omp parallel for schedule(dynamic, 1) for (bst_omp_uint wid = 0; wid < nexpand; ++wid) { - const int nid 
= qexpand[wid]; - CHECK_EQ(node2workindex[nid], static_cast(wid)); + const int nid = qexpand_[wid]; + CHECK_EQ(node2workindex_[nid], static_cast(wid)); SplitEntry &best = sol[wid]; for (bst_uint fid = 0; fid < num_feature; ++fid) { unsigned base = (wid * p_tree->param.num_feature + fid) * 3; - EnumerateSplit(summary_array[base + 0], - summary_array[base + 1], - summary_array[base + 2], - node_stats[nid], fid, &best); + EnumerateSplit(summary_array_[base + 0], + summary_array_[base + 1], + summary_array_[base + 2], + node_stats_[nid], fid, &best); } } // get the best result, we can synchronize the solution for (bst_omp_uint wid = 0; wid < nexpand; ++wid) { - const int nid = qexpand[wid]; + const int nid = qexpand_[wid]; const SplitEntry &best = sol[wid]; // set up the values - p_tree->stat(nid).loss_chg = best.loss_chg; - this->SetStats(nid, node_stats[nid], p_tree); + p_tree->Stat(nid).loss_chg = best.loss_chg; + this->SetStats(nid, node_stats_[nid], p_tree); // now we know the solution in snode[nid], set split - if (best.loss_chg > rt_eps) { + if (best.loss_chg > kRtEps) { p_tree->AddChilds(nid); - (*p_tree)[nid].set_split(best.split_index(), - best.split_value, best.default_left()); + (*p_tree)[nid].SetSplit(best.SplitIndex(), + best.split_value, best.DefaultLeft()); // mark right child as 0, to indicate fresh leaf - (*p_tree)[(*p_tree)[nid].cleft()].set_leaf(0.0f, 0); - (*p_tree)[(*p_tree)[nid].cright()].set_leaf(0.0f, 0); + (*p_tree)[(*p_tree)[nid].LeftChild()].SetLeaf(0.0f, 0); + (*p_tree)[(*p_tree)[nid].RightChild()].SetLeaf(0.0f, 0); } else { - (*p_tree)[nid].set_leaf(p_tree->stat(nid).base_weight * param.learning_rate); + (*p_tree)[nid].SetLeaf(p_tree->Stat(nid).base_weight * param_.learning_rate); } } } // set statistics on ptree inline void SetStats(int nid, const SKStats &node_sum, RegTree *p_tree) { - p_tree->stat(nid).base_weight = static_cast(node_sum.CalcWeight(param)); - p_tree->stat(nid).sum_hess = static_cast(node_sum.sum_hess); - node_sum.SetLeafVec(param, p_tree->leafvec(nid)); + p_tree->Stat(nid).base_weight = static_cast(node_sum.CalcWeight(param_)); + p_tree->Stat(nid).sum_hess = static_cast(node_sum.sum_hess); + node_sum.SetLeafVec(param_, p_tree->Leafvec(nid)); } inline void EnumerateSplit(const WXQSketch::Summary &pos_grad, const WXQSketch::Summary &neg_grad, @@ -321,7 +315,7 @@ class SketchMaker: public BaseMaker { bst_uint fid, SplitEntry *best) { if (sum_hess.size == 0) return; - double root_gain = node_sum.CalcGain(param); + double root_gain = node_sum.CalcGain(param_); std::vector fsplits; for (size_t i = 0; i < pos_grad.size; ++i) { fsplits.push_back(pos_grad.data[i].value); @@ -350,17 +344,17 @@ class SketchMaker: public BaseMaker { s.sum_hess = 0.5f * (hess.rmin + hess.rmax - hess.wmin); c.SetSubstract(node_sum, s); // forward - if (s.sum_hess >= param.min_child_weight && - c.sum_hess >= param.min_child_weight) { - double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; + if (s.sum_hess >= param_.min_child_weight && + c.sum_hess >= param_.min_child_weight) { + double loss_chg = s.CalcGain(param_) + c.CalcGain(param_) - root_gain; best->Update(static_cast(loss_chg), fid, fsplits[i], false); } // backward c.SetSubstract(feat_sum, s); s.SetSubstract(node_sum, c); - if (s.sum_hess >= param.min_child_weight && - c.sum_hess >= param.min_child_weight) { - double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; + if (s.sum_hess >= param_.min_child_weight && + c.sum_hess >= param_.min_child_weight) { + double loss_chg = s.CalcGain(param_) 
diff --git a/src/tree/updater_sync.cc b/src/tree/updater_sync.cc
index f2a5da48b..f69607fa2 100644
--- a/src/tree/updater_sync.cc
+++ b/src/tree/updater_sync.cc
@@ -23,7 +23,7 @@ class TreeSyncher: public TreeUpdater {
  public:
   void Init(const std::vector<std::pair<std::string, std::string> >& args) override {}
-  void Update(HostDeviceVector<bst_gpair> *gpair,
+  void Update(HostDeviceVector<GradientPair> *gpair,
               DMatrix* dmat,
               const std::vector<RegTree*> &trees) override {
     if (rabit::GetWorldSize() == 1) return;
@@ -31,14 +31,14 @@ class TreeSyncher: public TreeUpdater {
     common::MemoryBufferStream fs(&s_model);
     int rank = rabit::GetRank();
     if (rank == 0) {
-      for (size_t i = 0; i < trees.size(); ++i) {
-        trees[i]->Save(&fs);
+      for (auto tree : trees) {
+        tree->Save(&fs);
       }
     }
     fs.Seek(0);
     rabit::Broadcast(&s_model, 0);
-    for (size_t i = 0; i < trees.size(); ++i) {
-      trees[i]->Load(&fs);
+    for (auto tree : trees) {
+      tree->Load(&fs);
     }
   }
 };
diff --git a/tests/cpp/c_api/test_c_api.cc b/tests/cpp/c_api/test_c_api.cc
index b1b2d463b..9cb6eba05 100644
--- a/tests/cpp/c_api/test_c_api.cc
+++ b/tests/cpp/c_api/test_c_api.cc
@@ -20,10 +20,10 @@ TEST(c_api, XGDMatrixCreateFromMat_omp) {
     std::shared_ptr<xgboost::DMatrix> dmat =
         *static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);
-    xgboost::MetaInfo &info = dmat->info();
-    ASSERT_EQ(info.num_col, num_cols);
-    ASSERT_EQ(info.num_row, row);
-    ASSERT_EQ(info.num_nonzero, num_cols * row - num_missing);
+    xgboost::MetaInfo &info = dmat->Info();
+    ASSERT_EQ(info.num_col_, num_cols);
+    ASSERT_EQ(info.num_row_, row);
+    ASSERT_EQ(info.num_nonzero_, num_cols * row - num_missing);

     auto iter = dmat->RowIterator();
     iter->BeforeFirst();
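One detail of the c_api test above worth spelling out: the C API hands back an opaque handle, and the test recovers the matrix by casting it back to a pointer to std::shared_ptr<xgboost::DMatrix> and dereferencing. A minimal sketch of that ownership pattern (DMatrix and the helper names here are stand-ins, not the real API surface):

    #include <memory>

    struct DMatrix {};     // stand-in for xgboost::DMatrix
    using Handle = void*;  // C APIs pass opaque void* handles

    Handle MakeHandle() {
      // ownership lives on the heap behind the handle
      return new std::shared_ptr<DMatrix>(new DMatrix());
    }

    void UseHandle(Handle handle) {
      // same cast-and-dereference as the test; copies the shared_ptr
      std::shared_ptr<DMatrix> dmat =
          *static_cast<std::shared_ptr<DMatrix> *>(handle);
    }

    void FreeHandle(Handle handle) {
      delete static_cast<std::shared_ptr<DMatrix> *>(handle);  // drops the reference
    }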
diff --git a/tests/cpp/common/test_device_helpers.cu b/tests/cpp/common/test_device_helpers.cu
index ad00328f1..64146b2b3 100644
--- a/tests/cpp/common/test_device_helpers.cu
+++ b/tests/cpp/common/test_device_helpers.cu
@@ -38,7 +38,7 @@ void SpeedTest() {
   xgboost::common::Timer t;
   dh::TransformLbs(
-      0, &temp_memory, h_rows.size(), dh::raw(row_ptr), row_ptr.size() - 1,
+      0, &temp_memory, h_rows.size(), dh::Raw(row_ptr), row_ptr.size() - 1,
       false, [=] __device__(size_t idx, size_t ridx) { d_output_row[idx] = ridx; });
@@ -66,7 +66,7 @@ void TestLbs() {
   thrust::device_vector<size_t> output_row(h_rows.size());
   auto d_output_row = output_row.data();

-  dh::TransformLbs(0, &temp_memory, h_rows.size(), dh::raw(row_ptr),
+  dh::TransformLbs(0, &temp_memory, h_rows.size(), dh::Raw(row_ptr),
                    row_ptr.size() - 1, false,
                    [=] __device__(size_t idx, size_t ridx) {
                      d_output_row[idx] = ridx;
@@ -83,6 +83,6 @@ TEST(cub_lbs, Test) { TestLbs(); }
 TEST(sumReduce, Test) {
   thrust::device_vector<float> data(100, 1.0f);
   dh::CubMemory temp;
-  auto sum = dh::sumReduction(temp, dh::raw(data), data.size());
+  auto sum = dh::SumReduction(temp, dh::Raw(data), data.size());
   ASSERT_NEAR(sum, 100.0f, 1e-5);
 }
diff --git a/tests/cpp/data/test_metainfo.cc b/tests/cpp/data/test_metainfo.cc
index bd944dab4..f2d5d2ae4 100644
--- a/tests/cpp/data/test_metainfo.cc
+++ b/tests/cpp/data/test_metainfo.cc
@@ -12,9 +12,9 @@ TEST(MetaInfo, GetSet) {
   info.SetInfo("root_index", double2, xgboost::kDouble, 2);
   EXPECT_EQ(info.GetRoot(1), 2.0f);

-  EXPECT_EQ(info.labels.size(), 0);
+  EXPECT_EQ(info.labels_.size(), 0);
   info.SetInfo("label", double2, xgboost::kFloat32, 2);
-  EXPECT_EQ(info.labels.size(), 2);
+  EXPECT_EQ(info.labels_.size(), 2);

   float float2[2] = {1.0f, 2.0f};
   EXPECT_EQ(info.GetWeight(1), 1.0f)
@@ -23,26 +23,26 @@ TEST(MetaInfo, GetSet) {
   EXPECT_EQ(info.GetWeight(1), 2.0f);

   uint32_t uint32_t2[2] = {1U, 2U};
-  EXPECT_EQ(info.base_margin.size(), 0);
+  EXPECT_EQ(info.base_margin_.size(), 0);
   info.SetInfo("base_margin", uint32_t2, xgboost::kUInt32, 2);
-  EXPECT_EQ(info.base_margin.size(), 2);
+  EXPECT_EQ(info.base_margin_.size(), 2);

   uint64_t uint64_t2[2] = {1U, 2U};
-  EXPECT_EQ(info.group_ptr.size(), 0);
+  EXPECT_EQ(info.group_ptr_.size(), 0);
   info.SetInfo("group", uint64_t2, xgboost::kUInt64, 2);
-  ASSERT_EQ(info.group_ptr.size(), 3);
-  EXPECT_EQ(info.group_ptr[2], 3);
+  ASSERT_EQ(info.group_ptr_.size(), 3);
+  EXPECT_EQ(info.group_ptr_[2], 3);

   info.Clear();
-  ASSERT_EQ(info.group_ptr.size(), 0);
+  ASSERT_EQ(info.group_ptr_.size(), 0);
 }

 TEST(MetaInfo, SaveLoadBinary) {
   xgboost::MetaInfo info;
   double vals[2] = {1.0, 2.0};
   info.SetInfo("label", vals, xgboost::kDouble, 2);
-  info.num_row = 2;
-  info.num_col = 1;
+  info.num_row_ = 2;
+  info.num_col_ = 1;

   std::string tmp_file = TempFileName();
   dmlc::Stream * fs = dmlc::Stream::Create(tmp_file.c_str(), "w");
@@ -55,9 +55,9 @@ TEST(MetaInfo, SaveLoadBinary) {
   fs = dmlc::Stream::Create(tmp_file.c_str(), "r");
   xgboost::MetaInfo inforead;
   inforead.LoadBinary(fs);
-  EXPECT_EQ(inforead.labels, info.labels);
-  EXPECT_EQ(inforead.num_col, info.num_col);
-  EXPECT_EQ(inforead.num_row, info.num_row);
+  EXPECT_EQ(inforead.labels_, info.labels_);
+  EXPECT_EQ(inforead.num_col_, info.num_col_);
+  EXPECT_EQ(inforead.num_row_, info.num_row_);

   std::remove(tmp_file.c_str());
 }
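For readers puzzled by the group expectations above: SetInfo("group", ...) receives per-group sizes ({1, 2} in this test), and MetaInfo stores them as cumulative boundaries, so two groups yield three entries whose last value equals the total row count; hence size() == 3 and group_ptr_[2] == 3. A sketch of that conversion, assuming it is a plain prefix sum:

    #include <cstdint>
    #include <vector>

    // turn group sizes {1, 2} into boundaries {0, 1, 3}
    std::vector<uint64_t> MakeGroupPtr(const std::vector<uint64_t>& sizes) {
      std::vector<uint64_t> ptr(sizes.size() + 1, 0);
      for (size_t i = 0; i < sizes.size(); ++i) {
        ptr[i + 1] = ptr[i] + sizes[i];  // running total of group sizes
      }
      return ptr;
    }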
diff --git a/tests/cpp/data/test_simple_csr_source.cc b/tests/cpp/data/test_simple_csr_source.cc
index d912b58c9..6969cdd33 100644
--- a/tests/cpp/data/test_simple_csr_source.cc
+++ b/tests/cpp/data/test_simple_csr_source.cc
@@ -14,9 +14,9 @@ TEST(SimpleCSRSource, SaveLoadBinary) {
   xgboost::DMatrix * dmat_read = xgboost::DMatrix::Load(tmp_binfile, true, false);
   std::remove(tmp_binfile.c_str());

-  EXPECT_EQ(dmat->info().num_col, dmat_read->info().num_col);
-  EXPECT_EQ(dmat->info().num_row, dmat_read->info().num_row);
-  EXPECT_EQ(dmat->info().num_row, dmat_read->info().num_row);
+  EXPECT_EQ(dmat->Info().num_col_, dmat_read->Info().num_col_);
+  EXPECT_EQ(dmat->Info().num_row_, dmat_read->Info().num_row_);
+  EXPECT_EQ(dmat->Info().num_row_, dmat_read->Info().num_row_);

   dmlc::DataIter<xgboost::RowBatch> * row_iter = dmat->RowIterator();
   dmlc::DataIter<xgboost::RowBatch> * row_iter_read = dmat_read->RowIterator();
diff --git a/tests/cpp/data/test_simple_dmatrix.cc b/tests/cpp/data/test_simple_dmatrix.cc
index f13d7b2f9..85b65eca2 100644
--- a/tests/cpp/data/test_simple_dmatrix.cc
+++ b/tests/cpp/data/test_simple_dmatrix.cc
@@ -10,10 +10,10 @@ TEST(SimpleDMatrix, MetaInfo) {
   std::remove(tmp_file.c_str());

   // Test the metadata that was parsed
-  EXPECT_EQ(dmat->info().num_row, 2);
-  EXPECT_EQ(dmat->info().num_col, 5);
-  EXPECT_EQ(dmat->info().num_nonzero, 6);
-  EXPECT_EQ(dmat->info().labels.size(), dmat->info().num_row);
+  EXPECT_EQ(dmat->Info().num_row_, 2);
+  EXPECT_EQ(dmat->Info().num_col_, 5);
+  EXPECT_EQ(dmat->Info().num_nonzero_, 6);
+  EXPECT_EQ(dmat->Info().labels_.size(), dmat->Info().num_row_);
 }

 TEST(SimpleDMatrix, RowAccess) {
@@ -26,7 +26,7 @@ TEST(SimpleDMatrix, RowAccess) {
   long row_count = 0;
   row_iter->BeforeFirst();
   while (row_iter->Next()) row_count += row_iter->Value().size;
-  EXPECT_EQ(row_count, dmat->info().num_row);
+  EXPECT_EQ(row_count, dmat->Info().num_row_);

   // Test the data read into the first row
   row_iter->BeforeFirst();
   row_iter->Next();
@@ -43,15 +43,15 @@ TEST(SimpleDMatrix, ColAccessWithoutBatches) {
   std::remove(tmp_file.c_str());

   // Unsorted column access
-  const std::vector<bool> enable(dmat->info().num_col, true);
+  const std::vector<bool> enable(dmat->Info().num_col_, true);
   EXPECT_EQ(dmat->HaveColAccess(false), false);
-  dmat->InitColAccess(enable, 1, dmat->info().num_row, false);
+  dmat->InitColAccess(enable, 1, dmat->Info().num_row_, false);
   dmat->InitColAccess(enable, 0, 0, false);  // Calling it again should not change it
   ASSERT_EQ(dmat->HaveColAccess(false), true);

   // Sorted column access
   EXPECT_EQ(dmat->HaveColAccess(true), false);
-  dmat->InitColAccess(enable, 1, dmat->info().num_row, true);
+  dmat->InitColAccess(enable, 1, dmat->Info().num_row_, true);
   dmat->InitColAccess(enable, 0, 0, true);  // Calling it again should not change it
   ASSERT_EQ(dmat->HaveColAccess(true), true);
@@ -67,7 +67,7 @@ TEST(SimpleDMatrix, ColAccessWithoutBatches) {
   col_iter->BeforeFirst();
   while (col_iter->Next()) {
     num_col_batch += 1;
-    EXPECT_EQ(col_iter->Value().size, dmat->info().num_col)
+    EXPECT_EQ(col_iter->Value().size, dmat->Info().num_col_)
       << "Expected batch size = number of cells as #batches is 1.";
     for (int i = 0; i < static_cast<int>(col_iter->Value().size); ++i) {
       EXPECT_EQ(col_iter->Value()[i].length, dmat->GetColSize(i))
@@ -94,7 +94,7 @@ TEST(SimpleDMatrix, ColAccessWithBatches) {
   std::remove(tmp_file.c_str());

   // Unsorted column access
-  const std::vector<bool> enable(dmat->info().num_col, true);
+  const std::vector<bool> enable(dmat->Info().num_col_, true);
   EXPECT_EQ(dmat->HaveColAccess(false), false);
   dmat->InitColAccess(enable, 1, 1, false);
   dmat->InitColAccess(enable, 0, 0, false);  // Calling it again should not change it
@@ -118,20 +118,20 @@ TEST(SimpleDMatrix, ColAccessWithBatches) {
   col_iter->BeforeFirst();
   while (col_iter->Next()) {
     num_col_batch += 1;
-    EXPECT_EQ(col_iter->Value().size, dmat->info().num_col)
+    EXPECT_EQ(col_iter->Value().size, dmat->Info().num_col_)
       << "Expected batch size = num_cols as max_row_perbatch is 1.";
     for (int i = 0; i < static_cast<int>(col_iter->Value().size); ++i) {
       EXPECT_LE(col_iter->Value()[i].length, 1)
         << "Expected length of each colbatch <=1 as max_row_perbatch is 1.";
     }
   }
-  EXPECT_EQ(num_col_batch, dmat->info().num_row)
+  EXPECT_EQ(num_col_batch, dmat->Info().num_row_)
     << "Expected num batches = num_rows as max_row_perbatch is 1";
   col_iter = nullptr;

   // The iterator feats should ignore any numbers larger than the num_col
   std::vector<xgboost::bst_uint> sub_feats = {
-      4, 3, static_cast<xgboost::bst_uint>(dmat->info().num_col + 1)};
+      4, 3, static_cast<xgboost::bst_uint>(dmat->Info().num_col_ + 1)};
   dmlc::DataIter<xgboost::ColBatch> * sub_col_iter = dmat->ColIterator(sub_feats);
   // Loop over the batches and assert the data is as expected
   sub_col_iter->BeforeFirst();
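The metadata expected in SimpleDMatrix.MetaInfo above and in SparsePageDMatrix.MetaInfo below (2 rows, 5 columns, 6 nonzeros, one label per row) comes from the small fixture written to tmp_file. The fixture itself is not part of this diff, but a hypothetical LIBSVM input with exactly that shape would be:

    1 0:10 2:20 4:30
    0 1:40 3:50 4:60

Two lines give num_row_ = 2, the highest feature index 4 gives num_col_ = 5, the six index:value entries give num_nonzero_ = 6, and the leading targets supply one label per row.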
diff --git a/tests/cpp/data/test_sparse_page_dmatrix.cc b/tests/cpp/data/test_sparse_page_dmatrix.cc
index 6d826f0e8..df4c62a05 100644
--- a/tests/cpp/data/test_sparse_page_dmatrix.cc
+++ b/tests/cpp/data/test_sparse_page_dmatrix.cc
@@ -12,10 +12,10 @@ TEST(SparsePageDMatrix, MetaInfo) {
   EXPECT_TRUE(FileExists(tmp_file + ".cache"));

   // Test the metadata that was parsed
-  EXPECT_EQ(dmat->info().num_row, 2);
-  EXPECT_EQ(dmat->info().num_col, 5);
-  EXPECT_EQ(dmat->info().num_nonzero, 6);
-  EXPECT_EQ(dmat->info().labels.size(), dmat->info().num_row);
+  EXPECT_EQ(dmat->Info().num_row_, 2);
+  EXPECT_EQ(dmat->Info().num_col_, 5);
+  EXPECT_EQ(dmat->Info().num_nonzero_, 6);
+  EXPECT_EQ(dmat->Info().labels_.size(), dmat->Info().num_row_);

   // Clean up of external memory files
   std::remove((tmp_file + ".cache").c_str());
@@ -34,7 +34,7 @@ TEST(SparsePageDMatrix, RowAccess) {
   long row_count = 0;
   row_iter->BeforeFirst();
   while (row_iter->Next()) row_count += row_iter->Value().size;
-  EXPECT_EQ(row_count, dmat->info().num_row);
+  EXPECT_EQ(row_count, dmat->Info().num_row_);

   // Test the data read into the first row
   row_iter->BeforeFirst();
   row_iter->Next();
@@ -57,7 +57,7 @@ TEST(SparsePageDMatrix, ColAcess) {
   EXPECT_FALSE(FileExists(tmp_file + ".cache.col.page"));

   EXPECT_EQ(dmat->HaveColAccess(true), false);
-  const std::vector<bool> enable(dmat->info().num_col, true);
+  const std::vector<bool> enable(dmat->Info().num_col_, true);
   dmat->InitColAccess(enable, 1, 1, true);  // Max 1 row per batch
   ASSERT_EQ(dmat->HaveColAccess(true), true);
   EXPECT_TRUE(FileExists(tmp_file + ".cache.col.page"));
@@ -73,10 +73,10 @@ TEST(SparsePageDMatrix, ColAcess) {
   col_iter->BeforeFirst();
   while (col_iter->Next()) {
     num_col_batch += 1;
-    EXPECT_EQ(col_iter->Value().size, dmat->info().num_col)
+    EXPECT_EQ(col_iter->Value().size, dmat->Info().num_col_)
       << "Expected batch size to be same as num_cols as max_row_perbatch is 1.";
   }
-  EXPECT_EQ(num_col_batch, dmat->info().num_row)
+  EXPECT_EQ(num_col_batch, dmat->Info().num_row_)
     << "Expected num batches to be same as num_rows as max_row_perbatch is 1";
   col_iter = nullptr;
diff --git a/tests/cpp/helpers.cc b/tests/cpp/helpers.cc
index 3318be60a..4b172c12b 100644
--- a/tests/cpp/helpers.cc
+++ b/tests/cpp/helpers.cc
@@ -34,17 +34,17 @@ void CheckObjFunction(xgboost::ObjFunction * obj,
                       std::vector<xgboost::bst_float> out_grad,
                       std::vector<xgboost::bst_float> out_hess) {
   xgboost::MetaInfo info;
-  info.num_row = labels.size();
-  info.labels = labels;
-  info.weights = weights;
+  info.num_row_ = labels.size();
+  info.labels_ = labels;
+  info.weights_ = weights;

   xgboost::HostDeviceVector<xgboost::bst_float> in_preds(preds);
-  xgboost::HostDeviceVector<xgboost::bst_gpair> out_gpair;
+  xgboost::HostDeviceVector<xgboost::GradientPair> out_gpair;
   obj->GetGradient(&in_preds, info, 1, &out_gpair);
-  std::vector<xgboost::bst_gpair>& gpair = out_gpair.data_h();
+  std::vector<xgboost::GradientPair>& gpair = out_gpair.HostVector();

-  ASSERT_EQ(gpair.size(), in_preds.size());
+  ASSERT_EQ(gpair.size(), in_preds.Size());
   for (int i = 0; i < static_cast<int>(gpair.size()); ++i) {
     EXPECT_NEAR(gpair[i].GetGrad(), out_grad[i], 0.01)
       << "Unexpected grad for pred=" << preds[i] << " label=" << labels[i]
@@ -60,9 +60,9 @@ xgboost::bst_float GetMetricEval(xgboost::Metric * metric,
                                  std::vector<xgboost::bst_float> labels,
                                  std::vector<xgboost::bst_float> weights) {
   xgboost::MetaInfo info;
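CheckObjFunction above wires labels and weights into a MetaInfo, asks the objective for gradient pairs, and compares them against hand-computed expectations. For the common binary logistic objective those expectations follow the textbook closed form; the sketch below restates that math for reference and is not a quote of xgboost's kernel:

    #include <cmath>

    // logistic loss on a raw margin `pred` against a 0/1 `label`:
    //   grad = sigmoid(pred) - label
    //   hess = sigmoid(pred) * (1 - sigmoid(pred))
    void LogisticGradient(float pred, float label, float* grad, float* hess) {
      float p = 1.0f / (1.0f + std::exp(-pred));  // predicted probability
      *grad = p - label;
      *hess = p * (1.0f - p);
    }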
-  info.num_row = labels.size();
-  info.labels = labels;
-  info.weights = weights;
+  info.num_row_ = labels.size();
+  info.labels_ = labels;
+  info.weights_ = weights;
   return metric->Eval(preds, info, false);
 }
diff --git a/tests/cpp/linear/test_linear.cc b/tests/cpp/linear/test_linear.cc
index 92ad8095c..7f58c8be1 100644
--- a/tests/cpp/linear/test_linear.cc
+++ b/tests/cpp/linear/test_linear.cc
@@ -8,15 +8,15 @@ typedef std::pair<std::string, std::string> arg;
 TEST(Linear, shotgun) {
   typedef std::pair<std::string, std::string> arg;
   auto mat = CreateDMatrix(10, 10, 0);
-  std::vector<bool> enabled(mat->info().num_col, true);
+  std::vector<bool> enabled(mat->Info().num_col_, true);
   mat->InitColAccess(enabled, 1.0f, 1 << 16, false);
   auto updater = std::unique_ptr<xgboost::LinearUpdater>(
       xgboost::LinearUpdater::Create("shotgun"));
   updater->Init({{"eta", "1."}});
-  std::vector<xgboost::bst_gpair> gpair(mat->info().num_row,
-                                        xgboost::bst_gpair(-5, 1.0));
+  std::vector<xgboost::GradientPair> gpair(mat->Info().num_row_,
+                                           xgboost::GradientPair(-5, 1.0));
   xgboost::gbm::GBLinearModel model;
-  model.param.num_feature = mat->info().num_col;
+  model.param.num_feature = mat->Info().num_col_;
   model.param.num_output_group = 1;
   model.LazyInitModel();
   updater->Update(&gpair, mat.get(), &model, gpair.size());
@@ -27,15 +27,15 @@ TEST(Linear, coordinate) {
   typedef std::pair<std::string, std::string> arg;
   auto mat = CreateDMatrix(10, 10, 0);
-  std::vector<bool> enabled(mat->info().num_col, true);
+  std::vector<bool> enabled(mat->Info().num_col_, true);
   mat->InitColAccess(enabled, 1.0f, 1 << 16, false);
   auto updater = std::unique_ptr<xgboost::LinearUpdater>(
       xgboost::LinearUpdater::Create("coord_descent"));
   updater->Init({});
-  std::vector<xgboost::bst_gpair> gpair(mat->info().num_row,
-                                        xgboost::bst_gpair(-5, 1.0));
+  std::vector<xgboost::GradientPair> gpair(mat->Info().num_row_,
+                                           xgboost::GradientPair(-5, 1.0));
   xgboost::gbm::GBLinearModel model;
-  model.param.num_feature = mat->info().num_col;
+  model.param.num_feature = mat->Info().num_col_;
   model.param.num_output_group = 1;
   model.LazyInitModel();
   updater->Update(&gpair, mat.get(), &model, gpair.size());
diff --git a/tests/cpp/objective/test_regression_obj.cc b/tests/cpp/objective/test_regression_obj.cc
index 8a1d3f6ec..41bcaadee 100644
--- a/tests/cpp/objective/test_regression_obj.cc
+++ b/tests/cpp/objective/test_regression_obj.cc
@@ -49,8 +49,8 @@ TEST(Objective, LogisticRegressionBasic) {
   xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
   std::vector<xgboost::bst_float> out_preds = {0.5f, 0.524f, 0.622f, 0.710f, 0.731f};
   obj->PredTransform(&io_preds);
-  auto& preds = io_preds.data_h();
-  for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
+  auto& preds = io_preds.HostVector();
+  for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
     EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
   }
 }
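The expected out_preds in LogisticRegressionBasic are simply the sigmoid of each raw input: 1/(1+e^0) = 0.5, and approximately 0.525, 0.622, 0.711 and 0.731 for inputs 0.1, 0.5, 0.9 and 1, all within the 0.01 tolerance the loop allows. A quick standalone check:

    #include <cmath>
    #include <cstdio>

    int main() {
      const float raw[] = {0.0f, 0.1f, 0.5f, 0.9f, 1.0f};
      for (float x : raw) {
        // prints 0.500, 0.525, 0.622, 0.711, 0.731
        std::printf("%.1f -> %.3f\n", x, 1.0f / (1.0f + std::exp(-x)));
      }
      return 0;
    }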
@@ -98,8 +98,8 @@ TEST(Objective, PoissonRegressionBasic) {
   xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
   std::vector<xgboost::bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
   obj->PredTransform(&io_preds);
-  auto& preds = io_preds.data_h();
-  for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
+  auto& preds = io_preds.HostVector();
+  for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
     EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
   }
 }
@@ -134,8 +134,8 @@ TEST(Objective, GammaRegressionBasic) {
   xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
   std::vector<xgboost::bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
   obj->PredTransform(&io_preds);
-  auto& preds = io_preds.data_h();
-  for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
+  auto& preds = io_preds.HostVector();
+  for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
     EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
   }
 }
@@ -171,8 +171,8 @@ TEST(Objective, TweedieRegressionBasic) {
   xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
   std::vector<xgboost::bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
   obj->PredTransform(&io_preds);
-  auto& preds = io_preds.data_h();
-  for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
+  auto& preds = io_preds.HostVector();
+  for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
     EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
   }
 }
diff --git a/tests/cpp/objective/test_regression_obj_gpu.cu b/tests/cpp/objective/test_regression_obj_gpu.cu
index 0ea8a8e1e..ac9a0bd2d 100644
--- a/tests/cpp/objective/test_regression_obj_gpu.cu
+++ b/tests/cpp/objective/test_regression_obj_gpu.cu
@@ -51,8 +51,8 @@ TEST(Objective, GPULogisticRegressionBasic) {
   xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
   std::vector<xgboost::bst_float> out_preds = {0.5f, 0.524f, 0.622f, 0.710f, 0.731f};
   obj->PredTransform(&io_preds);
-  auto& preds = io_preds.data_h();
-  for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
+  auto& preds = io_preds.HostVector();
+  for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
     EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
   }
 }
diff --git a/tests/cpp/predictor/test_cpu_predictor.cc b/tests/cpp/predictor/test_cpu_predictor.cc
index 0a9c4c8cf..3868831b6 100644
--- a/tests/cpp/predictor/test_cpu_predictor.cc
+++ b/tests/cpp/predictor/test_cpu_predictor.cc
@@ -11,8 +11,8 @@ TEST(cpu_predictor, Test) {
   std::vector<std::unique_ptr<RegTree>> trees;
   trees.push_back(std::unique_ptr<RegTree>(new RegTree));
   trees.back()->InitModel();
-  (*trees.back())[0].set_leaf(1.5f);
-  (*trees.back()).stat(0).sum_hess = 1.0f;
+  (*trees.back())[0].SetLeaf(1.5f);
+  (*trees.back()).Stat(0).sum_hess = 1.0f;
   gbm::GBTreeModel model(0.5);
   model.CommitModel(std::move(trees), 0);
   model.param.num_output_group = 1;
@@ -26,8 +26,8 @@ TEST(cpu_predictor, Test) {
   // Test predict batch
   HostDeviceVector<bst_float> out_predictions;
   cpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
-  std::vector<bst_float>& out_predictions_h = out_predictions.data_h();
-  for (int i = 0; i < out_predictions.size(); i++) {
+  std::vector<bst_float>& out_predictions_h = out_predictions.HostVector();
+  for (int i = 0; i < out_predictions.Size(); i++) {
     ASSERT_EQ(out_predictions_h[i], 1.5);
   }
diff --git a/tests/cpp/predictor/test_gpu_predictor.cu b/tests/cpp/predictor/test_gpu_predictor.cu
index 45fad97d6..fe553c8f8 100644
--- a/tests/cpp/predictor/test_gpu_predictor.cu
+++ b/tests/cpp/predictor/test_gpu_predictor.cu
@@ -21,8 +21,8 @@ TEST(gpu_predictor, Test) {
   std::vector<std::unique_ptr<RegTree>> trees;
   trees.push_back(std::unique_ptr<RegTree>(new RegTree()));
   trees.back()->InitModel();
-  (*trees.back())[0].set_leaf(1.5f);
-  (*trees.back()).stat(0).sum_hess = 1.0f;
+  (*trees.back())[0].SetLeaf(1.5f);
+  (*trees.back()).Stat(0).sum_hess = 1.0f;
   gbm::GBTreeModel model(0.5);
   model.CommitModel(std::move(trees), 0);
   model.param.num_output_group = 1;
@@ -37,10 +37,10 @@ TEST(gpu_predictor, Test) {
   HostDeviceVector<bst_float> cpu_out_predictions;
   gpu_predictor->PredictBatch(dmat.get(), &gpu_out_predictions, model, 0);
   cpu_predictor->PredictBatch(dmat.get(), &cpu_out_predictions, model, 0);
-  std::vector<bst_float>& gpu_out_predictions_h = gpu_out_predictions.data_h();
-  std::vector<bst_float>& cpu_out_predictions_h = cpu_out_predictions.data_h();
+  std::vector<bst_float>& gpu_out_predictions_h = gpu_out_predictions.HostVector();
+  std::vector<bst_float>& cpu_out_predictions_h = cpu_out_predictions.HostVector();
   float abs_tolerance = 0.001;
-  for (int i = 0; i < gpu_out_predictions.size(); i++) {
+  for (int i = 0; i < gpu_out_predictions.Size(); i++) {
     ASSERT_LT(std::abs(gpu_out_predictions_h[i] - cpu_out_predictions_h[i]),
               abs_tolerance);
   }
diff --git a/tests/cpp/tree/test_gpu_hist.cu b/tests/cpp/tree/test_gpu_hist.cu
index c07e33732..cb5fcae56 100644
--- a/tests/cpp/tree/test_gpu_hist.cu
+++ b/tests/cpp/tree/test_gpu_hist.cu
@@ -29,7 +29,7 @@ TEST(gpu_hist_experimental, TestSparseShard) {
   ASSERT_LT(shard.row_stride, columns);

-  auto host_gidx_buffer = shard.gidx_buffer.as_vector();
+  auto host_gidx_buffer = shard.gidx_buffer.AsVector();
   common::CompressedIterator<uint32_t> gidx(host_gidx_buffer.data(),
                                             hmat.row_ptr.back() + 1);
@@ -64,7 +64,7 @@ TEST(gpu_hist_experimental, TestDenseShard) {
   ASSERT_EQ(shard.row_stride, columns);

-  auto host_gidx_buffer = shard.gidx_buffer.as_vector();
+  auto host_gidx_buffer = shard.gidx_buffer.AsVector();
   common::CompressedIterator<uint32_t> gidx(host_gidx_buffer.data(),
                                             hmat.row_ptr.back() + 1);
diff --git a/tests/cpp/tree/test_param.cc b/tests/cpp/tree/test_param.cc
index f5d1a3aeb..1b9164148 100644
--- a/tests/cpp/tree/test_param.cc
+++ b/tests/cpp/tree/test_param.cc
@@ -89,8 +89,8 @@ TEST(Param, SplitEntry) {
   xgboost::tree::SplitEntry se3;
   se3.Update(2, 101, 0, false);
   xgboost::tree::SplitEntry::Reduce(se2, se3);
-  EXPECT_EQ(se2.split_index(), 101);
-  EXPECT_FALSE(se2.default_left());
+  EXPECT_EQ(se2.SplitIndex(), 101);
+  EXPECT_FALSE(se2.DefaultLeft());

   EXPECT_TRUE(se1.NeedReplace(3, 1));
 }
diff --git a/tests/travis/run_test.sh b/tests/travis/run_test.sh
index c3d5c2391..be35eb8cb 100755
--- a/tests/travis/run_test.sh
+++ b/tests/travis/run_test.sh
@@ -10,6 +10,21 @@ if [ ${TASK} == "lint" ]; then
     echo "----------------------------"
     (cat logclean.txt|grep warning) && exit -1
     (cat logclean.txt|grep error) && exit -1
+
+    # Rename cuda files for static analysis
+    for file in $(find src -name '*.cu'); do
+        cp "$file" "${file/.cu/_tmp.cc}"
+    done
+
+    header_filter='(xgboost\/src|xgboost\/include)'
+    for filename in $(find src -name '*.cc'); do
+        clang-tidy $filename -header-filter=$header_filter -- -Iinclude -Idmlc-core/include -Irabit/include -std=c++11 >> logtidy.txt
+    done
+    echo "---------clang-tidy log----------"
+    cat logtidy.txt
+    echo "----------------------------"
+    # Fail only on warnings related to XGBoost source files
+    (cat logtidy.txt|grep -E 'dmlc/xgboost.*warning'|grep -v dmlc-core) && exit -1
     exit 0
 fi
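Two shell details in the lint block above are worth noting. ${file/.cu/_tmp.cc} is plain bash pattern substitution: it replaces the first occurrence of .cu in the path with _tmp.cc, so a CUDA source such as src/foo.cu (a hypothetical example) gains a C++ copy src/foo_tmp.cc that the subsequent find over *.cc picks up for analysis. The final pipeline is what turns diagnostics into failures: grep -E 'dmlc/xgboost.*warning' keeps only warnings whose paths point inside the xgboost checkout, while grep -v dmlc-core then drops the vendored submodule, so third-party headers cannot fail the job.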