Clang-tidy static analysis (#3222)

* Clang-tidy static analysis

* Modernise checks

* Google coding standard checks

* Identifier renaming according to Google style
Rory Mitchell 2018-04-19 18:57:13 +12:00 committed by GitHub
parent 3242b0a378
commit ccf80703ef
97 changed files with 3407 additions and 3354 deletions

.clang-tidy (new file, 22 lines)

@@ -0,0 +1,22 @@
+Checks: 'modernize-*,-modernize-make-*,-modernize-raw-string-literal,google-*,-google-default-arguments,-clang-diagnostic-#pragma-messages,readability-identifier-naming'
+CheckOptions:
+  - { key: readability-identifier-naming.ClassCase, value: CamelCase }
+  - { key: readability-identifier-naming.StructCase, value: CamelCase }
+  - { key: readability-identifier-naming.TypeAliasCase, value: CamelCase }
+  - { key: readability-identifier-naming.TypedefCase, value: CamelCase }
+  - { key: readability-identifier-naming.TypeTemplateParameterCase, value: CamelCase }
+  - { key: readability-identifier-naming.LocalVariableCase, value: lower_case }
+  - { key: readability-identifier-naming.MemberCase, value: lower_case }
+  - { key: readability-identifier-naming.PrivateMemberSuffix, value: '_' }
+  - { key: readability-identifier-naming.ProtectedMemberSuffix, value: '_' }
+  - { key: readability-identifier-naming.EnumCase, value: CamelCase }
+  - { key: readability-identifier-naming.EnumConstantCase, value: CamelCase }
+  - { key: readability-identifier-naming.EnumConstantPrefix, value: k }
+  - { key: readability-identifier-naming.GlobalConstantCase, value: CamelCase }
+  - { key: readability-identifier-naming.GlobalConstantPrefix, value: k }
+  - { key: readability-identifier-naming.StaticConstantCase, value: CamelCase }
+  - { key: readability-identifier-naming.StaticConstantPrefix, value: k }
+  - { key: readability-identifier-naming.ConstexprVariableCase, value: CamelCase }
+  - { key: readability-identifier-naming.ConstexprVariablePrefix, value: k }
+  - { key: readability-identifier-naming.FunctionCase, value: CamelCase }
+  - { key: readability-identifier-naming.NamespaceCase, value: lower_case }
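For reference, a small sketch (not part of the commit, all names invented for illustration) of identifiers that satisfy the rules above:

namespace my_namespace {                      // NamespaceCase: lower_case

constexpr int kMaxDepth = 8;                  // ConstexprVariableCase + 'k' prefix

enum class TreeMethod { kExact, kHist };      // EnumCase + EnumConstantPrefix 'k'

class SplitEvaluator {                        // ClassCase: CamelCase
 public:
  int BestFeature() const { return best_feature_; }  // FunctionCase: CamelCase
 private:
  int best_feature_{0};                       // MemberCase + PrivateMemberSuffix '_'
};

}  // namespace my_namespace

int main() {
  my_namespace::SplitEvaluator split_evaluator;      // LocalVariableCase: lower_case
  return split_evaluator.BestFeature();
}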


@@ -44,10 +44,12 @@ matrix:
     addons:
       apt:
         sources:
+          - llvm-toolchain-trusty-5.0
           - ubuntu-toolchain-r-test
           - george-edison55-precise-backports
         packages:
-          - cmake
+          - clang
+          - clang-tidy-5.0
          - cmake-data
          - doxygen
          - wget


@@ -81,20 +81,19 @@ namespace xgboost {
  * \brief unsigned integer type used in boost,
  *  used for feature index and row index.
  */
-typedef uint32_t bst_uint;
-typedef int32_t bst_int;
+using bst_uint = uint32_t;  // NOLINT
+using bst_int = int32_t;    // NOLINT
 /*! \brief long integers */
 typedef uint64_t bst_ulong;  // NOLINT(*)
 /*! \brief float type, used for storing statistics */
-typedef float bst_float;
+using bst_float = float;  // NOLINT
 namespace detail {
 /*! \brief Implementation of gradient statistics pair. Template specialisation
  * may be used to overload different gradients types e.g. low precision, high
  * precision, integer, floating point. */
 template <typename T>
-class bst_gpair_internal {
+class GradientPairInternal {
   /*! \brief gradient statistics */
   T grad_;
   /*! \brief second order gradient statistics */
@@ -104,23 +103,23 @@ class bst_gpair_internal {
   XGBOOST_DEVICE void SetHess(float h) { hess_ = h; }
  public:
-  typedef T value_t;
+  using ValueT = T;
-  XGBOOST_DEVICE bst_gpair_internal() : grad_(0), hess_(0) {}
-  XGBOOST_DEVICE bst_gpair_internal(float grad, float hess) {
+  XGBOOST_DEVICE GradientPairInternal() : grad_(0), hess_(0) {}
+  XGBOOST_DEVICE GradientPairInternal(float grad, float hess) {
     SetGrad(grad);
     SetHess(hess);
   }
   // Copy constructor if of same value type
-  XGBOOST_DEVICE bst_gpair_internal(const bst_gpair_internal<T> &g)
-      : grad_(g.grad_), hess_(g.hess_) {}
+  XGBOOST_DEVICE GradientPairInternal(const GradientPairInternal<T> &g)
+      : grad_(g.grad_), hess_(g.hess_) {}  // NOLINT
   // Copy constructor if different value type - use getters and setters to
   // perform conversion
   template <typename T2>
-  XGBOOST_DEVICE bst_gpair_internal(const bst_gpair_internal<T2> &g) {
+  XGBOOST_DEVICE explicit GradientPairInternal(const GradientPairInternal<T2> &g) {
     SetGrad(g.GetGrad());
     SetHess(g.GetHess());
   }
@@ -128,85 +127,85 @@ class bst_gpair_internal {
   XGBOOST_DEVICE float GetGrad() const { return grad_; }
   XGBOOST_DEVICE float GetHess() const { return hess_; }
-  XGBOOST_DEVICE bst_gpair_internal<T> &operator+=(
-      const bst_gpair_internal<T> &rhs) {
+  XGBOOST_DEVICE GradientPairInternal<T> &operator+=(
+      const GradientPairInternal<T> &rhs) {
     grad_ += rhs.grad_;
     hess_ += rhs.hess_;
     return *this;
   }
-  XGBOOST_DEVICE bst_gpair_internal<T> operator+(
-      const bst_gpair_internal<T> &rhs) const {
-    bst_gpair_internal<T> g;
+  XGBOOST_DEVICE GradientPairInternal<T> operator+(
+      const GradientPairInternal<T> &rhs) const {
+    GradientPairInternal<T> g;
     g.grad_ = grad_ + rhs.grad_;
     g.hess_ = hess_ + rhs.hess_;
     return g;
   }
-  XGBOOST_DEVICE bst_gpair_internal<T> &operator-=(
-      const bst_gpair_internal<T> &rhs) {
+  XGBOOST_DEVICE GradientPairInternal<T> &operator-=(
+      const GradientPairInternal<T> &rhs) {
     grad_ -= rhs.grad_;
     hess_ -= rhs.hess_;
     return *this;
   }
-  XGBOOST_DEVICE bst_gpair_internal<T> operator-(
-      const bst_gpair_internal<T> &rhs) const {
-    bst_gpair_internal<T> g;
+  XGBOOST_DEVICE GradientPairInternal<T> operator-(
+      const GradientPairInternal<T> &rhs) const {
+    GradientPairInternal<T> g;
     g.grad_ = grad_ - rhs.grad_;
     g.hess_ = hess_ - rhs.hess_;
     return g;
   }
-  XGBOOST_DEVICE bst_gpair_internal(int value) {
-    *this = bst_gpair_internal<T>(static_cast<float>(value),
+  XGBOOST_DEVICE explicit GradientPairInternal(int value) {
+    *this = GradientPairInternal<T>(static_cast<float>(value),
                                   static_cast<float>(value));
   }
   friend std::ostream &operator<<(std::ostream &os,
-                                  const bst_gpair_internal<T> &g) {
+                                  const GradientPairInternal<T> &g) {
     os << g.GetGrad() << "/" << g.GetHess();
     return os;
   }
 };
 template<>
-inline XGBOOST_DEVICE float bst_gpair_internal<int64_t>::GetGrad() const {
+inline XGBOOST_DEVICE float GradientPairInternal<int64_t>::GetGrad() const {
   return grad_ * 1e-4f;
 }
 template<>
-inline XGBOOST_DEVICE float bst_gpair_internal<int64_t>::GetHess() const {
+inline XGBOOST_DEVICE float GradientPairInternal<int64_t>::GetHess() const {
   return hess_ * 1e-4f;
 }
 template<>
-inline XGBOOST_DEVICE void bst_gpair_internal<int64_t>::SetGrad(float g) {
+inline XGBOOST_DEVICE void GradientPairInternal<int64_t>::SetGrad(float g) {
   grad_ = static_cast<int64_t>(std::round(g * 1e4));
 }
 template<>
-inline XGBOOST_DEVICE void bst_gpair_internal<int64_t>::SetHess(float h) {
+inline XGBOOST_DEVICE void GradientPairInternal<int64_t>::SetHess(float h) {
   hess_ = static_cast<int64_t>(std::round(h * 1e4));
 }
 }  // namespace detail
 /*! \brief gradient statistics pair usually needed in gradient boosting */
-typedef detail::bst_gpair_internal<float> bst_gpair;
+using GradientPair = detail::GradientPairInternal<float>;
 /*! \brief High precision gradient statistics pair */
-typedef detail::bst_gpair_internal<double> bst_gpair_precise;
+using GradientPairPrecise = detail::GradientPairInternal<double>;
 /*! \brief High precision gradient statistics pair with integer backed
  * storage. Operators are associative where floating point versions are not
  * associative. */
-typedef detail::bst_gpair_internal<int64_t> bst_gpair_integer;
+using GradientPairInteger = detail::GradientPairInternal<int64_t>;
 /*! \brief small eps gap for minimum split decision. */
-const bst_float rt_eps = 1e-6f;
+const bst_float kRtEps = 1e-6f;
 /*! \brief define unsigned long for openmp loop */
-typedef dmlc::omp_ulong omp_ulong;
+using omp_ulong = dmlc::omp_ulong;  // NOLINT
 /*! \brief define unsigned int for openmp loop */
-typedef dmlc::omp_uint bst_omp_uint;
+using bst_omp_uint = dmlc::omp_uint;  // NOLINT
 /*!
  * \brief define compatible keywords in g++
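Aside (not part of the diff): the int64_t specialisations above implement a fixed-point trick — gradients are quantised at a 1e4 scale so that accumulation is exact integer addition, which is what makes GradientPairInteger's operators associative where the float versions are not. A standalone sketch of the same idea, with hypothetical names:

#include <cmath>
#include <cstdint>
#include <iostream>

// Floats are quantised to int64 at a 1e4 scale, so addition is an exact
// integer add and the result is independent of summation order.
struct FixedPointGrad {
  int64_t grad{0};
  void Set(float g) { grad = static_cast<int64_t>(std::round(g * 1e4)); }
  float Get() const { return grad * 1e-4f; }
  FixedPointGrad& operator+=(const FixedPointGrad& rhs) {
    grad += rhs.grad;  // exact, order-independent
    return *this;
  }
};

int main() {
  FixedPointGrad a, b;
  a.Set(0.12345f);  // quantised to 1235 at the 1e4 scale
  b.Set(0.5f);      // quantised to 5000
  a += b;
  std::cout << a.Get() << "\n";  // prints 0.6235
  return 0;
}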


@@ -30,16 +30,16 @@ typedef uint64_t bst_ulong;  // NOLINT(*)
 /*! \brief handle to DMatrix */
-typedef void *DMatrixHandle;
+typedef void *DMatrixHandle;  // NOLINT(*)
 /*! \brief handle to Booster */
-typedef void *BoosterHandle;
+typedef void *BoosterHandle;  // NOLINT(*)
 /*! \brief handle to a data iterator */
-typedef void *DataIterHandle;
+typedef void *DataIterHandle;  // NOLINT(*)
 /*! \brief handle to a internal data holder. */
-typedef void *DataHolderHandle;
+typedef void *DataHolderHandle;  // NOLINT(*)
 /*! \brief Mini batch used in XGBoost Data Iteration */
-typedef struct {
+typedef struct {  // NOLINT(*)
   /*! \brief number of rows in the minibatch */
   size_t size;
   /*! \brief row pointer to the rows in the data */
@@ -66,7 +66,7 @@ typedef struct {
  * \param handle The handle to the callback.
  * \param batch The data content to be set.
  */
-XGB_EXTERN_C typedef int XGBCallbackSetData(
+XGB_EXTERN_C typedef int XGBCallbackSetData(  // NOLINT(*)
     DataHolderHandle handle, XGBoostBatchCSR batch);
 /*!
@@ -80,9 +80,8 @@ XGB_EXTERN_C typedef int XGBCallbackSetData(
  * \param set_function_handle The handle to be passed to set function.
  * \return 0 if we are reaching the end and batch is not returned.
  */
-XGB_EXTERN_C typedef int XGBCallbackDataIterNext(
-    DataIterHandle data_handle,
-    XGBCallbackSetData* set_function,
+XGB_EXTERN_C typedef int XGBCallbackDataIterNext(  // NOLINT(*)
+    DataIterHandle data_handle, XGBCallbackSetData *set_function,
     DataHolderHandle set_function_handle);
 /*!
@@ -216,11 +215,9 @@ XGB_DLL int XGDMatrixCreateFromMat(const float *data,
  * \param nthread number of threads (up to maximum cores available, if <=0 use all cores)
  * \return 0 when success, -1 when failure happens
  */
-XGB_DLL int XGDMatrixCreateFromMat_omp(const float *data,
-                                       bst_ulong nrow,
-                                       bst_ulong ncol,
-                                       float missing,
-                                       DMatrixHandle *out,
+XGB_DLL int XGDMatrixCreateFromMat_omp(const float *data,  // NOLINT
+                                       bst_ulong nrow, bst_ulong ncol,
+                                       float missing, DMatrixHandle *out,
                                        int nthread);
 /*!
  * \brief create a new dmatrix from sliced content of existing matrix
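A usage sketch (not from the commit) for the reflowed XGDMatrixCreateFromMat_omp declaration, assuming xgboost/c_api.h is on the include path:

#include <xgboost/c_api.h>

// Build a DMatrix from a dense 2x3 row-major matrix, then free it.
int Example() {
  const float data[] = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
  DMatrixHandle dmat = nullptr;
  int err = XGDMatrixCreateFromMat_omp(data, /*nrow=*/2, /*ncol=*/3,
                                       /*missing=*/-1.f, &dmat,
                                       /*nthread=*/0);  // <=0: use all cores
  if (err == 0) {
    XGDMatrixFree(dmat);  // cleanup call from the same C API
  }
  return err;  // 0 on success, -1 on failure
}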


@@ -30,44 +30,45 @@ enum DataType {
 /*!
  * \brief Meta information about dataset, always sit in memory.
  */
-struct MetaInfo {
+class MetaInfo {
+ public:
   /*! \brief number of rows in the data */
-  uint64_t num_row;
+  uint64_t num_row_{0};
   /*! \brief number of columns in the data */
-  uint64_t num_col;
+  uint64_t num_col_{0};
   /*! \brief number of nonzero entries in the data */
-  uint64_t num_nonzero;
+  uint64_t num_nonzero_{0};
   /*! \brief label of each instance */
-  std::vector<bst_float> labels;
+  std::vector<bst_float> labels_;
   /*!
    * \brief specified root index of each instance,
    *  can be used for multi task setting
    */
-  std::vector<bst_uint> root_index;
+  std::vector<bst_uint> root_index_;
   /*!
    * \brief the index of begin and end of a group
    *  needed when the learning task is ranking.
    */
-  std::vector<bst_uint> group_ptr;
+  std::vector<bst_uint> group_ptr_;
   /*! \brief weights of each instance, optional */
-  std::vector<bst_float> weights;
+  std::vector<bst_float> weights_;
   /*!
    * \brief initialized margins,
    * if specified, xgboost will start from this init margin
    * can be used to specify initial prediction to boost from.
    */
-  std::vector<bst_float> base_margin;
+  std::vector<bst_float> base_margin_;
   /*! \brief version flag, used to check version of this info */
   static const int kVersion = 1;
   /*! \brief default constructor */
-  MetaInfo() : num_row(0), num_col(0), num_nonzero(0) {}
+  MetaInfo() = default;
   /*!
    * \brief Get weight of each instances.
    * \param i Instance index.
    * \return The weight.
    */
   inline bst_float GetWeight(size_t i) const {
-    return weights.size() != 0 ? weights[i] : 1.0f;
+    return weights_.size() != 0 ? weights_[i] : 1.0f;
   }
   /*!
    * \brief Get the root index of i-th instance.
@@ -75,20 +76,20 @@ struct MetaInfo {
    * \return The pre-defined root index of i-th instance.
    */
   inline unsigned GetRoot(size_t i) const {
-    return root_index.size() != 0 ? root_index[i] : 0U;
+    return root_index_.size() != 0 ? root_index_[i] : 0U;
   }
   /*! \brief get sorted indexes (argsort) of labels by absolute value (used by cox loss) */
   inline const std::vector<size_t>& LabelAbsSort() const {
-    if (label_order_cache.size() == labels.size()) {
-      return label_order_cache;
+    if (label_order_cache_.size() == labels_.size()) {
+      return label_order_cache_;
     }
-    label_order_cache.resize(labels.size());
-    std::iota(label_order_cache.begin(), label_order_cache.end(), 0);
-    const auto l = labels;
-    XGBOOST_PARALLEL_SORT(label_order_cache.begin(), label_order_cache.end(),
+    label_order_cache_.resize(labels_.size());
+    std::iota(label_order_cache_.begin(), label_order_cache_.end(), 0);
+    const auto l = labels_;
+    XGBOOST_PARALLEL_SORT(label_order_cache_.begin(), label_order_cache_.end(),
               [&l](size_t i1, size_t i2) {return std::abs(l[i1]) < std::abs(l[i2]);});
-    return label_order_cache;
+    return label_order_cache_;
   }
   /*! \brief clear all the information */
   void Clear();
@@ -113,7 +114,7 @@ struct MetaInfo {
  private:
   /*! \brief argsort of labels */
-  mutable std::vector<size_t> label_order_cache;
+  mutable std::vector<size_t> label_order_cache_;
 };
 /*! \brief read-only sparse instance batch in CSR format */
@@ -125,7 +126,7 @@ struct SparseBatch {
     /*! \brief feature value */
     bst_float fvalue;
     /*! \brief default constructor */
-    Entry() {}
+    Entry() = default;
     /*!
      * \brief constructor with index and value
      * \param index The feature or row index.
@@ -141,11 +142,11 @@ struct SparseBatch {
   /*! \brief an instance of sparse vector in the batch */
   struct Inst {
     /*! \brief pointer to the elements*/
-    const Entry *data;
+    const Entry *data{nullptr};
     /*! \brief length of the instance */
-    bst_uint length;
+    bst_uint length{0};
     /*! \brief constructor */
-    Inst() : data(0), length(0) {}
+    Inst() = default;
     Inst(const Entry *data, bst_uint length) : data(data), length(length) {}
     /*! \brief get i-th pair in the sparse vector*/
     inline const Entry& operator[](size_t i) const {
@@ -167,7 +168,7 @@ struct RowBatch : public SparseBatch {
   const Entry *data_ptr;
   /*! \brief get i-th row from the batch */
   inline Inst operator[](size_t i) const {
-    return Inst(data_ptr + ind_ptr[i], static_cast<bst_uint>(ind_ptr[i + 1] - ind_ptr[i]));
+    return {data_ptr + ind_ptr[i], static_cast<bst_uint>(ind_ptr[i + 1] - ind_ptr[i])};
   }
 };
@@ -206,16 +207,16 @@ class DataSource : public dmlc::DataIter<RowBatch> {
  * \brief A vector-like structure to represent set of rows.
  * But saves the memory when all rows are in the set (common case in xgb)
  */
-struct RowSet {
+class RowSet {
  public:
   /*! \return i-th row index */
   inline bst_uint operator[](size_t i) const;
   /*! \return the size of the set. */
-  inline size_t size() const;
+  inline size_t Size() const;
   /*! \brief push the index back to the set */
-  inline void push_back(bst_uint i);
+  inline void PushBack(bst_uint i);
   /*! \brief clear the set */
-  inline void clear();
+  inline void Clear();
   /*!
    * \brief save rowset to file.
    * \param fo The file to be saved.
@@ -228,11 +229,11 @@ struct RowSet {
    */
   inline bool Load(dmlc::Stream* fi);
   /*! \brief constructor */
-  RowSet() : size_(0) {}
+  RowSet() = default;
  private:
   /*! \brief The internal data structure of size */
-  uint64_t size_;
+  uint64_t size_{0};
   /*! \brief The internal data structure of row set if not all*/
   std::vector<bst_uint> rows_;
 };
@@ -250,11 +251,11 @@ struct RowSet {
 class DMatrix {
  public:
   /*! \brief default constructor */
-  DMatrix() : cache_learner_ptr_(nullptr) {}
+  DMatrix() = default;
   /*! \brief meta information of the dataset */
-  virtual MetaInfo& info() = 0;
+  virtual MetaInfo& Info() = 0;
   /*! \brief meta information of the dataset */
-  virtual const MetaInfo& info() const = 0;
+  virtual const MetaInfo& Info() const = 0;
   /*!
    * \brief get the row iterator, reset to beginning position
    * \note Only either RowIterator or column Iterator can be active.
@@ -291,9 +292,9 @@ class DMatrix {
   /*! \brief get column density */
   virtual float GetColDensity(size_t cidx) const = 0;
   /*! \return reference of buffered rowset, in column access */
-  virtual const RowSet& buffered_rowset() const = 0;
+  virtual const RowSet& BufferedRowset() const = 0;
   /*! \brief virtual destructor */
-  virtual ~DMatrix() {}
+  virtual ~DMatrix() = default;
   /*!
    * \brief Save DMatrix to local file.
    * The saved file only works for non-sharded dataset(single machine training).
@@ -343,7 +344,7 @@ class DMatrix {
   // allow learner class to access this field.
   friend class LearnerImpl;
   /*! \brief public field to back ref cached matrix. */
-  LearnerImpl* cache_learner_ptr_;
+  LearnerImpl* cache_learner_ptr_{nullptr};
 };
 // implementation of inline functions
@@ -351,15 +352,15 @@ inline bst_uint RowSet::operator[](size_t i) const {
   return rows_.size() == 0 ? static_cast<bst_uint>(i) : rows_[i];
 }
-inline size_t RowSet::size() const {
+inline size_t RowSet::Size() const {
   return size_;
 }
-inline void RowSet::clear() {
+inline void RowSet::Clear() {
   rows_.clear(); size_ = 0;
 }
-inline void RowSet::push_back(bst_uint i) {
+inline void RowSet::PushBack(bst_uint i) {
   if (rows_.size() == 0) {
     if (i == size_) {
       ++size_; return;
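A minimal sketch (not from the commit) of the renamed RowSet interface, assuming the header above is available; PushBack/Size/Clear replace push_back/size/clear, and dense in-order pushes only advance the internal counter rather than materialising the vector:

#include <xgboost/data.h>

void RowSetExample() {
  xgboost::RowSet rows;
  for (xgboost::bst_uint i = 0; i < 100; ++i) {
    rows.PushBack(i);      // in-order pushes: only size_ is incremented
  }
  size_t n = rows.Size();  // renamed from size(); 100 here
  rows.Clear();            // renamed from clear()
  (void)n;
}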


@@ -45,7 +45,7 @@ class FeatureMap {
    */
   inline void PushBack(int fid, const char *fname, const char *ftype) {
     CHECK_EQ(fid, static_cast<int>(names_.size()));
-    names_.push_back(std::string(fname));
+    names_.emplace_back(fname);
     types_.push_back(GetType(ftype));
   }
   /*! \brief clear the feature map */
@@ -54,11 +54,11 @@ class FeatureMap {
     types_.clear();
   }
   /*! \return number of known features */
-  inline size_t size() const {
+  inline size_t Size() const {
     return names_.size();
   }
   /*! \return name of specific feature */
-  inline const char* name(size_t idx) const {
+  inline const char* Name(size_t idx) const {
     CHECK_LT(idx, names_.size()) << "FeatureMap feature index exceed bound";
     return names_[idx].c_str();
   }
@@ -75,7 +75,7 @@ class FeatureMap {
    * \return The translated type.
    */
   inline static Type GetType(const char* tname) {
-    using namespace std;
+    using std::strcmp;
     if (!strcmp("i", tname)) return kIndicator;
     if (!strcmp("q", tname)) return kQuantitive;
     if (!strcmp("int", tname)) return kInteger;


@@ -27,7 +27,7 @@ namespace xgboost {
 class GradientBooster {
  public:
   /*! \brief virtual destructor */
-  virtual ~GradientBooster() {}
+  virtual ~GradientBooster() = default;
   /*!
    * \brief set configuration from pair iterators.
    * \param begin The beginning iterator.
@@ -69,7 +69,7 @@ class GradientBooster {
    *            the booster may change content of gpair
    */
   virtual void DoBoost(DMatrix* p_fmat,
-                       HostDeviceVector<bst_gpair>* in_gpair,
+                       HostDeviceVector<GradientPair>* in_gpair,
                        ObjFunction* obj = nullptr) = 0;
 /*!


@@ -37,7 +37,7 @@ namespace xgboost {
 class Learner : public rabit::Serializable {
  public:
   /*! \brief virtual destructor */
-  virtual ~Learner() {}
+  ~Learner() override = default;
   /*!
    * \brief set configuration from pair iterators.
    * \param begin The beginning iterator.
@@ -62,12 +62,12 @@ class Learner : public rabit::Serializable {
    * \brief load model from stream
    * \param fi input stream.
    */
-  virtual void Load(dmlc::Stream* fi) = 0;
+  void Load(dmlc::Stream* fi) override = 0;
   /*!
    * \brief save model to stream.
    * \param fo output stream
    */
-  virtual void Save(dmlc::Stream* fo) const = 0;
+  void Save(dmlc::Stream* fo) const override = 0;
   /*!
    * \brief update the model for one iteration
    *  With the specified objective function.
@@ -84,7 +84,7 @@ class Learner : public rabit::Serializable {
    */
   virtual void BoostOneIter(int iter,
                             DMatrix* train,
-                            HostDeviceVector<bst_gpair>* in_gpair) = 0;
+                            HostDeviceVector<GradientPair>* in_gpair) = 0;
   /*!
    * \brief evaluate the model for specific iteration using the configured metrics.
    * \param iter iteration number
@@ -194,7 +194,7 @@ inline void Learner::Predict(const SparseBatch::Inst& inst,
                              bool output_margin,
                              HostDeviceVector<bst_float>* out_preds,
                              unsigned ntree_limit) const {
-  gbm_->PredictInstance(inst, &out_preds->data_h(), ntree_limit);
+  gbm_->PredictInstance(inst, &out_preds->HostVector(), ntree_limit);
   if (!output_margin) {
     obj_->PredTransform(out_preds);
   }


@@ -19,7 +19,7 @@ namespace xgboost {
 class LinearUpdater {
  public:
   /*! \brief virtual destructor */
-  virtual ~LinearUpdater() {}
+  virtual ~LinearUpdater() = default;
   /*!
    * \brief Initialize the updater with given arguments.
    * \param args arguments to the objective function.
@@ -36,7 +36,7 @@ class LinearUpdater {
    * \param sum_instance_weight The sum instance weights, used to normalise l1/l2 penalty.
    */
-  virtual void Update(std::vector<bst_gpair>* in_gpair, DMatrix* data,
+  virtual void Update(std::vector<GradientPair>* in_gpair, DMatrix* data,
                       gbm::GBLinearModel* model,
                       double sum_instance_weight) = 0;


@@ -21,7 +21,7 @@ class BaseLogger {
     log_stream_ << "[" << dmlc::DateLogger().HumanDate() << "] ";
 #endif
   }
-  std::ostream& stream() { return log_stream_; }
+  std::ostream& stream() { return log_stream_; }  // NOLINT
  protected:
   std::ostringstream log_stream_;


@@ -35,7 +35,7 @@ class Metric {
   /*! \return name of metric */
   virtual const char* Name() const = 0;
   /*! \brief virtual destructor */
-  virtual ~Metric() {}
+  virtual ~Metric() = default;
   /*!
    * \brief create a metric according to name.
    * \param name name of the metric.


@@ -23,7 +23,7 @@ namespace xgboost {
 class ObjFunction {
  public:
   /*! \brief virtual destructor */
-  virtual ~ObjFunction() {}
+  virtual ~ObjFunction() = default;
   /*!
    * \brief set configuration from pair iterators.
    * \param begin The beginning iterator.
@@ -47,7 +47,7 @@ class ObjFunction {
   virtual void GetGradient(HostDeviceVector<bst_float>* preds,
                            const MetaInfo& info,
                            int iteration,
-                           HostDeviceVector<bst_gpair>* out_gpair) = 0;
+                           HostDeviceVector<GradientPair>* out_gpair) = 0;
   /*! \return the default evaluation metric for the objective */
   virtual const char* DefaultEvalMetric() const = 0;


@@ -36,7 +36,7 @@ namespace xgboost {
 class Predictor {
  public:
-  virtual ~Predictor() {}
+  virtual ~Predictor() = default;
   /**
    * \fn virtual void Predictor::Init(const std::vector<std::pair<std::string,


@@ -71,70 +71,70 @@ template<typename TSplitCond, typename TNodeStat>
 class TreeModel {
  public:
   /*! \brief data type to indicate split condition */
-  typedef TNodeStat NodeStat;
+  using NodeStat = TNodeStat;
   /*! \brief auxiliary statistics of node to help tree building */
-  typedef TSplitCond SplitCond;
+  using SplitCond = TSplitCond;
   /*! \brief tree node */
   class Node {
    public:
-    Node() : sindex_(0) {
+    Node() {
       // assert compact alignment
       static_assert(sizeof(Node) == 4 * sizeof(int) + sizeof(Info),
                     "Node: 64 bit align");
     }
     /*! \brief index of left child */
-    inline int cleft() const {
+    inline int LeftChild() const {
       return this->cleft_;
     }
     /*! \brief index of right child */
-    inline int cright() const {
+    inline int RightChild() const {
       return this->cright_;
     }
     /*! \brief index of default child when feature is missing */
-    inline int cdefault() const {
-      return this->default_left() ? this->cleft() : this->cright();
+    inline int DefaultChild() const {
+      return this->DefaultLeft() ? this->LeftChild() : this->RightChild();
     }
     /*! \brief feature index of split condition */
-    inline unsigned split_index() const {
+    inline unsigned SplitIndex() const {
       return sindex_ & ((1U << 31) - 1U);
     }
     /*! \brief when feature is unknown, whether goes to left child */
-    inline bool default_left() const {
+    inline bool DefaultLeft() const {
       return (sindex_ >> 31) != 0;
     }
     /*! \brief whether current node is leaf node */
-    inline bool is_leaf() const {
+    inline bool IsLeaf() const {
       return cleft_ == -1;
     }
     /*! \return get leaf value of leaf node */
-    inline bst_float leaf_value() const {
+    inline bst_float LeafValue() const {
       return (this->info_).leaf_value;
     }
     /*! \return get split condition of the node */
-    inline TSplitCond split_cond() const {
+    inline TSplitCond SplitCond() const {
      return (this->info_).split_cond;
     }
     /*! \brief get parent of the node */
-    inline int parent() const {
+    inline int Parent() const {
       return parent_ & ((1U << 31) - 1);
     }
     /*! \brief whether current node is left child */
-    inline bool is_left_child() const {
+    inline bool IsLeftChild() const {
       return (parent_ & (1U << 31)) != 0;
     }
     /*! \brief whether this node is deleted */
-    inline bool is_deleted() const {
+    inline bool IsDeleted() const {
       return sindex_ == std::numeric_limits<unsigned>::max();
     }
     /*! \brief whether current node is root */
-    inline bool is_root() const {
+    inline bool IsRoot() const {
       return parent_ == -1;
     }
     /*!
      * \brief set the right child
      * \param nid node id to right child
      */
-    inline void set_right_child(int nid) {
+    inline void SetRightChild(int nid) {
       this->cright_ = nid;
     }
     /*!
@@ -143,7 +143,7 @@ class TreeModel {
      * \param split_cond split condition
      * \param default_left the default direction when feature is unknown
      */
-    inline void set_split(unsigned split_index, TSplitCond split_cond,
+    inline void SetSplit(unsigned split_index, TSplitCond split_cond,
                           bool default_left = false) {
       if (default_left) split_index |= (1U << 31);
       this->sindex_ = split_index;
@@ -155,13 +155,13 @@ class TreeModel {
      * \param right right index, could be used to store
      *  additional information
      */
-    inline void set_leaf(bst_float value, int right = -1) {
+    inline void SetLeaf(bst_float value, int right = -1) {
       (this->info_).leaf_value = value;
       this->cleft_ = -1;
       this->cright_ = right;
     }
     /*! \brief mark that this node is deleted */
-    inline void mark_delete() {
+    inline void MarkDelete() {
       this->sindex_ = std::numeric_limits<unsigned>::max();
     }
@@ -181,11 +181,11 @@ class TreeModel {
     // pointer to left, right
     int cleft_, cright_;
     // split feature index, left split or right split depends on the highest bit
-    unsigned sindex_;
+    unsigned sindex_{0};
     // extra info
     Info info_;
     // set parent
-    inline void set_parent(int pidx, bool is_left_child = true) {
+    inline void SetParent(int pidx, bool is_left_child = true) {
       if (is_left_child) pidx |= (1U << 31);
       this->parent_ = pidx;
     }
@@ -193,35 +193,35 @@ class TreeModel {
  protected:
   // vector of nodes
-  std::vector<Node> nodes;
+  std::vector<Node> nodes_;
   // free node space, used during training process
-  std::vector<int> deleted_nodes;
+  std::vector<int> deleted_nodes_;
   // stats of nodes
-  std::vector<TNodeStat> stats;
+  std::vector<TNodeStat> stats_;
   // leaf vector, that is used to store additional information
-  std::vector<bst_float> leaf_vector;
+  std::vector<bst_float> leaf_vector_;
   // allocate a new node,
   // !!!!!! NOTE: may cause BUG here, nodes.resize
   inline int AllocNode() {
     if (param.num_deleted != 0) {
-      int nd = deleted_nodes.back();
-      deleted_nodes.pop_back();
+      int nd = deleted_nodes_.back();
+      deleted_nodes_.pop_back();
       --param.num_deleted;
       return nd;
     }
     int nd = param.num_nodes++;
     CHECK_LT(param.num_nodes, std::numeric_limits<int>::max())
         << "number of nodes in the tree exceed 2^31";
-    nodes.resize(param.num_nodes);
-    stats.resize(param.num_nodes);
-    leaf_vector.resize(param.num_nodes * param.size_leaf_vector);
+    nodes_.resize(param.num_nodes);
+    stats_.resize(param.num_nodes);
+    leaf_vector_.resize(param.num_nodes * param.size_leaf_vector);
     return nd;
   }
   // delete a tree node, keep the parent field to allow trace back
   inline void DeleteNode(int nid) {
     CHECK_GE(nid, param.num_roots);
-    deleted_nodes.push_back(nid);
-    nodes[nid].mark_delete();
+    deleted_nodes_.push_back(nid);
+    nodes_[nid].MarkDelete();
     ++param.num_deleted;
   }
@@ -232,11 +232,11 @@ class TreeModel {
    * \param value new leaf value
    */
   inline void ChangeToLeaf(int rid, bst_float value) {
-    CHECK(nodes[nodes[rid].cleft() ].is_leaf());
-    CHECK(nodes[nodes[rid].cright()].is_leaf());
-    this->DeleteNode(nodes[rid].cleft());
-    this->DeleteNode(nodes[rid].cright());
-    nodes[rid].set_leaf(value);
+    CHECK(nodes_[nodes_[rid].LeftChild() ].IsLeaf());
+    CHECK(nodes_[nodes_[rid].RightChild()].IsLeaf());
+    this->DeleteNode(nodes_[rid].LeftChild());
+    this->DeleteNode(nodes_[rid].RightChild());
+    nodes_[rid].SetLeaf(value);
   }
   /*!
    * \brief collapse a non leaf node to a leaf node, delete its children
@@ -244,12 +244,12 @@ class TreeModel {
    * \param value new leaf value
    */
   inline void CollapseToLeaf(int rid, bst_float value) {
-    if (nodes[rid].is_leaf()) return;
-    if (!nodes[nodes[rid].cleft() ].is_leaf()) {
-      CollapseToLeaf(nodes[rid].cleft(), 0.0f);
+    if (nodes_[rid].IsLeaf()) return;
+    if (!nodes_[nodes_[rid].LeftChild() ].IsLeaf()) {
+      CollapseToLeaf(nodes_[rid].LeftChild(), 0.0f);
     }
-    if (!nodes[nodes[rid].cright() ].is_leaf()) {
-      CollapseToLeaf(nodes[rid].cright(), 0.0f);
+    if (!nodes_[nodes_[rid].RightChild() ].IsLeaf()) {
+      CollapseToLeaf(nodes_[rid].RightChild(), 0.0f);
     }
     this->ChangeToLeaf(rid, value);
   }
@@ -262,47 +262,47 @@ class TreeModel {
     param.num_nodes = 1;
     param.num_roots = 1;
     param.num_deleted = 0;
-    nodes.resize(1);
+    nodes_.resize(1);
   }
   /*! \brief get node given nid */
   inline Node& operator[](int nid) {
-    return nodes[nid];
+    return nodes_[nid];
   }
   /*! \brief get node given nid */
   inline const Node& operator[](int nid) const {
-    return nodes[nid];
+    return nodes_[nid];
   }
   /*! \brief get const reference to nodes */
-  inline const std::vector<Node>& GetNodes() const { return nodes; }
+  inline const std::vector<Node>& GetNodes() const { return nodes_; }
   /*! \brief get node statistics given nid */
-  inline NodeStat& stat(int nid) {
-    return stats[nid];
+  inline NodeStat& Stat(int nid) {
+    return stats_[nid];
   }
   /*! \brief get node statistics given nid */
-  inline const NodeStat& stat(int nid) const {
-    return stats[nid];
+  inline const NodeStat& Stat(int nid) const {
+    return stats_[nid];
   }
   /*! \brief get leaf vector given nid */
-  inline bst_float* leafvec(int nid) {
-    if (leaf_vector.size() == 0) return nullptr;
-    return& leaf_vector[nid * param.size_leaf_vector];
+  inline bst_float* Leafvec(int nid) {
+    if (leaf_vector_.size() == 0) return nullptr;
+    return& leaf_vector_[nid * param.size_leaf_vector];
   }
   /*! \brief get leaf vector given nid */
-  inline const bst_float* leafvec(int nid) const {
-    if (leaf_vector.size() == 0) return nullptr;
-    return& leaf_vector[nid * param.size_leaf_vector];
+  inline const bst_float* Leafvec(int nid) const {
+    if (leaf_vector_.size() == 0) return nullptr;
+    return& leaf_vector_[nid * param.size_leaf_vector];
   }
   /*! \brief initialize the model */
   inline void InitModel() {
     param.num_nodes = param.num_roots;
-    nodes.resize(param.num_nodes);
-    stats.resize(param.num_nodes);
-    leaf_vector.resize(param.num_nodes * param.size_leaf_vector, 0.0f);
+    nodes_.resize(param.num_nodes);
+    stats_.resize(param.num_nodes);
+    leaf_vector_.resize(param.num_nodes * param.size_leaf_vector, 0.0f);
     for (int i = 0; i < param.num_nodes; i ++) {
-      nodes[i].set_leaf(0.0f);
-      nodes[i].set_parent(-1);
+      nodes_[i].SetLeaf(0.0f);
+      nodes_[i].SetParent(-1);
     }
   }
   /*!
@@ -311,35 +311,35 @@ class TreeModel {
    */
   inline void Load(dmlc::Stream* fi) {
     CHECK_EQ(fi->Read(&param, sizeof(TreeParam)), sizeof(TreeParam));
-    nodes.resize(param.num_nodes);
-    stats.resize(param.num_nodes);
+    nodes_.resize(param.num_nodes);
+    stats_.resize(param.num_nodes);
     CHECK_NE(param.num_nodes, 0);
-    CHECK_EQ(fi->Read(dmlc::BeginPtr(nodes), sizeof(Node) * nodes.size()),
-             sizeof(Node) * nodes.size());
-    CHECK_EQ(fi->Read(dmlc::BeginPtr(stats), sizeof(NodeStat) * stats.size()),
-             sizeof(NodeStat) * stats.size());
+    CHECK_EQ(fi->Read(dmlc::BeginPtr(nodes_), sizeof(Node) * nodes_.size()),
+             sizeof(Node) * nodes_.size());
+    CHECK_EQ(fi->Read(dmlc::BeginPtr(stats_), sizeof(NodeStat) * stats_.size()),
+             sizeof(NodeStat) * stats_.size());
     if (param.size_leaf_vector != 0) {
-      CHECK(fi->Read(&leaf_vector));
+      CHECK(fi->Read(&leaf_vector_));
     }
     // chg deleted nodes
-    deleted_nodes.resize(0);
+    deleted_nodes_.resize(0);
     for (int i = param.num_roots; i < param.num_nodes; ++i) {
-      if (nodes[i].is_deleted()) deleted_nodes.push_back(i);
+      if (nodes_[i].IsDeleted()) deleted_nodes_.push_back(i);
     }
-    CHECK_EQ(static_cast<int>(deleted_nodes.size()), param.num_deleted);
+    CHECK_EQ(static_cast<int>(deleted_nodes_.size()), param.num_deleted);
   }
   /*!
   * \brief save model to stream
   * \param fo output stream
   */
   inline void Save(dmlc::Stream* fo) const {
-    CHECK_EQ(param.num_nodes, static_cast<int>(nodes.size()));
-    CHECK_EQ(param.num_nodes, static_cast<int>(stats.size()));
+    CHECK_EQ(param.num_nodes, static_cast<int>(nodes_.size()));
+    CHECK_EQ(param.num_nodes, static_cast<int>(stats_.size()));
     fo->Write(&param, sizeof(TreeParam));
     CHECK_NE(param.num_nodes, 0);
-    fo->Write(dmlc::BeginPtr(nodes), sizeof(Node) * nodes.size());
-    fo->Write(dmlc::BeginPtr(stats), sizeof(NodeStat) * nodes.size());
-    if (param.size_leaf_vector != 0) fo->Write(leaf_vector);
+    fo->Write(dmlc::BeginPtr(nodes_), sizeof(Node) * nodes_.size());
+    fo->Write(dmlc::BeginPtr(stats_), sizeof(NodeStat) * nodes_.size());
+    if (param.size_leaf_vector != 0) fo->Write(leaf_vector_);
   }
   /*!
   * \brief add child nodes to node
@@ -348,10 +348,10 @@ class TreeModel {
   inline void AddChilds(int nid) {
     int pleft = this->AllocNode();
     int pright = this->AllocNode();
-    nodes[nid].cleft_ = pleft;
-    nodes[nid].cright_ = pright;
-    nodes[nodes[nid].cleft() ].set_parent(nid, true);
-    nodes[nodes[nid].cright()].set_parent(nid, false);
+    nodes_[nid].cleft_ = pleft;
+    nodes_[nid].cright_ = pright;
+    nodes_[nodes_[nid].LeftChild() ].SetParent(nid, true);
+    nodes_[nodes_[nid].RightChild()].SetParent(nid, false);
   }
   /*!
   * \brief only add a right child to a leaf node
@@ -359,8 +359,8 @@ class TreeModel {
   */
   inline void AddRightChild(int nid) {
     int pright = this->AllocNode();
-    nodes[nid].right = pright;
-    nodes[nodes[nid].right].set_parent(nid, false);
+    nodes_[nid].right = pright;
+    nodes_[nodes_[nid].right].SetParent(nid, false);
   }
   /*!
   * \brief get current depth
@@ -369,9 +369,9 @@ class TreeModel {
   */
   inline int GetDepth(int nid, bool pass_rchild = false) const {
     int depth = 0;
-    while (!nodes[nid].is_root()) {
-      if (!pass_rchild || nodes[nid].is_left_child()) ++depth;
-      nid = nodes[nid].parent();
+    while (!nodes_[nid].IsRoot()) {
+      if (!pass_rchild || nodes_[nid].IsLeftChild()) ++depth;
+      nid = nodes_[nid].Parent();
     }
     return depth;
   }
@@ -380,9 +380,9 @@ class TreeModel {
   * \param nid node id
   */
   inline int MaxDepth(int nid) const {
-    if (nodes[nid].is_leaf()) return 0;
-    return std::max(MaxDepth(nodes[nid].cleft())+1,
-                    MaxDepth(nodes[nid].cright())+1);
+    if (nodes_[nid].IsLeaf()) return 0;
+    return std::max(MaxDepth(nodes_[nid].LeftChild())+1,
+                    MaxDepth(nodes_[nid].RightChild())+1);
   }
   /*!
   * \brief get maximum depth
@@ -395,7 +395,7 @@ class TreeModel {
     return maxd;
   }
   /*! \brief number of extra nodes besides the root */
-  inline int num_extra_nodes() const {
+  inline int NumExtraNodes() const {
     return param.num_nodes - param.num_roots - param.num_deleted;
   }
 };
@@ -421,7 +421,7 @@ struct PathElement {
   bst_float zero_fraction;
   bst_float one_fraction;
   bst_float pweight;
-  PathElement() {}
+  PathElement() = default;
   PathElement(int i, bst_float z, bst_float o, bst_float w) :
     feature_index(i), zero_fraction(z), one_fraction(o), pweight(w) {}
 };
@@ -457,19 +457,19 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat> {
      * \brief returns the size of the feature vector
      * \return the size of the feature vector
      */
-    inline size_t size() const;
+    inline size_t Size() const;
     /*!
      * \brief get ith value
      * \param i feature index.
      * \return the i-th feature value
      */
-    inline bst_float fvalue(size_t i) const;
+    inline bst_float Fvalue(size_t i) const;
     /*!
      * \brief check whether i-th entry is missing
      * \param i feature index.
      * \return whether i-th value is missing.
      */
-    inline bool is_missing(size_t i) const;
+    inline bool IsMissing(size_t i) const;
    private:
     /*!
@@ -480,7 +480,7 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat> {
       bst_float fvalue;
       int flag;
     };
-    std::vector<Entry> data;
+    std::vector<Entry> data_;
   };
   /*!
   * \brief get the leaf index
@@ -562,63 +562,63 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat> {
  private:
   inline bst_float FillNodeMeanValue(int nid);
-  std::vector<bst_float> node_mean_values;
+  std::vector<bst_float> node_mean_values_;
 };
 // implementations of inline functions
 // do not need to read if only use the model
 inline void RegTree::FVec::Init(size_t size) {
   Entry e; e.flag = -1;
-  data.resize(size);
-  std::fill(data.begin(), data.end(), e);
+  data_.resize(size);
+  std::fill(data_.begin(), data_.end(), e);
 }
 inline void RegTree::FVec::Fill(const RowBatch::Inst& inst) {
   for (bst_uint i = 0; i < inst.length; ++i) {
-    if (inst[i].index >= data.size()) continue;
-    data[inst[i].index].fvalue = inst[i].fvalue;
+    if (inst[i].index >= data_.size()) continue;
+    data_[inst[i].index].fvalue = inst[i].fvalue;
   }
 }
 inline void RegTree::FVec::Drop(const RowBatch::Inst& inst) {
   for (bst_uint i = 0; i < inst.length; ++i) {
-    if (inst[i].index >= data.size()) continue;
-    data[inst[i].index].flag = -1;
+    if (inst[i].index >= data_.size()) continue;
+    data_[inst[i].index].flag = -1;
   }
 }
-inline size_t RegTree::FVec::size() const {
-  return data.size();
+inline size_t RegTree::FVec::Size() const {
+  return data_.size();
 }
-inline bst_float RegTree::FVec::fvalue(size_t i) const {
-  return data[i].fvalue;
+inline bst_float RegTree::FVec::Fvalue(size_t i) const {
+  return data_[i].fvalue;
 }
-inline bool RegTree::FVec::is_missing(size_t i) const {
-  return data[i].flag == -1;
+inline bool RegTree::FVec::IsMissing(size_t i) const {
+  return data_[i].flag == -1;
 }
 inline int RegTree::GetLeafIndex(const RegTree::FVec& feat, unsigned root_id) const {
-  int pid = static_cast<int>(root_id);
-  while (!(*this)[pid].is_leaf()) {
-    unsigned split_index = (*this)[pid].split_index();
-    pid = this->GetNext(pid, feat.fvalue(split_index), feat.is_missing(split_index));
+  auto pid = static_cast<int>(root_id);
+  while (!(*this)[pid].IsLeaf()) {
+    unsigned split_index = (*this)[pid].SplitIndex();
+    pid = this->GetNext(pid, feat.Fvalue(split_index), feat.IsMissing(split_index));
   }
   return pid;
 }
 inline bst_float RegTree::Predict(const RegTree::FVec& feat, unsigned root_id) const {
   int pid = this->GetLeafIndex(feat, root_id);
-  return (*this)[pid].leaf_value();
+  return (*this)[pid].LeafValue();
 }
 inline void RegTree::FillNodeMeanValues() {
   size_t num_nodes = this->param.num_nodes;
-  if (this->node_mean_values.size() == num_nodes) {
+  if (this->node_mean_values_.size() == num_nodes) {
     return;
   }
-  this->node_mean_values.resize(num_nodes);
+  this->node_mean_values_.resize(num_nodes);
   for (int root_id = 0; root_id < param.num_roots; ++root_id) {
     this->FillNodeMeanValue(root_id);
   }
@ -627,40 +627,39 @@ inline void RegTree::FillNodeMeanValues() {
inline bst_float RegTree::FillNodeMeanValue(int nid) { inline bst_float RegTree::FillNodeMeanValue(int nid) {
bst_float result; bst_float result;
auto& node = (*this)[nid]; auto& node = (*this)[nid];
if (node.is_leaf()) { if (node.IsLeaf()) {
result = node.leaf_value(); result = node.LeafValue();
} else { } else {
result = this->FillNodeMeanValue(node.cleft()) * this->stat(node.cleft()).sum_hess; result = this->FillNodeMeanValue(node.LeftChild()) * this->Stat(node.LeftChild()).sum_hess;
result += this->FillNodeMeanValue(node.cright()) * this->stat(node.cright()).sum_hess; result += this->FillNodeMeanValue(node.RightChild()) * this->Stat(node.RightChild()).sum_hess;
result /= this->stat(nid).sum_hess; result /= this->Stat(nid).sum_hess;
} }
this->node_mean_values[nid] = result; this->node_mean_values_[nid] = result;
return result; return result;
} }
inline void RegTree::CalculateContributionsApprox(const RegTree::FVec& feat, unsigned root_id, inline void RegTree::CalculateContributionsApprox(const RegTree::FVec& feat, unsigned root_id,
bst_float *out_contribs) const { bst_float *out_contribs) const {
CHECK_GT(this->node_mean_values.size(), 0U); CHECK_GT(this->node_mean_values_.size(), 0U);
// this follows the idea of http://blog.datadive.net/interpreting-random-forests/ // this follows the idea of http://blog.datadive.net/interpreting-random-forests/
bst_float node_value; unsigned split_index = 0;
unsigned split_index; auto pid = static_cast<int>(root_id);
int pid = static_cast<int>(root_id);
// update bias value // update bias value
node_value = this->node_mean_values[pid]; bst_float node_value = this->node_mean_values_[pid];
out_contribs[feat.size()] += node_value; out_contribs[feat.Size()] += node_value;
if ((*this)[pid].is_leaf()) { if ((*this)[pid].IsLeaf()) {
// nothing to do anymore // nothing to do anymore
return; return;
} }
while (!(*this)[pid].is_leaf()) { while (!(*this)[pid].IsLeaf()) {
split_index = (*this)[pid].split_index(); split_index = (*this)[pid].SplitIndex();
pid = this->GetNext(pid, feat.fvalue(split_index), feat.is_missing(split_index)); pid = this->GetNext(pid, feat.Fvalue(split_index), feat.IsMissing(split_index));
bst_float new_value = this->node_mean_values[pid]; bst_float new_value = this->node_mean_values_[pid];
// update feature weight // update feature weight
out_contribs[split_index] += new_value - node_value; out_contribs[split_index] += new_value - node_value;
node_value = new_value; node_value = new_value;
} }
bst_float leaf_value = (*this)[pid].leaf_value(); bst_float leaf_value = (*this)[pid].LeafValue();
// update leaf feature weight // update leaf feature weight
out_contribs[split_index] += leaf_value - node_value; out_contribs[split_index] += leaf_value - node_value;
} }
@ -749,33 +748,33 @@ inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
ExtendPath(unique_path, unique_depth, parent_zero_fraction, ExtendPath(unique_path, unique_depth, parent_zero_fraction,
parent_one_fraction, parent_feature_index); parent_one_fraction, parent_feature_index);
} }
const unsigned split_index = node.split_index(); const unsigned split_index = node.SplitIndex();
// leaf node // leaf node
if (node.is_leaf()) { if (node.IsLeaf()) {
for (unsigned i = 1; i <= unique_depth; ++i) { for (unsigned i = 1; i <= unique_depth; ++i) {
const bst_float w = UnwoundPathSum(unique_path, unique_depth, i); const bst_float w = UnwoundPathSum(unique_path, unique_depth, i);
const PathElement &el = unique_path[i]; const PathElement &el = unique_path[i];
phi[el.feature_index] += w * (el.one_fraction - el.zero_fraction) phi[el.feature_index] += w * (el.one_fraction - el.zero_fraction)
* node.leaf_value() * condition_fraction; * node.LeafValue() * condition_fraction;
} }
// internal node // internal node
} else { } else {
// find which branch is "hot" (meaning x would follow it) // find which branch is "hot" (meaning x would follow it)
unsigned hot_index = 0; unsigned hot_index = 0;
if (feat.is_missing(split_index)) { if (feat.IsMissing(split_index)) {
hot_index = node.cdefault(); hot_index = node.DefaultChild();
} else if (feat.fvalue(split_index) < node.split_cond()) { } else if (feat.Fvalue(split_index) < node.SplitCond()) {
hot_index = node.cleft(); hot_index = node.LeftChild();
} else { } else {
hot_index = node.cright(); hot_index = node.RightChild();
} }
const unsigned cold_index = (static_cast<int>(hot_index) == node.cleft() ? const unsigned cold_index = (static_cast<int>(hot_index) == node.LeftChild() ?
node.cright() : node.cleft()); node.RightChild() : node.LeftChild());
const bst_float w = this->stat(node_index).sum_hess; const bst_float w = this->Stat(node_index).sum_hess;
const bst_float hot_zero_fraction = this->stat(hot_index).sum_hess / w; const bst_float hot_zero_fraction = this->Stat(hot_index).sum_hess / w;
const bst_float cold_zero_fraction = this->stat(cold_index).sum_hess / w; const bst_float cold_zero_fraction = this->Stat(cold_index).sum_hess / w;
bst_float incoming_zero_fraction = 1; bst_float incoming_zero_fraction = 1;
bst_float incoming_one_fraction = 1; bst_float incoming_one_fraction = 1;
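Note: restating the fractions computed above, with H(v) the sum_hess at node v: hot_zero_fraction = H(hot_index) / H(node_index) and cold_zero_fraction = H(cold_index) / H(node_index), i.e. the proportion of training mass that flowed down each branch.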
@ -820,13 +819,13 @@ inline void RegTree::CalculateContributions(const RegTree::FVec& feat, unsigned
unsigned condition_feature) const { unsigned condition_feature) const {
// find the expected value of the tree's predictions // find the expected value of the tree's predictions
if (condition == 0) { if (condition == 0) {
bst_float node_value = this->node_mean_values[static_cast<int>(root_id)]; bst_float node_value = this->node_mean_values_[static_cast<int>(root_id)];
out_contribs[feat.size()] += node_value; out_contribs[feat.Size()] += node_value;
} }
// Preallocate space for the unique path data // Preallocate space for the unique path data
const int maxd = this->MaxDepth(root_id) + 2; const int maxd = this->MaxDepth(root_id) + 2;
PathElement *unique_path_data = new PathElement[(maxd * (maxd + 1)) / 2]; auto *unique_path_data = new PathElement[(maxd * (maxd + 1)) / 2];
TreeShap(feat, out_contribs, root_id, 0, unique_path_data, TreeShap(feat, out_contribs, root_id, 0, unique_path_data,
1, 1, -1, condition, condition_feature, 1); 1, 1, -1, condition, condition_feature, 1);
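Note: maxd * (maxd + 1) / 2 is the triangular number T(maxd), enough scratch to hold one unique path of every length from 1 to maxd; for a tree with MaxDepth 6, maxd = 8 and the array holds 36 PathElements.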
@ -835,14 +834,14 @@ inline void RegTree::CalculateContributions(const RegTree::FVec& feat, unsigned
/*! \brief get next position of the tree given current pid */ /*! \brief get next position of the tree given current pid */
inline int RegTree::GetNext(int pid, bst_float fvalue, bool is_unknown) const { inline int RegTree::GetNext(int pid, bst_float fvalue, bool is_unknown) const {
bst_float split_value = (*this)[pid].split_cond(); bst_float split_value = (*this)[pid].SplitCond();
if (is_unknown) { if (is_unknown) {
return (*this)[pid].cdefault(); return (*this)[pid].DefaultChild();
} else { } else {
if (fvalue < split_value) { if (fvalue < split_value) {
return (*this)[pid].cleft(); return (*this)[pid].LeftChild();
} else { } else {
return (*this)[pid].cright(); return (*this)[pid].RightChild();
} }
} }
} }
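Note: a minimal sketch (not part of the commit) of how GetNext drives prediction-time descent, assuming tree is a RegTree and feat is a filled RegTree::FVec; only names introduced by this rename are used:

    int nid = 0;  // start at the root
    while (!tree[nid].IsLeaf()) {
      unsigned fid = tree[nid].SplitIndex();
      // missing values fall through to the default child inside GetNext
      nid = tree.GetNext(nid, feat.Fvalue(fid), feat.IsMissing(fid));
    }
    bst_float pred = tree[nid].LeafValue();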
@ -25,7 +25,7 @@ namespace xgboost {
class TreeUpdater { class TreeUpdater {
public: public:
/*! \brief virtual destructor */ /*! \brief virtual destructor */
virtual ~TreeUpdater() {} virtual ~TreeUpdater() = default;
/*! /*!
* \brief Initialize the updater with given arguments. * \brief Initialize the updater with given arguments.
* \param args arguments to the objective function. * \param args arguments to the objective function.
@ -40,7 +40,7 @@ class TreeUpdater {
* but maybe different random seeds, usually one tree is passed in at a time, * but maybe different random seeds, usually one tree is passed in at a time,
* there can be multiple trees when we train random forest style model * there can be multiple trees when we train random forest style model
*/ */
virtual void Update(HostDeviceVector<bst_gpair>* gpair, virtual void Update(HostDeviceVector<GradientPair>* gpair,
DMatrix* data, DMatrix* data,
const std::vector<RegTree*>& trees) = 0; const std::vector<RegTree*>& trees) = 0;
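Note: a hypothetical updater written against the interface above (a sketch only; the Init signature is inferred from the surrounding comment and may differ from the actual header):

    class NoopUpdater : public TreeUpdater {
     public:
      void Init(const std::vector<std::pair<std::string, std::string>>& args) override {}
      void Update(HostDeviceVector<GradientPair>* gpair, DMatrix* data,
                  const std::vector<RegTree*>& trees) override {
        // a real updater would grow or refresh each tree in `trees` here
      }
    };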
@ -36,21 +36,21 @@ class MyLogistic : public ObjFunction {
void GetGradient(HostDeviceVector<bst_float> *preds, void GetGradient(HostDeviceVector<bst_float> *preds,
const MetaInfo &info, const MetaInfo &info,
int iter, int iter,
HostDeviceVector<bst_gpair> *out_gpair) override { HostDeviceVector<GradientPair> *out_gpair) override {
out_gpair->resize(preds->size()); out_gpair->Resize(preds->Size());
std::vector<bst_float>& preds_h = preds->data_h(); std::vector<bst_float>& preds_h = preds->HostVector();
std::vector<bst_gpair>& out_gpair_h = out_gpair->data_h(); std::vector<GradientPair>& out_gpair_h = out_gpair->HostVector();
for (size_t i = 0; i < preds_h.size(); ++i) { for (size_t i = 0; i < preds_h.size(); ++i) {
bst_float w = info.GetWeight(i); bst_float w = info.GetWeight(i);
// scale the negative examples! // scale the negative examples!
if (info.labels[i] == 0.0f) w *= param_.scale_neg_weight; if (info.labels_[i] == 0.0f) w *= param_.scale_neg_weight;
// logistic transformation // logistic transformation
bst_float p = 1.0f / (1.0f + std::exp(-preds_h[i])); bst_float p = 1.0f / (1.0f + std::exp(-preds_h[i]));
// this is the gradient // this is the gradient
bst_float grad = (p - info.labels[i]) * w; bst_float grad = (p - info.labels_[i]) * w;
// this is the second order gradient // this is the second order gradient
bst_float hess = p * (1.0f - p) * w; bst_float hess = p * (1.0f - p) * w;
out_gpair_h.at(i) = bst_gpair(grad, hess); out_gpair_h.at(i) = GradientPair(grad, hess);
} }
} }
const char* DefaultEvalMetric() const override { const char* DefaultEvalMetric() const override {
@ -58,7 +58,7 @@ class MyLogistic : public ObjFunction {
} }
void PredTransform(HostDeviceVector<bst_float> *io_preds) override { void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
// transform margin value to probability. // transform margin value to probability.
std::vector<bst_float> &preds = io_preds->data_h(); std::vector<bst_float> &preds = io_preds->HostVector();
for (size_t i = 0; i < preds.size(); ++i) { for (size_t i = 0; i < preds.size(); ++i) {
preds[i] = 1.0f / (1.0f + std::exp(-preds[i])); preds[i] = 1.0f / (1.0f + std::exp(-preds[i]));
} }
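Note: with margin m, label y and weight w, the loops above implement the standard logistic-loss derivatives:

    p = \frac{1}{1 + e^{-m}}, \qquad g = w\,(p - y), \qquad h = w\,p\,(1 - p)

where w has first been scaled by scale_neg_weight for negative examples.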
@ -27,7 +27,7 @@ class Booster {
initialized_(false), initialized_(false),
learner_(Learner::Create(cache_mats)) {} learner_(Learner::Create(cache_mats)) {}
inline Learner* learner() { inline Learner* learner() { // NOLINT
return learner_.get(); return learner_.get();
} }
@ -40,7 +40,7 @@ class Booster {
return x.first == name; return x.first == name;
}); });
if (it == cfg_.end()) { if (it == cfg_.end()) {
cfg_.push_back(std::make_pair(name, val)); cfg_.emplace_back(name, val);
} else { } else {
(*it).second = val; (*it).second = val;
} }
@ -193,11 +193,11 @@ struct XGBAPIThreadLocalEntry {
/*! \brief returning float vector. */ /*! \brief returning float vector. */
HostDeviceVector<bst_float> ret_vec_float; HostDeviceVector<bst_float> ret_vec_float;
/*! \brief temp variable of gradient pairs. */ /*! \brief temp variable of gradient pairs. */
HostDeviceVector<bst_gpair> tmp_gpair; HostDeviceVector<GradientPair> tmp_gpair;
}; };
// define the threadlocal store. // define the threadlocal store.
typedef dmlc::ThreadLocalStore<XGBAPIThreadLocalEntry> XGBAPIThreadLocalStore; using XGBAPIThreadLocalStore = dmlc::ThreadLocalStore<XGBAPIThreadLocalEntry>;
int XGDMatrixCreateFromFile(const char *fname, int XGDMatrixCreateFromFile(const char *fname,
int silent, int silent,
@ -254,14 +254,14 @@ XGB_DLL int XGDMatrixCreateFromCSREx(const size_t* indptr,
mat.row_ptr_.push_back(mat.row_data_.size()); mat.row_ptr_.push_back(mat.row_data_.size());
} }
mat.info.num_col = num_column; mat.info.num_col_ = num_column;
if (num_col > 0) { if (num_col > 0) {
CHECK_LE(mat.info.num_col, num_col) CHECK_LE(mat.info.num_col_, num_col)
<< "num_col=" << num_col << " vs " << mat.info.num_col; << "num_col=" << num_col << " vs " << mat.info.num_col_;
mat.info.num_col = num_col; mat.info.num_col_ = num_col;
} }
mat.info.num_row = nindptr - 1; mat.info.num_row_ = nindptr - 1;
mat.info.num_nonzero = mat.row_data_.size(); mat.info.num_nonzero_ = mat.row_data_.size();
*out = new std::shared_ptr<DMatrix>(DMatrix::Create(std::move(source))); *out = new std::shared_ptr<DMatrix>(DMatrix::Create(std::move(source)));
API_END(); API_END();
} }
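Note: for readers unfamiliar with the CSR layout being filled here, a 2x3 illustration (not commit code):

    // dense         [[1, 0, 2],
    //                [0, 3, 0]]
    // indptr  = {0, 2, 3}   row i occupies [indptr[i], indptr[i+1])
    // indices = {0, 2, 1}   column index of each stored value
    // data    = {1, 2, 3}
    // hence num_row_ = nindptr - 1 = 2 and num_nonzero_ = 3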
@ -317,13 +317,13 @@ XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t* col_ptr,
} }
} }
} }
mat.info.num_row = mat.row_ptr_.size() - 1; mat.info.num_row_ = mat.row_ptr_.size() - 1;
if (num_row > 0) { if (num_row > 0) {
CHECK_LE(mat.info.num_row, num_row); CHECK_LE(mat.info.num_row_, num_row);
mat.info.num_row = num_row; mat.info.num_row_ = num_row;
} }
mat.info.num_col = ncol; mat.info.num_col_ = ncol;
mat.info.num_nonzero = nelem; mat.info.num_nonzero_ = nelem;
*out = new std::shared_ptr<DMatrix>(DMatrix::Create(std::move(source))); *out = new std::shared_ptr<DMatrix>(DMatrix::Create(std::move(source)));
API_END(); API_END();
} }
@ -353,8 +353,8 @@ XGB_DLL int XGDMatrixCreateFromMat(const bst_float* data,
data::SimpleCSRSource& mat = *source; data::SimpleCSRSource& mat = *source;
mat.row_ptr_.resize(1+nrow); mat.row_ptr_.resize(1+nrow);
bool nan_missing = common::CheckNAN(missing); bool nan_missing = common::CheckNAN(missing);
mat.info.num_row = nrow; mat.info.num_row_ = nrow;
mat.info.num_col = ncol; mat.info.num_col_ = ncol;
const bst_float* data0 = data; const bst_float* data0 = data;
// count elements for sizing data // count elements for sizing data
@ -389,12 +389,12 @@ XGB_DLL int XGDMatrixCreateFromMat(const bst_float* data,
} }
} }
mat.info.num_nonzero = mat.row_data_.size(); mat.info.num_nonzero_ = mat.row_data_.size();
*out = new std::shared_ptr<DMatrix>(DMatrix::Create(std::move(source))); *out = new std::shared_ptr<DMatrix>(DMatrix::Create(std::move(source)));
API_END(); API_END();
} }
void prefixsum_inplace(size_t *x, size_t N) { void PrefixSum(size_t *x, size_t N) {
size_t *suma; size_t *suma;
#pragma omp parallel #pragma omp parallel
{ {
@ -425,12 +425,10 @@ void prefixsum_inplace(size_t *x, size_t N) {
delete[] suma; delete[] suma;
} }
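Note: the parallel body is elided by the hunk above; a self-contained two-pass OpenMP inclusive scan in the same spirit looks like this (a sketch under the usual static-schedule idiom, not the committed code):

    #include <omp.h>
    #include <cstddef>

    void PrefixSumSketch(size_t *x, size_t N) {
      size_t *suma = nullptr;
      #pragma omp parallel
      {
        const int nthreads = omp_get_num_threads();
        const int tid = omp_get_thread_num();
        #pragma omp single
        {
          suma = new size_t[nthreads + 1];
          suma[0] = 0;
        }  // implicit barrier: suma is allocated before any thread proceeds
        size_t sum = 0;
        // pass 1: each thread scans its contiguous chunk in place
        #pragma omp for schedule(static)
        for (std::ptrdiff_t i = 0; i < static_cast<std::ptrdiff_t>(N); ++i) {
          sum += x[i];
          x[i] = sum;
        }
        suma[tid + 1] = sum;  // publish this thread's chunk total
        #pragma omp barrier
        size_t offset = 0;
        for (int t = 1; t <= tid; ++t) offset += suma[t];
        // pass 2: the same static schedule revisits the same chunk, now offset
        #pragma omp for schedule(static)
        for (std::ptrdiff_t i = 0; i < static_cast<std::ptrdiff_t>(N); ++i) {
          x[i] += offset;
        }
      }
      delete[] suma;
    }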
XGB_DLL int XGDMatrixCreateFromMat_omp(const bst_float* data, // NOLINT
XGB_DLL int XGDMatrixCreateFromMat_omp(const bst_float* data,
xgboost::bst_ulong nrow, xgboost::bst_ulong nrow,
xgboost::bst_ulong ncol, xgboost::bst_ulong ncol,
bst_float missing, bst_float missing, DMatrixHandle* out,
DMatrixHandle* out,
int nthread) { int nthread) {
// avoid openmp unless enough data to be worth it to avoid overhead costs // avoid openmp unless enough data to be worth it to avoid overhead costs
if (nrow*ncol <= 10000*50) { if (nrow*ncol <= 10000*50) {
@ -446,8 +444,8 @@ XGB_DLL int XGDMatrixCreateFromMat_omp(const bst_float* data,
std::unique_ptr<data::SimpleCSRSource> source(new data::SimpleCSRSource()); std::unique_ptr<data::SimpleCSRSource> source(new data::SimpleCSRSource());
data::SimpleCSRSource& mat = *source; data::SimpleCSRSource& mat = *source;
mat.row_ptr_.resize(1+nrow); mat.row_ptr_.resize(1+nrow);
mat.info.num_row = nrow; mat.info.num_row_ = nrow;
mat.info.num_col = ncol; mat.info.num_col_ = ncol;
// Check for errors in missing elements // Check for errors in missing elements
// Count elements per row (to avoid otherwise need to copy) // Count elements per row (to avoid otherwise need to copy)
@ -480,7 +478,7 @@ XGB_DLL int XGDMatrixCreateFromMat_omp(const bst_float* data,
} }
// do cumulative sum (to avoid otherwise need to copy) // do cumulative sum (to avoid otherwise need to copy)
prefixsum_inplace(&mat.row_ptr_[0], mat.row_ptr_.size()); PrefixSum(&mat.row_ptr_[0], mat.row_ptr_.size());
mat.row_data_.resize(mat.row_data_.size() + mat.row_ptr_.back()); mat.row_data_.resize(mat.row_data_.size() + mat.row_ptr_.back());
// Fill data matrix (now that know size, no need for slow push_back()) // Fill data matrix (now that know size, no need for slow push_back())
@ -500,7 +498,7 @@ XGB_DLL int XGDMatrixCreateFromMat_omp(const bst_float* data,
} }
} }
mat.info.num_nonzero = mat.row_data_.size(); mat.info.num_nonzero_ = mat.row_data_.size();
*out = new std::shared_ptr<DMatrix>(DMatrix::Create(std::move(source))); *out = new std::shared_ptr<DMatrix>(DMatrix::Create(std::move(source)));
API_END(); API_END();
} }
@ -516,12 +514,12 @@ XGB_DLL int XGDMatrixSliceDMatrix(DMatrixHandle handle,
src.CopyFrom(static_cast<std::shared_ptr<DMatrix>*>(handle)->get()); src.CopyFrom(static_cast<std::shared_ptr<DMatrix>*>(handle)->get());
data::SimpleCSRSource& ret = *source; data::SimpleCSRSource& ret = *source;
CHECK_EQ(src.info.group_ptr.size(), 0U) CHECK_EQ(src.info.group_ptr_.size(), 0U)
<< "slice does not support group structure"; << "slice does not support group structure";
ret.Clear(); ret.Clear();
ret.info.num_row = len; ret.info.num_row_ = len;
ret.info.num_col = src.info.num_col; ret.info.num_col_ = src.info.num_col_;
dmlc::DataIter<RowBatch>* iter = &src; dmlc::DataIter<RowBatch>* iter = &src;
iter->BeforeFirst(); iter->BeforeFirst();
@ -532,23 +530,22 @@ XGB_DLL int XGDMatrixSliceDMatrix(DMatrixHandle handle,
const int ridx = idxset[i]; const int ridx = idxset[i];
RowBatch::Inst inst = batch[ridx]; RowBatch::Inst inst = batch[ridx];
CHECK_LT(static_cast<xgboost::bst_ulong>(ridx), batch.size); CHECK_LT(static_cast<xgboost::bst_ulong>(ridx), batch.size);
ret.row_data_.resize(ret.row_data_.size() + inst.length); ret.row_data_.insert(ret.row_data_.end(), inst.data,
std::memcpy(dmlc::BeginPtr(ret.row_data_) + ret.row_ptr_.back(), inst.data, inst.data + inst.length);
sizeof(RowBatch::Entry) * inst.length);
ret.row_ptr_.push_back(ret.row_ptr_.back() + inst.length); ret.row_ptr_.push_back(ret.row_ptr_.back() + inst.length);
ret.info.num_nonzero += inst.length; ret.info.num_nonzero_ += inst.length;
if (src.info.labels.size() != 0) { if (src.info.labels_.size() != 0) {
ret.info.labels.push_back(src.info.labels[ridx]); ret.info.labels_.push_back(src.info.labels_[ridx]);
} }
if (src.info.weights.size() != 0) { if (src.info.weights_.size() != 0) {
ret.info.weights.push_back(src.info.weights[ridx]); ret.info.weights_.push_back(src.info.weights_[ridx]);
} }
if (src.info.base_margin.size() != 0) { if (src.info.base_margin_.size() != 0) {
ret.info.base_margin.push_back(src.info.base_margin[ridx]); ret.info.base_margin_.push_back(src.info.base_margin_[ridx]);
} }
if (src.info.root_index.size() != 0) { if (src.info.root_index_.size() != 0) {
ret.info.root_index.push_back(src.info.root_index[ridx]); ret.info.root_index_.push_back(src.info.root_index_[ridx]);
} }
} }
*out = new std::shared_ptr<DMatrix>(DMatrix::Create(std::move(source))); *out = new std::shared_ptr<DMatrix>(DMatrix::Create(std::move(source)));
@ -575,7 +572,7 @@ XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle,
xgboost::bst_ulong len) { xgboost::bst_ulong len) {
API_BEGIN(); API_BEGIN();
static_cast<std::shared_ptr<DMatrix>*>(handle) static_cast<std::shared_ptr<DMatrix>*>(handle)
->get()->info().SetInfo(field, info, kFloat32, len); ->get()->Info().SetInfo(field, info, kFloat32, len);
API_END(); API_END();
} }
@ -585,7 +582,7 @@ XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle,
xgboost::bst_ulong len) { xgboost::bst_ulong len) {
API_BEGIN(); API_BEGIN();
static_cast<std::shared_ptr<DMatrix>*>(handle) static_cast<std::shared_ptr<DMatrix>*>(handle)
->get()->info().SetInfo(field, info, kUInt32, len); ->get()->Info().SetInfo(field, info, kUInt32, len);
API_END(); API_END();
} }
@ -593,12 +590,12 @@ XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle,
const unsigned* group, const unsigned* group,
xgboost::bst_ulong len) { xgboost::bst_ulong len) {
API_BEGIN(); API_BEGIN();
std::shared_ptr<DMatrix> *pmat = static_cast<std::shared_ptr<DMatrix>*>(handle); auto *pmat = static_cast<std::shared_ptr<DMatrix>*>(handle);
MetaInfo& info = pmat->get()->info(); MetaInfo& info = pmat->get()->Info();
info.group_ptr.resize(len + 1); info.group_ptr_.resize(len + 1);
info.group_ptr[0] = 0; info.group_ptr_[0] = 0;
for (uint64_t i = 0; i < len; ++i) { for (uint64_t i = 0; i < len; ++i) {
info.group_ptr[i + 1] = info.group_ptr[i] + group[i]; info.group_ptr_[i + 1] = info.group_ptr_[i] + group[i];
} }
API_END(); API_END();
} }
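Note: group_ptr_ is the cumulative form of the per-query group sizes, so group = {3, 2, 4} yields group_ptr_ = {0, 3, 5, 9}: rows [0, 3) belong to query 0, [3, 5) to query 1, and [5, 9) to query 2.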
@ -608,18 +605,18 @@ XGB_DLL int XGDMatrixGetFloatInfo(const DMatrixHandle handle,
xgboost::bst_ulong* out_len, xgboost::bst_ulong* out_len,
const bst_float** out_dptr) { const bst_float** out_dptr) {
API_BEGIN(); API_BEGIN();
const MetaInfo& info = static_cast<std::shared_ptr<DMatrix>*>(handle)->get()->info(); const MetaInfo& info = static_cast<std::shared_ptr<DMatrix>*>(handle)->get()->Info();
const std::vector<bst_float>* vec = nullptr; const std::vector<bst_float>* vec = nullptr;
if (!std::strcmp(field, "label")) { if (!std::strcmp(field, "label")) {
vec = &info.labels; vec = &info.labels_;
} else if (!std::strcmp(field, "weight")) { } else if (!std::strcmp(field, "weight")) {
vec = &info.weights; vec = &info.weights_;
} else if (!std::strcmp(field, "base_margin")) { } else if (!std::strcmp(field, "base_margin")) {
vec = &info.base_margin; vec = &info.base_margin_;
} else { } else {
LOG(FATAL) << "Unknown float field name " << field; LOG(FATAL) << "Unknown float field name " << field;
} }
*out_len = static_cast<xgboost::bst_ulong>(vec->size()); *out_len = static_cast<xgboost::bst_ulong>(vec->size()); // NOLINT
*out_dptr = dmlc::BeginPtr(*vec); *out_dptr = dmlc::BeginPtr(*vec);
API_END(); API_END();
} }
@ -629,15 +626,15 @@ XGB_DLL int XGDMatrixGetUIntInfo(const DMatrixHandle handle,
xgboost::bst_ulong *out_len, xgboost::bst_ulong *out_len,
const unsigned **out_dptr) { const unsigned **out_dptr) {
API_BEGIN(); API_BEGIN();
const MetaInfo& info = static_cast<std::shared_ptr<DMatrix>*>(handle)->get()->info(); const MetaInfo& info = static_cast<std::shared_ptr<DMatrix>*>(handle)->get()->Info();
const std::vector<unsigned>* vec = nullptr; const std::vector<unsigned>* vec = nullptr;
if (!std::strcmp(field, "root_index")) { if (!std::strcmp(field, "root_index")) {
vec = &info.root_index; vec = &info.root_index_;
*out_len = static_cast<xgboost::bst_ulong>(vec->size());
*out_dptr = dmlc::BeginPtr(*vec);
} else { } else {
LOG(FATAL) << "Unknown uint field name " << field; LOG(FATAL) << "Unknown uint field name " << field;
} }
*out_len = static_cast<xgboost::bst_ulong>(vec->size());
*out_dptr = dmlc::BeginPtr(*vec);
API_END(); API_END();
} }
@ -645,7 +642,7 @@ XGB_DLL int XGDMatrixNumRow(const DMatrixHandle handle,
xgboost::bst_ulong *out) { xgboost::bst_ulong *out) {
API_BEGIN(); API_BEGIN();
*out = static_cast<xgboost::bst_ulong>( *out = static_cast<xgboost::bst_ulong>(
static_cast<std::shared_ptr<DMatrix>*>(handle)->get()->info().num_row); static_cast<std::shared_ptr<DMatrix>*>(handle)->get()->Info().num_row_);
API_END(); API_END();
} }
@ -653,7 +650,7 @@ XGB_DLL int XGDMatrixNumCol(const DMatrixHandle handle,
xgboost::bst_ulong *out) { xgboost::bst_ulong *out) {
API_BEGIN(); API_BEGIN();
*out = static_cast<size_t>( *out = static_cast<size_t>(
static_cast<std::shared_ptr<DMatrix>*>(handle)->get()->info().num_col); static_cast<std::shared_ptr<DMatrix>*>(handle)->get()->Info().num_col_);
API_END(); API_END();
} }
@ -688,8 +685,8 @@ XGB_DLL int XGBoosterUpdateOneIter(BoosterHandle handle,
int iter, int iter,
DMatrixHandle dtrain) { DMatrixHandle dtrain) {
API_BEGIN(); API_BEGIN();
Booster* bst = static_cast<Booster*>(handle); auto* bst = static_cast<Booster*>(handle);
std::shared_ptr<DMatrix> *dtr = auto *dtr =
static_cast<std::shared_ptr<DMatrix>*>(dtrain); static_cast<std::shared_ptr<DMatrix>*>(dtrain);
bst->LazyInit(); bst->LazyInit();
@ -702,15 +699,15 @@ XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle,
bst_float *grad, bst_float *grad,
bst_float *hess, bst_float *hess,
xgboost::bst_ulong len) { xgboost::bst_ulong len) {
HostDeviceVector<bst_gpair>& tmp_gpair = XGBAPIThreadLocalStore::Get()->tmp_gpair; HostDeviceVector<GradientPair>& tmp_gpair = XGBAPIThreadLocalStore::Get()->tmp_gpair;
API_BEGIN(); API_BEGIN();
Booster* bst = static_cast<Booster*>(handle); auto* bst = static_cast<Booster*>(handle);
std::shared_ptr<DMatrix>* dtr = auto* dtr =
static_cast<std::shared_ptr<DMatrix>*>(dtrain); static_cast<std::shared_ptr<DMatrix>*>(dtrain);
tmp_gpair.resize(len); tmp_gpair.Resize(len);
std::vector<bst_gpair>& tmp_gpair_h = tmp_gpair.data_h(); std::vector<GradientPair>& tmp_gpair_h = tmp_gpair.HostVector();
for (xgboost::bst_ulong i = 0; i < len; ++i) { for (xgboost::bst_ulong i = 0; i < len; ++i) {
tmp_gpair_h[i] = bst_gpair(grad[i], hess[i]); tmp_gpair_h[i] = GradientPair(grad[i], hess[i]);
} }
bst->LazyInit(); bst->LazyInit();
@ -726,13 +723,13 @@ XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle,
const char** out_str) { const char** out_str) {
std::string& eval_str = XGBAPIThreadLocalStore::Get()->ret_str; std::string& eval_str = XGBAPIThreadLocalStore::Get()->ret_str;
API_BEGIN(); API_BEGIN();
Booster* bst = static_cast<Booster*>(handle); auto* bst = static_cast<Booster*>(handle);
std::vector<DMatrix*> data_sets; std::vector<DMatrix*> data_sets;
std::vector<std::string> data_names; std::vector<std::string> data_names;
for (xgboost::bst_ulong i = 0; i < len; ++i) { for (xgboost::bst_ulong i = 0; i < len; ++i) {
data_sets.push_back(static_cast<std::shared_ptr<DMatrix>*>(dmats[i])->get()); data_sets.push_back(static_cast<std::shared_ptr<DMatrix>*>(dmats[i])->get());
data_names.push_back(std::string(evnames[i])); data_names.emplace_back(evnames[i]);
} }
bst->LazyInit(); bst->LazyInit();
@ -750,7 +747,7 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
HostDeviceVector<bst_float>& preds = HostDeviceVector<bst_float>& preds =
XGBAPIThreadLocalStore::Get()->ret_vec_float; XGBAPIThreadLocalStore::Get()->ret_vec_float;
API_BEGIN(); API_BEGIN();
Booster *bst = static_cast<Booster*>(handle); auto *bst = static_cast<Booster*>(handle);
bst->LazyInit(); bst->LazyInit();
bst->learner()->Predict( bst->learner()->Predict(
static_cast<std::shared_ptr<DMatrix>*>(dmat)->get(), static_cast<std::shared_ptr<DMatrix>*>(dmat)->get(),
@ -760,8 +757,8 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
(option_mask & 4) != 0, (option_mask & 4) != 0,
(option_mask & 8) != 0, (option_mask & 8) != 0,
(option_mask & 16) != 0); (option_mask & 16) != 0);
*out_result = dmlc::BeginPtr(preds.data_h()); *out_result = dmlc::BeginPtr(preds.HostVector());
*len = static_cast<xgboost::bst_ulong>(preds.size()); *len = static_cast<xgboost::bst_ulong>(preds.Size());
API_END(); API_END();
} }
@ -775,7 +772,7 @@ XGB_DLL int XGBoosterLoadModel(BoosterHandle handle, const char* fname) {
XGB_DLL int XGBoosterSaveModel(BoosterHandle handle, const char* fname) { XGB_DLL int XGBoosterSaveModel(BoosterHandle handle, const char* fname) {
API_BEGIN(); API_BEGIN();
std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname, "w")); std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname, "w"));
Booster *bst = static_cast<Booster*>(handle); auto *bst = static_cast<Booster*>(handle);
bst->LazyInit(); bst->LazyInit();
bst->learner()->Save(fo.get()); bst->learner()->Save(fo.get());
API_END(); API_END();
@ -798,7 +795,7 @@ XGB_DLL int XGBoosterGetModelRaw(BoosterHandle handle,
API_BEGIN(); API_BEGIN();
common::MemoryBufferStream fo(&raw_str); common::MemoryBufferStream fo(&raw_str);
Booster *bst = static_cast<Booster*>(handle); auto *bst = static_cast<Booster*>(handle);
bst->LazyInit(); bst->LazyInit();
bst->learner()->Save(&fo); bst->learner()->Save(&fo);
*out_dptr = dmlc::BeginPtr(raw_str); *out_dptr = dmlc::BeginPtr(raw_str);
@ -815,7 +812,7 @@ inline void XGBoostDumpModelImpl(
const char*** out_models) { const char*** out_models) {
std::vector<std::string>& str_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_str; std::vector<std::string>& str_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_str;
std::vector<const char*>& charp_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_charp; std::vector<const char*>& charp_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_charp;
Booster *bst = static_cast<Booster*>(handle); auto *bst = static_cast<Booster*>(handle);
bst->LazyInit(); bst->LazyInit();
str_vecs = bst->learner()->DumpModel(fmap, with_stats != 0, format); str_vecs = bst->learner()->DumpModel(fmap, with_stats != 0, format);
charp_vecs.resize(str_vecs.size()); charp_vecs.resize(str_vecs.size());
@ -881,7 +878,7 @@ XGB_DLL int XGBoosterGetAttr(BoosterHandle handle,
const char* key, const char* key,
const char** out, const char** out,
int* success) { int* success) {
Booster* bst = static_cast<Booster*>(handle); auto* bst = static_cast<Booster*>(handle);
std::string& ret_str = XGBAPIThreadLocalStore::Get()->ret_str; std::string& ret_str = XGBAPIThreadLocalStore::Get()->ret_str;
API_BEGIN(); API_BEGIN();
if (bst->learner()->GetAttr(key, &ret_str)) { if (bst->learner()->GetAttr(key, &ret_str)) {
@ -897,7 +894,7 @@ XGB_DLL int XGBoosterGetAttr(BoosterHandle handle,
XGB_DLL int XGBoosterSetAttr(BoosterHandle handle, XGB_DLL int XGBoosterSetAttr(BoosterHandle handle,
const char* key, const char* key,
const char* value) { const char* value) {
Booster* bst = static_cast<Booster*>(handle); auto* bst = static_cast<Booster*>(handle);
API_BEGIN(); API_BEGIN();
if (value == nullptr) { if (value == nullptr) {
bst->learner()->DelAttr(key); bst->learner()->DelAttr(key);
@ -912,7 +909,7 @@ XGB_DLL int XGBoosterGetAttrNames(BoosterHandle handle,
const char*** out) { const char*** out) {
std::vector<std::string>& str_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_str; std::vector<std::string>& str_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_str;
std::vector<const char*>& charp_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_charp; std::vector<const char*>& charp_vecs = XGBAPIThreadLocalStore::Get()->ret_vec_charp;
Booster *bst = static_cast<Booster*>(handle); auto *bst = static_cast<Booster*>(handle);
API_BEGIN(); API_BEGIN();
str_vecs = bst->learner()->GetAttrNames(); str_vecs = bst->learner()->GetAttrNames();
charp_vecs.resize(str_vecs.size()); charp_vecs.resize(str_vecs.size());
@ -927,7 +924,7 @@ XGB_DLL int XGBoosterGetAttrNames(BoosterHandle handle,
XGB_DLL int XGBoosterLoadRabitCheckpoint(BoosterHandle handle, XGB_DLL int XGBoosterLoadRabitCheckpoint(BoosterHandle handle,
int* version) { int* version) {
API_BEGIN(); API_BEGIN();
Booster* bst = static_cast<Booster*>(handle); auto* bst = static_cast<Booster*>(handle);
*version = rabit::LoadCheckPoint(bst->learner()); *version = rabit::LoadCheckPoint(bst->learner());
if (*version != 0) { if (*version != 0) {
bst->initialized_ = true; bst->initialized_ = true;
@ -937,7 +934,7 @@ XGB_DLL int XGBoosterLoadRabitCheckpoint(BoosterHandle handle,
XGB_DLL int XGBoosterSaveRabitCheckpoint(BoosterHandle handle) { XGB_DLL int XGBoosterSaveRabitCheckpoint(BoosterHandle handle) {
API_BEGIN(); API_BEGIN();
Booster* bst = static_cast<Booster*>(handle); auto* bst = static_cast<Booster*>(handle);
if (bst->learner()->AllowLazyCheckPoint()) { if (bst->learner()->AllowLazyCheckPoint()) {
rabit::LazyCheckPoint(bst->learner()); rabit::LazyCheckPoint(bst->learner());
} else { } else {
@ -10,7 +10,7 @@ struct XGBAPIErrorEntry {
std::string last_error; std::string last_error;
}; };
typedef dmlc::ThreadLocalStore<XGBAPIErrorEntry> XGBAPIErrorStore; using XGBAPIErrorStore = dmlc::ThreadLocalStore<XGBAPIErrorEntry>;
const char *XGBGetLastError() { const char *XGBGetLastError() {
return XGBAPIErrorStore::Get()->last_error.c_str(); return XGBAPIErrorStore::Get()->last_error.c_str();
@ -134,7 +134,7 @@ struct CLIParam : public dmlc::Parameter<CLIParam> {
char evname[256]; char evname[256];
CHECK_EQ(sscanf(kv.first.c_str(), "eval[%[^]]", evname), 1) CHECK_EQ(sscanf(kv.first.c_str(), "eval[%[^]]", evname), 1)
<< "must specify evaluation name for display"; << "must specify evaluation name for display";
eval_data_names.push_back(std::string(evname)); eval_data_names.emplace_back(evname);
eval_data_paths.push_back(kv.second); eval_data_paths.push_back(kv.second);
} }
} }
@ -177,7 +177,7 @@ void CLITrain(const CLIParam& param) {
std::vector<std::string> eval_data_names = param.eval_data_names; std::vector<std::string> eval_data_names = param.eval_data_names;
if (param.eval_train) { if (param.eval_train) {
eval_datasets.push_back(dtrain.get()); eval_datasets.push_back(dtrain.get());
eval_data_names.push_back(std::string("train")); eval_data_names.emplace_back("train");
} }
// initialize the learner. // initialize the learner.
std::unique_ptr<Learner> learner(Learner::Create(cache_mats)); std::unique_ptr<Learner> learner(Learner::Create(cache_mats));
@ -332,7 +332,7 @@ void CLIPredict(const CLIParam& param) {
std::unique_ptr<dmlc::Stream> fo( std::unique_ptr<dmlc::Stream> fo(
dmlc::Stream::Create(param.name_pred.c_str(), "w")); dmlc::Stream::Create(param.name_pred.c_str(), "w"));
dmlc::ostream os(fo.get()); dmlc::ostream os(fo.get());
for (bst_float p : preds.data_h()) { for (bst_float p : preds.HostVector()) {
os << p << '\n'; os << p << '\n';
} }
// force flush before fo destruct. // force flush before fo destruct.
@ -347,17 +347,17 @@ int CLIRunTask(int argc, char *argv[]) {
rabit::Init(argc, argv); rabit::Init(argc, argv);
std::vector<std::pair<std::string, std::string> > cfg; std::vector<std::pair<std::string, std::string> > cfg;
cfg.push_back(std::make_pair("seed", "0")); cfg.emplace_back("seed", "0");
common::ConfigIterator itr(argv[1]); common::ConfigIterator itr(argv[1]);
while (itr.Next()) { while (itr.Next()) {
cfg.push_back(std::make_pair(std::string(itr.name()), std::string(itr.val()))); cfg.emplace_back(std::string(itr.Name()), std::string(itr.Val()));
} }
for (int i = 2; i < argc; ++i) { for (int i = 2; i < argc; ++i) {
char name[256], val[256]; char name[256], val[256];
if (sscanf(argv[i], "%[^=]=%s", name, val) == 2) { if (sscanf(argv[i], "%[^=]=%s", name, val) == 2) {
cfg.push_back(std::make_pair(std::string(name), std::string(val))); cfg.emplace_back(std::string(name), std::string(val));
} }
} }
CLIParam param; CLIParam param;
@ -68,10 +68,10 @@ inline Float8 round(const Float8& x) {
// Overload std::max/min // Overload std::max/min
namespace std { namespace std {
inline avx::Float8 max(const avx::Float8& a, const avx::Float8& b) { inline avx::Float8 max(const avx::Float8& a, const avx::Float8& b) { // NOLINT
return avx::Float8(_mm256_max_ps(a.x, b.x)); return avx::Float8(_mm256_max_ps(a.x, b.x));
} }
inline avx::Float8 min(const avx::Float8& a, const avx::Float8& b) { inline avx::Float8 min(const avx::Float8& a, const avx::Float8& b) { // NOLINT
return avx::Float8(_mm256_min_ps(a.x, b.x)); return avx::Float8(_mm256_min_ps(a.x, b.x));
} }
} // namespace std } // namespace std
@ -172,7 +172,7 @@ inline Float8 Sigmoid(Float8 x) {
} }
// Store 8 gradient pairs given vectors containing gradient and Hessian // Store 8 gradient pairs given vectors containing gradient and Hessian
inline void StoreGpair(xgboost::bst_gpair* dst, const Float8& grad, inline void StoreGpair(xgboost::GradientPair* dst, const Float8& grad,
const Float8& hess) { const Float8& hess) {
float* ptr = reinterpret_cast<float*>(dst); float* ptr = reinterpret_cast<float*>(dst);
__m256 gpair_low = _mm256_unpacklo_ps(grad.x, hess.x); __m256 gpair_low = _mm256_unpacklo_ps(grad.x, hess.x);
@ -190,11 +190,11 @@ namespace avx {
* \brief Fallback implementation not using AVX. * \brief Fallback implementation not using AVX.
*/ */
struct Float8 { struct Float8 { // NOLINT
float x[8]; float x[8];
explicit Float8(const float& val) { explicit Float8(const float& val) {
for (int i = 0; i < 8; i++) { for (float & i : x) {
x[i] = val; i = val;
} }
} }
explicit Float8(const float* vec) { explicit Float8(const float* vec) {
@ -202,7 +202,7 @@ struct Float8 {
x[i] = vec[i]; x[i] = vec[i];
} }
} }
Float8() {} Float8() = default;
Float8& operator+=(const Float8& rhs) { Float8& operator+=(const Float8& rhs) {
for (int i = 0; i < 8; i++) { for (int i = 0; i < 8; i++) {
x[i] += rhs.x[i]; x[i] += rhs.x[i];
@ -228,7 +228,7 @@ struct Float8 {
return *this; return *this;
} }
void Print() { void Print() {
float* f = reinterpret_cast<float*>(&x); auto* f = reinterpret_cast<float*>(&x);
printf("%f %f %f %f %f %f %f %f\n", f[0], f[1], f[2], f[3], f[4], f[5], printf("%f %f %f %f %f %f %f %f\n", f[0], f[1], f[2], f[3], f[4], f[5],
f[6], f[7]); f[6], f[7]);
} }
@ -252,10 +252,10 @@ inline Float8 operator/(Float8 lhs, const Float8& rhs) {
} }
// Store 8 gradient pairs given vectors containing gradient and Hessian // Store 8 gradient pairs given vectors containing gradient and Hessian
inline void StoreGpair(xgboost::bst_gpair* dst, const Float8& grad, inline void StoreGpair(xgboost::GradientPair* dst, const Float8& grad,
const Float8& hess) { const Float8& hess) {
for (int i = 0; i < 8; i++) { for (int i = 0; i < 8; i++) {
dst[i] = xgboost::bst_gpair(grad.x[i], hess.x[i]); dst[i] = xgboost::GradientPair(grad.x[i], hess.x[i]);
} }
} }
@ -269,14 +269,14 @@ inline Float8 Sigmoid(Float8 x) {
} // namespace avx } // namespace avx
namespace std { namespace std {
inline avx::Float8 max(const avx::Float8& a, const avx::Float8& b) { inline avx::Float8 max(const avx::Float8& a, const avx::Float8& b) { // NOLINT
avx::Float8 max; avx::Float8 max;
for (int i = 0; i < 8; i++) { for (int i = 0; i < 8; i++) {
max.x[i] = std::max(a.x[i], b.x[i]); max.x[i] = std::max(a.x[i], b.x[i]);
} }
return max; return max;
} }
inline avx::Float8 min(const avx::Float8& a, const avx::Float8& b) { inline avx::Float8 min(const avx::Float8& a, const avx::Float8& b) { // NOLINT
avx::Float8 min; avx::Float8 min;
for (int i = 0; i < 8; i++) { for (int i = 0; i < 8; i++) {
min.x[i] = std::min(a.x[i], b.x[i]); min.x[i] = std::min(a.x[i], b.x[i]);
@ -42,7 +42,7 @@ struct BitMap {
inline void InitFromBool(const std::vector<int>& vec) { inline void InitFromBool(const std::vector<int>& vec) {
this->Resize(vec.size()); this->Resize(vec.size());
// parallel over the full cases // parallel over the full cases
bst_omp_uint nsize = static_cast<bst_omp_uint>(vec.size() / 32); auto nsize = static_cast<bst_omp_uint>(vec.size() / 32);
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nsize; ++i) { for (bst_omp_uint i = 0; i < nsize; ++i) {
uint32_t res = 0; uint32_t res = 0;
@ -9,20 +9,26 @@
#define XGBOOST_COMMON_COLUMN_MATRIX_H_ #define XGBOOST_COMMON_COLUMN_MATRIX_H_
#define XGBOOST_TYPE_SWITCH(dtype, OP) \ #define XGBOOST_TYPE_SWITCH(dtype, OP) \
switch (dtype) { \ \
case xgboost::common::uint32 : { \ switch(dtype) { \
typedef uint32_t DType; \ case xgboost::common::uint32: { \
OP; break; \ using DType = uint32_t; \
OP; \
break; \
} \ } \
case xgboost::common::uint16 : { \ case xgboost::common::uint16: { \
typedef uint16_t DType; \ using DType = uint16_t; \
OP; break; \ OP; \
break; \
} \ } \
case xgboost::common::uint8 : { \ case xgboost::common::uint8: { \
typedef uint8_t DType; \ using DType = uint8_t; \
OP; break; \ OP; \
default: LOG(FATAL) << "don't recognize type flag" << dtype; \ break; \
default: \
LOG(FATAL) << "don't recognize type flag" << dtype; \
} \ } \
\
} }
#include <type_traits> #include <type_traits>
@ -31,11 +37,12 @@ switch (dtype) { \
#include "hist_util.h" #include "hist_util.h"
#include "../tree/fast_hist_param.h" #include "../tree/fast_hist_param.h"
using xgboost::tree::FastHistParam;
namespace xgboost { namespace xgboost {
namespace common { namespace common {
using tree::FastHistParam;
/*! \brief indicator of data type used for storing bin id's in a column. */ /*! \brief indicator of data type used for storing bin id's in a column. */
enum DataType { enum DataType {
uint8 = 1, uint8 = 1,
@ -78,7 +85,7 @@ class ColumnMatrix {
slot of internal buffer. */ slot of internal buffer. */
packing_factor_ = sizeof(uint32_t) / static_cast<size_t>(this->dtype); packing_factor_ = sizeof(uint32_t) / static_cast<size_t>(this->dtype);
const bst_uint nfeature = static_cast<bst_uint>(gmat.cut->row_ptr.size() - 1); const auto nfeature = static_cast<bst_uint>(gmat.cut->row_ptr.size() - 1);
const size_t nrow = gmat.row_ptr.size() - 1; const size_t nrow = gmat.row_ptr.size() - 1;
// identify type of each column // identify type of each column
@ -14,7 +14,7 @@ struct RandomThreadLocalEntry {
GlobalRandomEngine engine; GlobalRandomEngine engine;
}; };
typedef dmlc::ThreadLocalStore<RandomThreadLocalEntry> RandomThreadLocalStore; using RandomThreadLocalStore = dmlc::ThreadLocalStore<RandomThreadLocalEntry>;
GlobalRandomEngine& GlobalRandom() { GlobalRandomEngine& GlobalRandom() {
return RandomThreadLocalStore::Get()->engine; return RandomThreadLocalStore::Get()->engine;
@ -11,20 +11,20 @@
namespace xgboost { namespace xgboost {
namespace common { namespace common {
typedef unsigned char compressed_byte_t; using CompressedByteT = unsigned char;
namespace detail { namespace detail {
inline void SetBit(compressed_byte_t *byte, int bit_idx) { inline void SetBit(CompressedByteT *byte, int bit_idx) {
*byte |= 1 << bit_idx; *byte |= 1 << bit_idx;
} }
template <typename T> template <typename T>
inline T CheckBit(const T &byte, int bit_idx) { inline T CheckBit(const T &byte, int bit_idx) {
return byte & (1 << bit_idx); return byte & (1 << bit_idx);
} }
inline void ClearBit(compressed_byte_t *byte, int bit_idx) { inline void ClearBit(CompressedByteT *byte, int bit_idx) {
*byte &= ~(1 << bit_idx); *byte &= ~(1 << bit_idx);
} }
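Note: a round-trip through the bit helpers above (illustration only):

    CompressedByteT b = 0;
    detail::SetBit(&b, 3);                    // b == 0b00001000
    bool set = detail::CheckBit(b, 3) != 0;   // true
    detail::ClearBit(&b, 3);                  // b == 0 again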
static const int padding = 4; // Assign padding so we can read slightly off static const int kPadding = 4; // Assign padding so we can read slightly off
// the beginning of the array // the beginning of the array
// The number of bits required to represent a given unsigned range // The number of bits required to represent a given unsigned range
@ -76,16 +76,16 @@ class CompressedBufferWriter {
size_t compressed_size = static_cast<size_t>(std::ceil( size_t compressed_size = static_cast<size_t>(std::ceil(
static_cast<double>(detail::SymbolBits(num_symbols) * num_elements) / static_cast<double>(detail::SymbolBits(num_symbols) * num_elements) /
bits_per_byte)); bits_per_byte));
return compressed_size + detail::padding; return compressed_size + detail::kPadding;
} }
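Note: a worked example, assuming detail::SymbolBits(n) returns ceil(log2(n)): for num_symbols = 256 each symbol needs 8 bits, so num_elements = 1000 gives ceil(8 * 1000 / 8) = 1000 bytes, plus kPadding = 4, for a 1004-byte buffer.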
template <typename T> template <typename T>
void WriteSymbol(compressed_byte_t *buffer, T symbol, size_t offset) { void WriteSymbol(CompressedByteT *buffer, T symbol, size_t offset) {
const int bits_per_byte = 8; const int bits_per_byte = 8;
for (size_t i = 0; i < symbol_bits_; i++) { for (size_t i = 0; i < symbol_bits_; i++) {
size_t byte_idx = ((offset + 1) * symbol_bits_ - (i + 1)) / bits_per_byte; size_t byte_idx = ((offset + 1) * symbol_bits_ - (i + 1)) / bits_per_byte;
byte_idx += detail::padding; byte_idx += detail::kPadding;
size_t bit_idx = size_t bit_idx =
((bits_per_byte + i) - ((offset + 1) * symbol_bits_)) % bits_per_byte; ((bits_per_byte + i) - ((offset + 1) * symbol_bits_)) % bits_per_byte;
@ -96,20 +96,20 @@ class CompressedBufferWriter {
} }
} }
} }
template <typename iter_t> template <typename IterT>
void Write(compressed_byte_t *buffer, iter_t input_begin, iter_t input_end) { void Write(CompressedByteT *buffer, IterT input_begin, IterT input_end) {
uint64_t tmp = 0; uint64_t tmp = 0;
size_t stored_bits = 0; size_t stored_bits = 0;
const size_t max_stored_bits = 64 - symbol_bits_; const size_t max_stored_bits = 64 - symbol_bits_;
size_t buffer_position = detail::padding; size_t buffer_position = detail::kPadding;
const size_t num_symbols = input_end - input_begin; const size_t num_symbols = input_end - input_begin;
for (size_t i = 0; i < num_symbols; i++) { for (size_t i = 0; i < num_symbols; i++) {
typename std::iterator_traits<iter_t>::value_type symbol = input_begin[i]; typename std::iterator_traits<IterT>::value_type symbol = input_begin[i];
if (stored_bits > max_stored_bits) { if (stored_bits > max_stored_bits) {
// Eject only full bytes // Eject only full bytes
size_t tmp_bytes = stored_bits / 8; size_t tmp_bytes = stored_bits / 8;
for (size_t j = 0; j < tmp_bytes; j++) { for (size_t j = 0; j < tmp_bytes; j++) {
buffer[buffer_position] = static_cast<compressed_byte_t>( buffer[buffer_position] = static_cast<CompressedByteT>(
tmp >> (stored_bits - (j + 1) * 8)); tmp >> (stored_bits - (j + 1) * 8));
buffer_position++; buffer_position++;
} }
@ -129,10 +129,10 @@ class CompressedBufferWriter {
int shift_bits = static_cast<int>(stored_bits) - (j + 1) * 8; int shift_bits = static_cast<int>(stored_bits) - (j + 1) * 8;
if (shift_bits >= 0) { if (shift_bits >= 0) {
buffer[buffer_position] = buffer[buffer_position] =
static_cast<compressed_byte_t>(tmp >> shift_bits); static_cast<CompressedByteT>(tmp >> shift_bits);
} else { } else {
buffer[buffer_position] = buffer[buffer_position] =
static_cast<compressed_byte_t>(tmp << std::abs(shift_bits)); static_cast<CompressedByteT>(tmp << std::abs(shift_bits));
} }
buffer_position++; buffer_position++;
} }
@ -153,23 +153,21 @@ template <typename T>
class CompressedIterator { class CompressedIterator {
public: public:
typedef CompressedIterator<T> self_type; ///< My own type // Type definitions for thrust
typedef ptrdiff_t typedef CompressedIterator<T> self_type; // NOLINT
difference_type; ///< Type to express the result of subtracting typedef ptrdiff_t difference_type; // NOLINT
/// one iterator from another typedef T value_type; // NOLINT
typedef T value_type; ///< The type of the element the iterator can point to typedef value_type *pointer; // NOLINT
typedef value_type *pointer; ///< The type of a pointer to an element the typedef value_type reference; // NOLINT
/// iterator can point to
typedef value_type reference; ///< The type of a reference to an element the
/// iterator can point to
private: private:
compressed_byte_t *buffer_; CompressedByteT *buffer_;
size_t symbol_bits_; size_t symbol_bits_;
size_t offset_; size_t offset_;
public: public:
CompressedIterator() : buffer_(nullptr), symbol_bits_(0), offset_(0) {} CompressedIterator() : buffer_(nullptr), symbol_bits_(0), offset_(0) {}
CompressedIterator(compressed_byte_t *buffer, int num_symbols) CompressedIterator(CompressedByteT *buffer, int num_symbols)
: buffer_(buffer), offset_(0) { : buffer_(buffer), offset_(0) {
symbol_bits_ = detail::SymbolBits(num_symbols); symbol_bits_ = detail::SymbolBits(num_symbols);
} }
@ -178,7 +176,7 @@ class CompressedIterator {
const int bits_per_byte = 8; const int bits_per_byte = 8;
size_t start_bit_idx = ((offset_ + 1) * symbol_bits_ - 1); size_t start_bit_idx = ((offset_ + 1) * symbol_bits_ - 1);
size_t start_byte_idx = start_bit_idx / bits_per_byte; size_t start_byte_idx = start_bit_idx / bits_per_byte;
start_byte_idx += detail::padding; start_byte_idx += detail::kPadding;
// Read 5 bytes - the maximum we will need // Read 5 bytes - the maximum we will need
uint64_t tmp = static_cast<uint64_t>(buffer_[start_byte_idx - 4]) << 32 | uint64_t tmp = static_cast<uint64_t>(buffer_[start_byte_idx - 4]) << 32 |
@ -24,33 +24,33 @@ class ConfigReaderBase {
* \brief get current name, called after Next returns true * \brief get current name, called after Next returns true
* \return current parameter name * \return current parameter name
*/ */
inline const char *name(void) const { inline const char *Name() const {
return s_name.c_str(); return s_name_.c_str();
} }
/*! /*!
* \brief get current value, called after Next returns true * \brief get current value, called after Next returns true
* \return current parameter value * \return current parameter value
*/ */
inline const char *val(void) const { inline const char *Val() const {
return s_val.c_str(); return s_val_.c_str();
} }
/*! /*!
* \brief move iterator to next position * \brief move iterator to next position
* \return true if there is value in next position * \return true if there is value in next position
*/ */
inline bool Next(void) { inline bool Next() {
while (!this->IsEnd()) { while (!this->IsEnd()) {
GetNextToken(&s_name); GetNextToken(&s_name_);
if (s_name == "=") return false; if (s_name_ == "=") return false;
if (GetNextToken(&s_buf) || s_buf != "=") return false; if (GetNextToken(&s_buf_) || s_buf_ != "=") return false;
if (GetNextToken(&s_val) || s_val == "=") return false; if (GetNextToken(&s_val_) || s_val_ == "=") return false;
return true; return true;
} }
return false; return false;
} }
// called before usage // called before usage
inline void Init(void) { inline void Init() {
ch_buf = this->GetChar(); ch_buf_ = this->GetChar();
} }
protected: protected:
@ -58,38 +58,38 @@ class ConfigReaderBase {
* \brief to be implemented by subclass, * \brief to be implemented by subclass,
* get next token, return EOF if end of file * get next token, return EOF if end of file
*/ */
virtual char GetChar(void) = 0; virtual char GetChar() = 0;
/*! \brief to be implemented by child, check if end of stream */ /*! \brief to be implemented by child, check if end of stream */
virtual bool IsEnd(void) = 0; virtual bool IsEnd() = 0;
private: private:
char ch_buf; char ch_buf_;
std::string s_name, s_val, s_buf; std::string s_name_, s_val_, s_buf_;
inline void SkipLine(void) { inline void SkipLine() {
do { do {
ch_buf = this->GetChar(); ch_buf_ = this->GetChar();
} while (ch_buf != EOF && ch_buf != '\n' && ch_buf != '\r'); } while (ch_buf_ != EOF && ch_buf_ != '\n' && ch_buf_ != '\r');
} }
inline void ParseStr(std::string *tok) { inline void ParseStr(std::string *tok) {
while ((ch_buf = this->GetChar()) != EOF) { while ((ch_buf_ = this->GetChar()) != EOF) {
switch (ch_buf) { switch (ch_buf_) {
case '\\': *tok += this->GetChar(); break; case '\\': *tok += this->GetChar(); break;
case '\"': return; case '\"': return;
case '\r': case '\r':
case '\n': LOG(FATAL)<< "ConfigReader: unterminated string"; case '\n': LOG(FATAL)<< "ConfigReader: unterminated string";
default: *tok += ch_buf; default: *tok += ch_buf_;
} }
} }
LOG(FATAL) << "ConfigReader: unterminated string"; LOG(FATAL) << "ConfigReader: unterminated string";
} }
inline void ParseStrML(std::string *tok) { inline void ParseStrML(std::string *tok) {
while ((ch_buf = this->GetChar()) != EOF) { while ((ch_buf_ = this->GetChar()) != EOF) {
switch (ch_buf) { switch (ch_buf_) {
case '\\': *tok += this->GetChar(); break; case '\\': *tok += this->GetChar(); break;
case '\'': return; case '\'': return;
default: *tok += ch_buf; default: *tok += ch_buf_;
} }
} }
LOG(FATAL) << "unterminated string"; LOG(FATAL) << "unterminated string";
@ -98,24 +98,24 @@ class ConfigReaderBase {
inline bool GetNextToken(std::string *tok) { inline bool GetNextToken(std::string *tok) {
tok->clear(); tok->clear();
bool new_line = false; bool new_line = false;
while (ch_buf != EOF) { while (ch_buf_ != EOF) {
switch (ch_buf) { switch (ch_buf_) {
case '#' : SkipLine(); new_line = true; break; case '#' : SkipLine(); new_line = true; break;
case '\"': case '\"':
if (tok->length() == 0) { if (tok->length() == 0) {
ParseStr(tok); ch_buf = this->GetChar(); return new_line; ParseStr(tok); ch_buf_ = this->GetChar(); return new_line;
} else { } else {
LOG(FATAL) << "ConfigReader: token followed directly by string"; LOG(FATAL) << "ConfigReader: token followed directly by string";
} }
case '\'': case '\'':
if (tok->length() == 0) { if (tok->length() == 0) {
ParseStrML(tok); ch_buf = this->GetChar(); return new_line; ParseStrML(tok); ch_buf_ = this->GetChar(); return new_line;
} else { } else {
LOG(FATAL) << "ConfigReader: token followed directly by string"; LOG(FATAL) << "ConfigReader: token followed directly by string";
} }
case '=': case '=':
if (tok->length() == 0) { if (tok->length() == 0) {
ch_buf = this->GetChar(); ch_buf_ = this->GetChar();
*tok = '='; *tok = '=';
} }
return new_line; return new_line;
@ -124,12 +124,12 @@ class ConfigReaderBase {
if (tok->length() == 0) new_line = true; if (tok->length() == 0) new_line = true;
case '\t': case '\t':
case ' ' : case ' ' :
ch_buf = this->GetChar(); ch_buf_ = this->GetChar();
if (tok->length() != 0) return new_line; if (tok->length() != 0) return new_line;
break; break;
default: default:
*tok += ch_buf; *tok += ch_buf_;
ch_buf = this->GetChar(); ch_buf_ = this->GetChar();
break; break;
} }
} }
@ -149,19 +149,19 @@ class ConfigStreamReader: public ConfigReaderBase {
* \brief constructor * \brief constructor
* \param fin istream input stream * \param fin istream input stream
*/ */
explicit ConfigStreamReader(std::istream &fin) : fin(fin) {} explicit ConfigStreamReader(std::istream &fin) : fin_(fin) {}
protected: protected:
virtual char GetChar(void) { char GetChar() override {
return fin.get(); return fin_.get();
} }
/*! \brief to be implemented by child, check if end of stream */ /*! \brief to be implemented by child, check if end of stream */
virtual bool IsEnd(void) { bool IsEnd() override {
return fin.eof(); return fin_.eof();
} }
private: private:
std::istream &fin; std::istream &fin_;
}; };
/*! /*!
@ -173,20 +173,20 @@ class ConfigIterator: public ConfigStreamReader {
* \brief constructor * \brief constructor
* \param fname name of configure file * \param fname name of configure file
*/ */
explicit ConfigIterator(const char *fname) : ConfigStreamReader(fi) { explicit ConfigIterator(const char *fname) : ConfigStreamReader(fi_) {
fi.open(fname); fi_.open(fname);
if (fi.fail()) { if (fi_.fail()) {
LOG(FATAL) << "cannot open file " << fname; LOG(FATAL) << "cannot open file " << fname;
} }
ConfigReaderBase::Init(); ConfigReaderBase::Init();
} }
/*! \brief destructor */ /*! \brief destructor */
~ConfigIterator(void) { ~ConfigIterator() {
fi.close(); fi_.close();
} }
private: private:
std::ifstream fi; std::ifstream fi_;
}; };
} // namespace common } // namespace common
} // namespace xgboost } // namespace xgboost
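Note: the grammar this reader accepts, shown as a small sample file (illustrative values, not taken from the commit):

    # '#' starts a comment; tokens form name = value pairs
    booster = gbtree
    eta = 0.1
    eval[test] = "test.txt"   # double or single quotes delimit string values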
@ -25,16 +25,16 @@
namespace dh { namespace dh {
#define HOST_DEV_INLINE __host__ __device__ __forceinline__ #define HOST_DEV_INLINE XGBOOST_DEVICE __forceinline__
#define DEV_INLINE __device__ __forceinline__ #define DEV_INLINE __device__ __forceinline__
/* /*
* Error handling functions * Error handling functions
*/ */
#define safe_cuda(ans) throw_on_cuda_error((ans), __FILE__, __LINE__) #define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)
inline cudaError_t throw_on_cuda_error(cudaError_t code, const char *file, inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file,
int line) { int line) {
if (code != cudaSuccess) { if (code != cudaSuccess) {
std::stringstream ss; std::stringstream ss;
@ -48,9 +48,9 @@ inline cudaError_t throw_on_cuda_error(cudaError_t code, const char *file,
} }
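Note: any call returning cudaError_t can be wrapped by the macro above, as this file does later on, e.g.:

    dh::safe_cuda(cudaSetDevice(device_idx));
    dh::safe_cuda(cudaDeviceSynchronize());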
#ifdef XGBOOST_USE_NCCL #ifdef XGBOOST_USE_NCCL
#define safe_nccl(ans) throw_on_nccl_error((ans), __FILE__, __LINE__) #define safe_nccl(ans) ThrowOnNcclError((ans), __FILE__, __LINE__)
inline ncclResult_t throw_on_nccl_error(ncclResult_t code, const char *file, inline ncclResult_t ThrowOnNcclError(ncclResult_t code, const char *file,
int line) { int line) {
if (code != ncclSuccess) { if (code != ncclSuccess) {
std::stringstream ss; std::stringstream ss;
@ -64,16 +64,16 @@ inline ncclResult_t throw_on_nccl_error(ncclResult_t code, const char *file,
#endif #endif
template <typename T> template <typename T>
T *raw(thrust::device_vector<T> &v) { // NOLINT T *Raw(thrust::device_vector<T> &v) { // NOLINT
return raw_pointer_cast(v.data()); return raw_pointer_cast(v.data());
} }
template <typename T> template <typename T>
const T *raw(const thrust::device_vector<T> &v) { // NOLINT const T *Raw(const thrust::device_vector<T> &v) { // NOLINT
return raw_pointer_cast(v.data()); return raw_pointer_cast(v.data());
} }
inline int n_visible_devices() { inline int NVisibleDevices() {
int n_visgpus = 0; int n_visgpus = 0;
dh::safe_cuda(cudaGetDeviceCount(&n_visgpus)); dh::safe_cuda(cudaGetDeviceCount(&n_visgpus));
@ -81,40 +81,40 @@ inline int n_visible_devices() {
return n_visgpus; return n_visgpus;
} }
inline int n_devices_all(int n_gpus) { inline int NDevicesAll(int n_gpus) {
int n_devices_visible = dh::n_visible_devices(); int n_devices_visible = dh::NVisibleDevices();
int n_devices = n_gpus < 0 ? n_devices_visible : n_gpus; int n_devices = n_gpus < 0 ? n_devices_visible : n_gpus;
return (n_devices); return (n_devices);
} }
inline int n_devices(int n_gpus, int num_rows) { inline int NDevices(int n_gpus, int num_rows) {
int n_devices = dh::n_devices_all(n_gpus); int n_devices = dh::NDevicesAll(n_gpus);
// fix-up device number to be limited by number of rows // fix-up device number to be limited by number of rows
n_devices = n_devices > num_rows ? num_rows : n_devices; n_devices = n_devices > num_rows ? num_rows : n_devices;
return (n_devices); return (n_devices);
} }
// if n_devices=-1, then use all visible devices // if n_devices=-1, then use all visible devices
inline void synchronize_n_devices(int n_devices, std::vector<int> dList) { inline void SynchronizeNDevices(int n_devices, std::vector<int> dList) {
for (int d_idx = 0; d_idx < n_devices; d_idx++) { for (int d_idx = 0; d_idx < n_devices; d_idx++) {
int device_idx = dList[d_idx]; int device_idx = dList[d_idx];
safe_cuda(cudaSetDevice(device_idx)); safe_cuda(cudaSetDevice(device_idx));
safe_cuda(cudaDeviceSynchronize()); safe_cuda(cudaDeviceSynchronize());
} }
} }
inline void synchronize_all() { inline void SynchronizeAll() {
for (int device_idx = 0; device_idx < n_visible_devices(); device_idx++) { for (int device_idx = 0; device_idx < NVisibleDevices(); device_idx++) {
safe_cuda(cudaSetDevice(device_idx)); safe_cuda(cudaSetDevice(device_idx));
safe_cuda(cudaDeviceSynchronize()); safe_cuda(cudaDeviceSynchronize());
} }
} }
inline std::string device_name(int device_idx) { inline std::string DeviceName(int device_idx) {
cudaDeviceProp prop; cudaDeviceProp prop;
dh::safe_cuda(cudaGetDeviceProperties(&prop, device_idx)); dh::safe_cuda(cudaGetDeviceProperties(&prop, device_idx));
return std::string(prop.name); return std::string(prop.name);
} }
inline size_t available_memory(int device_idx) { inline size_t AvailableMemory(int device_idx) {
size_t device_free = 0; size_t device_free = 0;
size_t device_total = 0; size_t device_total = 0;
safe_cuda(cudaSetDevice(device_idx)); safe_cuda(cudaSetDevice(device_idx));
@ -130,20 +130,20 @@ inline size_t available_memory(int device_idx) {
* \param device_idx Zero-based index of the device. * \param device_idx Zero-based index of the device.
*/ */
inline size_t max_shared_memory(int device_idx) { inline size_t MaxSharedMemory(int device_idx) {
cudaDeviceProp prop; cudaDeviceProp prop;
dh::safe_cuda(cudaGetDeviceProperties(&prop, device_idx)); dh::safe_cuda(cudaGetDeviceProperties(&prop, device_idx));
return prop.sharedMemPerBlock; return prop.sharedMemPerBlock;
} }
// ensure gpu_id is correct, so not dependent upon user knowing details // ensure gpu_id is correct, so not dependent upon user knowing details
inline int get_device_idx(int gpu_id) { inline int GetDeviceIdx(int gpu_id) {
// protect against overrun for gpu_id // protect against overrun for gpu_id
return (std::abs(gpu_id) + 0) % dh::n_visible_devices(); return (std::abs(gpu_id) + 0) % dh::NVisibleDevices();
} }
inline void check_compute_capability() { inline void CheckComputeCapability() {
int n_devices = n_visible_devices(); int n_devices = NVisibleDevices();
for (int d_idx = 0; d_idx < n_devices; ++d_idx) { for (int d_idx = 0; d_idx < n_devices; ++d_idx) {
cudaDeviceProp prop; cudaDeviceProp prop;
safe_cuda(cudaGetDeviceProperties(&prop, d_idx)); safe_cuda(cudaGetDeviceProperties(&prop, d_idx));
@ -159,72 +159,72 @@ inline void check_compute_capability() {
* Range iterator * Range iterator
*/ */
class range { class Range {
public: public:
class iterator { class Iterator {
friend class range; friend class Range;
public: public:
__host__ __device__ int64_t operator*() const { return i_; } XGBOOST_DEVICE int64_t operator*() const { return i_; }
__host__ __device__ const iterator &operator++() { XGBOOST_DEVICE const Iterator &operator++() {
i_ += step_; i_ += step_;
return *this; return *this;
} }
__host__ __device__ iterator operator++(int) { XGBOOST_DEVICE Iterator operator++(int) {
iterator copy(*this); Iterator copy(*this);
i_ += step_; i_ += step_;
return copy; return copy;
} }
__host__ __device__ bool operator==(const iterator &other) const { XGBOOST_DEVICE bool operator==(const Iterator &other) const {
return i_ >= other.i_; return i_ >= other.i_;
} }
__host__ __device__ bool operator!=(const iterator &other) const { XGBOOST_DEVICE bool operator!=(const Iterator &other) const {
return i_ < other.i_; return i_ < other.i_;
} }
__host__ __device__ void step(int s) { step_ = s; } XGBOOST_DEVICE void Step(int s) { step_ = s; }
protected: protected:
__host__ __device__ explicit iterator(int64_t start) : i_(start) {} XGBOOST_DEVICE explicit Iterator(int64_t start) : i_(start) {}
public: public:
uint64_t i_; uint64_t i_;
int step_ = 1; int step_ = 1;
}; };
__host__ __device__ iterator begin() const { return begin_; } XGBOOST_DEVICE Iterator begin() const { return begin_; } // NOLINT
__host__ __device__ iterator end() const { return end_; } XGBOOST_DEVICE Iterator end() const { return end_; } // NOLINT
__host__ __device__ range(int64_t begin, int64_t end) XGBOOST_DEVICE Range(int64_t begin, int64_t end)
: begin_(begin), end_(end) {} : begin_(begin), end_(end) {}
__host__ __device__ void step(int s) { begin_.step(s); } XGBOOST_DEVICE void Step(int s) { begin_.Step(s); }
private: private:
iterator begin_; Iterator begin_;
iterator end_; Iterator end_;
}; };
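A quick host-side illustration of the iterator above (a sketch, not part of the diff). Step() adjusts only the begin iterator's stride, and because operator== tests i_ >= other.i_, overshooting the end value still terminates the loop:

  Range r(0, 10);
  r.Step(3);
  for (auto i : r) {
    // visits 0, 3, 6, 9; the next increment reaches 12 >= 10 and stops
  }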
template <typename T> template <typename T>
__device__ range grid_stride_range(T begin, T end) { __device__ Range GridStrideRange(T begin, T end) {
begin += blockDim.x * blockIdx.x + threadIdx.x; begin += blockDim.x * blockIdx.x + threadIdx.x;
range r(begin, end); Range r(begin, end);
r.step(gridDim.x * blockDim.x); r.Step(gridDim.x * blockDim.x);
return r; return r;
} }
template <typename T> template <typename T>
__device__ range block_stride_range(T begin, T end) { __device__ Range BlockStrideRange(T begin, T end) {
begin += threadIdx.x; begin += threadIdx.x;
range r(begin, end); Range r(begin, end);
r.step(blockDim.x); r.Step(blockDim.x);
return r; return r;
} }
// Threadblock iterates over range, filling with value. Requires all threads in // Threadblock iterates over range, filling with value. Requires all threads in
// block to be active. // block to be active.
template <typename IterT, typename ValueT> template <typename IterT, typename ValueT>
__device__ void block_fill(IterT begin, size_t n, ValueT value) { __device__ void BlockFill(IterT begin, size_t n, ValueT value) {
for (auto i : block_stride_range(static_cast<size_t>(0), n)) { for (auto i : BlockStrideRange(static_cast<size_t>(0), n)) {
begin[i] = value; begin[i] = value;
} }
} }
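These helpers encode the classic grid-stride loop, so a fixed-size launch covers any n. A hedged sketch of a kernel written against them (ScaleKernel is illustrative, not from the diff):

  __global__ void ScaleKernel(float *x, size_t n, float a) {
    // Each thread starts at its global index and strides by the whole grid.
    for (auto i : dh::GridStrideRange(static_cast<size_t>(0), n)) {
      x[i] *= a;
    }
  }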
@ -234,34 +234,34 @@ __device__ void block_fill(IterT begin, size_t n, ValueT value) {
*/ */
template <typename T1, typename T2> template <typename T1, typename T2>
T1 div_round_up(const T1 a, const T2 b) { T1 DivRoundUp(const T1 a, const T2 b) {
return static_cast<T1>(ceil(static_cast<double>(a) / b)); return static_cast<T1>(ceil(static_cast<double>(a) / b));
} }
template <typename L> template <typename L>
__global__ void launch_n_kernel(size_t begin, size_t end, L lambda) { __global__ void LaunchNKernel(size_t begin, size_t end, L lambda) {
for (auto i : grid_stride_range(begin, end)) { for (auto i : GridStrideRange(begin, end)) {
lambda(i); lambda(i);
} }
} }
template <typename L> template <typename L>
__global__ void launch_n_kernel(int device_idx, size_t begin, size_t end, __global__ void LaunchNKernel(int device_idx, size_t begin, size_t end,
L lambda) { L lambda) {
for (auto i : grid_stride_range(begin, end)) { for (auto i : GridStrideRange(begin, end)) {
lambda(i, device_idx); lambda(i, device_idx);
} }
} }
template <int ITEMS_PER_THREAD = 8, int BLOCK_THREADS = 256, typename L> template <int ITEMS_PER_THREAD = 8, int BLOCK_THREADS = 256, typename L>
inline void launch_n(int device_idx, size_t n, L lambda) { inline void LaunchN(int device_idx, size_t n, L lambda) {
if (n == 0) { if (n == 0) {
return; return;
} }
safe_cuda(cudaSetDevice(device_idx)); safe_cuda(cudaSetDevice(device_idx));
const int GRID_SIZE = const int GRID_SIZE =
static_cast<int>(div_round_up(n, ITEMS_PER_THREAD * BLOCK_THREADS)); static_cast<int>(DivRoundUp(n, ITEMS_PER_THREAD * BLOCK_THREADS));
launch_n_kernel<<<GRID_SIZE, BLOCK_THREADS>>>(static_cast<size_t>(0), n, LaunchNKernel<<<GRID_SIZE, BLOCK_THREADS>>>(static_cast<size_t>(0), n,
lambda); lambda);
} }
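Usage is then a one-liner. A hedged sketch, assuming device 0 and a fresh allocation (extended __device__ lambdas need nvcc's --expt-extended-lambda):

  int *d_out = nullptr;
  size_t n = 1000;
  dh::safe_cuda(cudaMalloc(&d_out, n * sizeof(int)));
  // Runs the lambda once for every index in [0, n).
  dh::LaunchN(0, n, [=] __device__(size_t idx) { d_out[idx] = static_cast<int>(idx); });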
@ -269,91 +269,91 @@ inline void launch_n(int device_idx, size_t n, L lambda) {
* Memory * Memory
*/ */
enum memory_type { DEVICE, DEVICE_MANAGED }; enum MemoryType { kDevice, kDeviceManaged };
template <memory_type MemoryT> template <MemoryType MemoryT>
class bulk_allocator; class BulkAllocator;
template <typename T> template <typename T>
class dvec2; class DVec2;
template <typename T> template <typename T>
class dvec { class DVec {
friend class dvec2<T>; friend class DVec2<T>;
private: private:
T *_ptr; T *ptr_;
size_t _size; size_t size_;
int _device_idx; int device_idx_;
public: public:
void external_allocate(int device_idx, void *ptr, size_t size) { void ExternalAllocate(int device_idx, void *ptr, size_t size) {
if (!empty()) { if (!Empty()) {
throw std::runtime_error("Tried to allocate dvec but already allocated"); throw std::runtime_error("Tried to allocate DVec but already allocated");
} }
_ptr = static_cast<T *>(ptr); ptr_ = static_cast<T *>(ptr);
_size = size; size_ = size;
_device_idx = device_idx; device_idx_ = device_idx;
safe_cuda(cudaSetDevice(_device_idx)); safe_cuda(cudaSetDevice(device_idx_));
} }
dvec() : _ptr(NULL), _size(0), _device_idx(-1) {} DVec() : ptr_(NULL), size_(0), device_idx_(-1) {}
size_t size() const { return _size; } size_t Size() const { return size_; }
int device_idx() const { return _device_idx; } int DeviceIdx() const { return device_idx_; }
bool empty() const { return _ptr == NULL || _size == 0; } bool Empty() const { return ptr_ == NULL || size_ == 0; }
T *data() { return _ptr; } T *Data() { return ptr_; }
const T *data() const { return _ptr; } const T *Data() const { return ptr_; }
std::vector<T> as_vector() const { std::vector<T> AsVector() const {
std::vector<T> h_vector(size()); std::vector<T> h_vector(Size());
safe_cuda(cudaSetDevice(_device_idx)); safe_cuda(cudaSetDevice(device_idx_));
safe_cuda(cudaMemcpy(h_vector.data(), _ptr, size() * sizeof(T), safe_cuda(cudaMemcpy(h_vector.data(), ptr_, Size() * sizeof(T),
cudaMemcpyDeviceToHost)); cudaMemcpyDeviceToHost));
return h_vector; return h_vector;
} }
void fill(T value) { void Fill(T value) {
auto d_ptr = _ptr; auto d_ptr = ptr_;
launch_n(_device_idx, size(), LaunchN(device_idx_, Size(),
[=] __device__(size_t idx) { d_ptr[idx] = value; }); [=] __device__(size_t idx) { d_ptr[idx] = value; });
} }
void print() { void Print() {
auto h_vector = this->as_vector(); auto h_vector = this->AsVector();
for (auto e : h_vector) { for (auto e : h_vector) {
std::cout << e << " "; std::cout << e << " ";
} }
std::cout << "\n"; std::cout << "\n";
} }
thrust::device_ptr<T> tbegin() { return thrust::device_pointer_cast(_ptr); } thrust::device_ptr<T> tbegin() { return thrust::device_pointer_cast(ptr_); }
thrust::device_ptr<T> tend() { thrust::device_ptr<T> tend() {
return thrust::device_pointer_cast(_ptr + size()); return thrust::device_pointer_cast(ptr_ + Size());
} }
template <typename T2> template <typename T2>
dvec &operator=(const std::vector<T2> &other) { DVec &operator=(const std::vector<T2> &other) {
this->copy(other.begin(), other.end()); this->copy(other.begin(), other.end());
return *this; return *this;
} }
dvec &operator=(dvec<T> &other) { DVec &operator=(DVec<T> &other) {
if (other.size() != size()) { if (other.Size() != Size()) {
throw std::runtime_error( throw std::runtime_error(
"Cannot copy assign dvec to dvec, sizes are different"); "Cannot copy assign DVec to DVec, sizes are different");
} }
safe_cuda(cudaSetDevice(this->device_idx())); safe_cuda(cudaSetDevice(this->DeviceIdx()));
if (other.device_idx() == this->device_idx()) { if (other.DeviceIdx() == this->DeviceIdx()) {
dh::safe_cuda(cudaMemcpy(this->data(), other.data(), dh::safe_cuda(cudaMemcpy(this->Data(), other.Data(),
other.size() * sizeof(T), other.Size() * sizeof(T),
cudaMemcpyDeviceToDevice)); cudaMemcpyDeviceToDevice));
} else { } else {
std::cout << "deviceother: " << other.device_idx() std::cout << "deviceother: " << other.DeviceIdx()
<< " devicethis: " << this->device_idx() << std::endl; << " devicethis: " << this->DeviceIdx() << std::endl;
std::cout << "size deviceother: " << other.size() std::cout << "size deviceother: " << other.Size()
<< " devicethis: " << this->device_idx() << std::endl; << " devicethis: " << this->DeviceIdx() << std::endl;
throw std::runtime_error("Cannot copy to/from different devices"); throw std::runtime_error("Cannot copy to/from different devices");
} }
@ -362,177 +362,178 @@ class dvec {
template <typename IterT> template <typename IterT>
void copy(IterT begin, IterT end) { void copy(IterT begin, IterT end) {
safe_cuda(cudaSetDevice(this->device_idx())); safe_cuda(cudaSetDevice(this->DeviceIdx()));
if (end - begin != size()) { if (end - begin != Size()) {
throw std::runtime_error( throw std::runtime_error(
"Cannot copy assign vector to dvec, sizes are different"); "Cannot copy assign vector to DVec, sizes are different");
} }
thrust::copy(begin, end, this->tbegin()); thrust::copy(begin, end, this->tbegin());
} }
void copy(thrust::device_ptr<T> begin, thrust::device_ptr<T> end) { void copy(thrust::device_ptr<T> begin, thrust::device_ptr<T> end) {
safe_cuda(cudaSetDevice(this->device_idx())); safe_cuda(cudaSetDevice(this->DeviceIdx()));
if (end - begin != size()) { if (end - begin != Size()) {
throw std::runtime_error( throw std::runtime_error(
"Cannot copy assign vector to dvec, sizes are different"); "Cannot copy assign vector to DVec, sizes are different");
} }
safe_cuda(cudaMemcpy(this->data(), begin.get(), safe_cuda(cudaMemcpy(this->Data(), begin.get(),
size() * sizeof(T), cudaMemcpyDefault)); Size() * sizeof(T), cudaMemcpyDefault));
} }
}; };
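A hedged host-side sketch of how a DVec round-trips data once allocated; d_vec is assumed to be a DVec<float> of size 3 obtained from the BulkAllocator defined further down:

  std::vector<float> h_data = {1.0f, 2.0f, 3.0f};
  d_vec = h_data;                                // operator= copies host -> device; sizes must match
  d_vec.Fill(0.5f);                              // device-side fill, implemented with LaunchN
  std::vector<float> h_back = d_vec.AsVector();  // device -> host copy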
/** /**
* @class dvec2 device_helpers.cuh * @class DVec2 device_helpers.cuh
* @brief wrapper for storing 2 dvec's which are needed for cub::DoubleBuffer * @brief wrapper for storing 2 DVec's which are needed for cub::DoubleBuffer
*/ */
template <typename T> template <typename T>
class dvec2 { class DVec2 {
private: private:
dvec<T> _d1, _d2; DVec<T> d1_, d2_;
cub::DoubleBuffer<T> _buff; cub::DoubleBuffer<T> buff_;
int _device_idx; int device_idx_;
public: public:
void external_allocate(int device_idx, void *ptr1, void *ptr2, size_t size) { void ExternalAllocate(int device_idx, void *ptr1, void *ptr2, size_t size) {
if (!empty()) { if (!Empty()) {
throw std::runtime_error("Tried to allocate dvec2 but already allocated"); throw std::runtime_error("Tried to allocate DVec2 but already allocated");
} }
_device_idx = device_idx; device_idx_ = device_idx;
_d1.external_allocate(_device_idx, ptr1, size); d1_.ExternalAllocate(device_idx_, ptr1, size);
_d2.external_allocate(_device_idx, ptr2, size); d2_.ExternalAllocate(device_idx_, ptr2, size);
_buff.d_buffers[0] = static_cast<T *>(ptr1); buff_.d_buffers[0] = static_cast<T *>(ptr1);
_buff.d_buffers[1] = static_cast<T *>(ptr2); buff_.d_buffers[1] = static_cast<T *>(ptr2);
_buff.selector = 0; buff_.selector = 0;
} }
dvec2() : _d1(), _d2(), _buff(), _device_idx(-1) {} DVec2() : d1_(), d2_(), buff_(), device_idx_(-1) {}
size_t size() const { return _d1.size(); } size_t Size() const { return d1_.Size(); }
int device_idx() const { return _device_idx; } int DeviceIdx() const { return device_idx_; }
bool empty() const { return _d1.empty() || _d2.empty(); } bool Empty() const { return d1_.Empty() || d2_.Empty(); }
cub::DoubleBuffer<T> &buff() { return _buff; } cub::DoubleBuffer<T> &buff() { return buff_; }
dvec<T> &d1() { return _d1; } DVec<T> &D1() { return d1_; }
dvec<T> &d2() { return _d2; } DVec<T> &D2() { return d2_; }
T *current() { return _buff.Current(); } T *Current() { return buff_.Current(); }
dvec<T> &current_dvec() { return _buff.selector == 0 ? d1() : d2(); } DVec<T> &CurrentDVec() { return buff_.selector == 0 ? D1() : D2(); }
T *other() { return _buff.Alternate(); } T *other() { return buff_.Alternate(); }
}; };
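The reason for the wrapper: CUB's radix sorts ping-pong between the two buffers and record the winner in the selector. A hedged sketch, with keys assumed to be an allocated DVec2<int>:

  cub::DoubleBuffer<int> &buf = keys.buff();  // handed to cub::DeviceRadixSort
  // ... after sorting, CUB flips buf.selector if the result landed in the alternate buffer
  int *sorted = keys.Current();               // always the buffer holding the result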
template <memory_type MemoryT> template <MemoryType MemoryT>
class bulk_allocator { class BulkAllocator {
std::vector<char *> d_ptr; std::vector<char *> d_ptr_;
std::vector<size_t> _size; std::vector<size_t> size_;
std::vector<int> _device_idx; std::vector<int> device_idx_;
const int align = 256; static const int kAlign = 256;
size_t align_round_up(size_t n) const { size_t AlignRoundUp(size_t n) const {
n = (n + align - 1) / align; n = (n + kAlign - 1) / kAlign;
return n * align; return n * kAlign;
} }
template <typename T> template <typename T>
size_t get_size_bytes(dvec<T> *first_vec, size_t first_size) { size_t GetSizeBytes(DVec<T> *first_vec, size_t first_size) {
return align_round_up(first_size * sizeof(T)); return AlignRoundUp(first_size * sizeof(T));
} }
template <typename T, typename... Args> template <typename T, typename... Args>
size_t get_size_bytes(dvec<T> *first_vec, size_t first_size, Args... args) { size_t GetSizeBytes(DVec<T> *first_vec, size_t first_size, Args... args) {
return get_size_bytes<T>(first_vec, first_size) + get_size_bytes(args...); return GetSizeBytes<T>(first_vec, first_size) + GetSizeBytes(args...);
} }
template <typename T> template <typename T>
void allocate_dvec(int device_idx, char *ptr, dvec<T> *first_vec, void AllocateDVec(int device_idx, char *ptr, DVec<T> *first_vec,
size_t first_size) { size_t first_size) {
first_vec->external_allocate(device_idx, static_cast<void *>(ptr), first_vec->ExternalAllocate(device_idx, static_cast<void *>(ptr),
first_size); first_size);
} }
template <typename T, typename... Args> template <typename T, typename... Args>
void allocate_dvec(int device_idx, char *ptr, dvec<T> *first_vec, void AllocateDVec(int device_idx, char *ptr, DVec<T> *first_vec,
size_t first_size, Args... args) { size_t first_size, Args... args) {
allocate_dvec<T>(device_idx, ptr, first_vec, first_size); AllocateDVec<T>(device_idx, ptr, first_vec, first_size);
ptr += align_round_up(first_size * sizeof(T)); ptr += AlignRoundUp(first_size * sizeof(T));
allocate_dvec(device_idx, ptr, args...); AllocateDVec(device_idx, ptr, args...);
} }
char *allocate_device(int device_idx, size_t bytes, memory_type t) { char *AllocateDevice(int device_idx, size_t bytes, MemoryType t) {
char *ptr; char *ptr;
safe_cuda(cudaSetDevice(device_idx)); safe_cuda(cudaSetDevice(device_idx));
safe_cuda(cudaMalloc(&ptr, bytes)); safe_cuda(cudaMalloc(&ptr, bytes));
return ptr; return ptr;
} }
template <typename T> template <typename T>
size_t get_size_bytes(dvec2<T> *first_vec, size_t first_size) { size_t GetSizeBytes(DVec2<T> *first_vec, size_t first_size) {
return 2 * align_round_up(first_size * sizeof(T)); return 2 * AlignRoundUp(first_size * sizeof(T));
} }
template <typename T, typename... Args> template <typename T, typename... Args>
size_t get_size_bytes(dvec2<T> *first_vec, size_t first_size, Args... args) { size_t GetSizeBytes(DVec2<T> *first_vec, size_t first_size, Args... args) {
return get_size_bytes<T>(first_vec, first_size) + get_size_bytes(args...); return GetSizeBytes<T>(first_vec, first_size) + GetSizeBytes(args...);
} }
template <typename T> template <typename T>
void allocate_dvec(int device_idx, char *ptr, dvec2<T> *first_vec, void AllocateDVec(int device_idx, char *ptr, DVec2<T> *first_vec,
size_t first_size) { size_t first_size) {
first_vec->external_allocate( first_vec->ExternalAllocate(
device_idx, static_cast<void *>(ptr), device_idx, static_cast<void *>(ptr),
static_cast<void *>(ptr + align_round_up(first_size * sizeof(T))), static_cast<void *>(ptr + AlignRoundUp(first_size * sizeof(T))),
first_size); first_size);
} }
template <typename T, typename... Args> template <typename T, typename... Args>
void allocate_dvec(int device_idx, char *ptr, dvec2<T> *first_vec, void AllocateDVec(int device_idx, char *ptr, DVec2<T> *first_vec,
size_t first_size, Args... args) { size_t first_size, Args... args) {
allocate_dvec<T>(device_idx, ptr, first_vec, first_size); AllocateDVec<T>(device_idx, ptr, first_vec, first_size);
ptr += (align_round_up(first_size * sizeof(T)) * 2); ptr += (AlignRoundUp(first_size * sizeof(T)) * 2);
allocate_dvec(device_idx, ptr, args...); AllocateDVec(device_idx, ptr, args...);
} }
public: public:
bulk_allocator() {} BulkAllocator() = default;
// prevent accidental copying, moving or assignment of this object // prevent accidental copying, moving or assignment of this object
bulk_allocator(const bulk_allocator<MemoryT>&) = delete; BulkAllocator(const BulkAllocator<MemoryT>&) = delete;
bulk_allocator(bulk_allocator<MemoryT>&&) = delete; BulkAllocator(BulkAllocator<MemoryT>&&) = delete;
void operator=(const bulk_allocator<MemoryT>&) = delete; void operator=(const BulkAllocator<MemoryT>&) = delete;
void operator=(bulk_allocator<MemoryT>&&) = delete; void operator=(BulkAllocator<MemoryT>&&) = delete;
~bulk_allocator() { ~BulkAllocator() {
for (size_t i = 0; i < d_ptr.size(); i++) { for (size_t i = 0; i < d_ptr_.size(); i++) {
if (!(d_ptr[i] == nullptr)) { if (!(d_ptr_[i] == nullptr)) {
safe_cuda(cudaSetDevice(_device_idx[i])); safe_cuda(cudaSetDevice(device_idx_[i]));
safe_cuda(cudaFree(d_ptr[i])); safe_cuda(cudaFree(d_ptr_[i]));
d_ptr[i] = nullptr; d_ptr_[i] = nullptr;
} }
} }
} }
// returns sum of bytes for all allocations // returns sum of bytes for all allocations
size_t size() { size_t Size() {
return std::accumulate(_size.begin(), _size.end(), static_cast<size_t>(0)); return std::accumulate(size_.begin(), size_.end(), static_cast<size_t>(0));
} }
template <typename... Args> template <typename... Args>
void allocate(int device_idx, bool silent, Args... args) { void Allocate(int device_idx, bool silent, Args... args) {
size_t size = get_size_bytes(args...); size_t size = GetSizeBytes(args...);
char *ptr = allocate_device(device_idx, size, MemoryT); char *ptr = AllocateDevice(device_idx, size, MemoryT);
allocate_dvec(device_idx, ptr, args...); AllocateDVec(device_idx, ptr, args...);
d_ptr.push_back(ptr); d_ptr_.push_back(ptr);
_size.push_back(size); size_.push_back(size);
_device_idx.push_back(device_idx); device_idx_.push_back(device_idx);
if (!silent) { if (!silent) {
const int mb_size = 1048576; const int mb_size = 1048576;
LOG(CONSOLE) << "Allocated " << size / mb_size << "MB on [" << device_idx LOG(CONSOLE) << "Allocated " << size / mb_size << "MB on [" << device_idx
<< "] " << device_name(device_idx) << ", " << "] " << DeviceName(device_idx) << ", "
<< available_memory(device_idx) / mb_size << "MB remaining."; << AvailableMemory(device_idx) / mb_size << "MB remaining.";
} }
} }
}; };
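A hedged usage sketch: several logical vectors are packed, 256-byte aligned, into one cudaMalloc call, avoiding a separate allocation (and its synchronization cost) per vector:

  dh::BulkAllocator<dh::kDevice> ba;
  dh::DVec<float> grad;
  dh::DVec<int> position;
  // One device allocation backs both vectors; arguments come in (DVec*, count) pairs.
  ba.Allocate(0, /*silent=*/true, &grad, 1000, &position, 1000);
  grad.Fill(0.0f);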
@ -543,7 +544,7 @@ struct CubMemory {
size_t temp_storage_bytes; size_t temp_storage_bytes;
// Thrust // Thrust
typedef char value_type; using ValueT = char;
CubMemory() : d_temp_storage(nullptr), temp_storage_bytes(0) {} CubMemory() : d_temp_storage(nullptr), temp_storage_bytes(0) {}
@ -568,17 +569,18 @@ struct CubMemory {
} }
} }
// Thrust // Thrust
char *allocate(std::ptrdiff_t num_bytes) { char *allocate(std::ptrdiff_t num_bytes) { // NOLINT
LazyAllocate(num_bytes); LazyAllocate(num_bytes);
return reinterpret_cast<char *>(d_temp_storage); return reinterpret_cast<char *>(d_temp_storage);
} }
// Thrust // Thrust
void deallocate(char *ptr, size_t n) { void deallocate(char *ptr, size_t n) { // NOLINT
// Do nothing // Do nothing
} }
bool IsAllocated() { return d_temp_storage != NULL; } bool IsAllocated() { return d_temp_storage != nullptr; }
}; };
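CubMemory caches the scratch buffer for CUB's two-phase calling convention, which every helper below follows. A sketch of the pattern, with d_in/d_out assumed to be device pointers:

  dh::CubMemory tmp;
  size_t bytes = 0;
  // Phase 1: a null storage pointer makes CUB report only the bytes it needs.
  cub::DeviceReduce::Sum(nullptr, bytes, d_in, d_out, n);
  tmp.LazyAllocate(bytes);  // grows the cached buffer only if it is too small
  // Phase 2: the same call with real storage performs the reduction.
  cub::DeviceReduce::Sum(tmp.d_temp_storage, bytes, d_in, d_out, n);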
/* /*
@ -586,7 +588,7 @@ struct CubMemory {
*/ */
template <typename T> template <typename T>
void print(const dvec<T> &v, size_t max_items = 10) { void Print(const DVec<T> &v, size_t max_items = 10) {
std::vector<T> h = v.as_vector(); std::vector<T> h = v.as_vector();
for (size_t i = 0; i < std::min(max_items, h.size()); i++) { for (size_t i = 0; i < std::min(max_items, h.size()); i++) {
std::cout << " " << h[i]; std::cout << " " << h[i];
@ -609,14 +611,14 @@ void print(const dvec<T> &v, size_t max_items = 10) {
// Load balancing search // Load balancing search
template <typename coordinate_t, typename segments_t, typename offset_t> template <typename CoordinateT, typename SegmentT, typename OffsetT>
void FindMergePartitions(int device_idx, coordinate_t *d_tile_coordinates, void FindMergePartitions(int device_idx, CoordinateT *d_tile_coordinates,
size_t num_tiles, int tile_size, segments_t segments, size_t num_tiles, int tile_size, SegmentT segments,
offset_t num_rows, offset_t num_elements) { OffsetT num_rows, OffsetT num_elements) {
dh::launch_n(device_idx, num_tiles + 1, [=] __device__(int idx) { dh::LaunchN(device_idx, num_tiles + 1, [=] __device__(int idx) {
offset_t diagonal = idx * tile_size; OffsetT diagonal = idx * tile_size;
coordinate_t tile_coordinate; CoordinateT tile_coordinate;
cub::CountingInputIterator<offset_t> nonzero_indices(0); cub::CountingInputIterator<OffsetT> nonzero_indices(0);
// Search the merge path // Search the merge path
// Cast to signed integer as this function can have negatives // Cast to signed integer as this function can have negatives
@ -630,27 +632,27 @@ void FindMergePartitions(int device_idx, coordinate_t *d_tile_coordinates,
} }
template <int TILE_SIZE, int ITEMS_PER_THREAD, int BLOCK_THREADS, template <int TILE_SIZE, int ITEMS_PER_THREAD, int BLOCK_THREADS,
typename offset_t, typename coordinate_t, typename func_t, typename OffsetT, typename CoordinateT, typename FunctionT,
typename segments_iter> typename SegmentIterT>
__global__ void LbsKernel(coordinate_t *d_coordinates, __global__ void LbsKernel(CoordinateT *d_coordinates,
segments_iter segment_end_offsets, func_t f, SegmentIterT segment_end_offsets, FunctionT f,
offset_t num_segments) { OffsetT num_segments) {
int tile = blockIdx.x; int tile = blockIdx.x;
coordinate_t tile_start_coord = d_coordinates[tile]; CoordinateT tile_start_coord = d_coordinates[tile];
coordinate_t tile_end_coord = d_coordinates[tile + 1]; CoordinateT tile_end_coord = d_coordinates[tile + 1];
int64_t tile_num_rows = tile_end_coord.x - tile_start_coord.x; int64_t tile_num_rows = tile_end_coord.x - tile_start_coord.x;
int64_t tile_num_elements = tile_end_coord.y - tile_start_coord.y; int64_t tile_num_elements = tile_end_coord.y - tile_start_coord.y;
cub::CountingInputIterator<offset_t> tile_element_indices(tile_start_coord.y); cub::CountingInputIterator<OffsetT> tile_element_indices(tile_start_coord.y);
coordinate_t thread_start_coord; CoordinateT thread_start_coord;
typedef typename std::iterator_traits<segments_iter>::value_type segment_t; typedef typename std::iterator_traits<SegmentIterT>::value_type SegmentT;
__shared__ struct { __shared__ struct {
segment_t tile_segment_end_offsets[TILE_SIZE + 1]; SegmentT tile_segment_end_offsets[TILE_SIZE + 1];
segment_t output_segment[TILE_SIZE]; SegmentT output_segment[TILE_SIZE];
} temp_storage; } temp_storage;
for (auto item : dh::block_stride_range(int(0), int(tile_num_rows + 1))) { for (auto item : dh::BlockStrideRange(int(0), int(tile_num_rows + 1))) {
temp_storage.tile_segment_end_offsets[item] = temp_storage.tile_segment_end_offsets[item] =
segment_end_offsets[min(static_cast<size_t>(tile_start_coord.x + item), segment_end_offsets[min(static_cast<size_t>(tile_start_coord.x + item),
static_cast<size_t>(num_segments - 1))]; static_cast<size_t>(num_segments - 1))];
@ -665,7 +667,7 @@ __global__ void LbsKernel(coordinate_t *d_coordinates,
tile_element_indices, // List B tile_element_indices, // List B
tile_num_rows, tile_num_elements, thread_start_coord); tile_num_rows, tile_num_elements, thread_start_coord);
coordinate_t thread_current_coord = thread_start_coord; CoordinateT thread_current_coord = thread_start_coord;
#pragma unroll #pragma unroll
for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) {
if (tile_element_indices[thread_current_coord.y] < if (tile_element_indices[thread_current_coord.y] <
@ -679,50 +681,50 @@ __global__ void LbsKernel(coordinate_t *d_coordinates,
} }
__syncthreads(); __syncthreads();
for (auto item : dh::block_stride_range(int(0), int(tile_num_elements))) { for (auto item : dh::BlockStrideRange(int(0), int(tile_num_elements))) {
f(tile_start_coord.y + item, temp_storage.output_segment[item]); f(tile_start_coord.y + item, temp_storage.output_segment[item]);
} }
} }
template <typename func_t, typename segments_iter, typename offset_t> template <typename FunctionT, typename SegmentIterT, typename OffsetT>
void SparseTransformLbs(int device_idx, dh::CubMemory *temp_memory, void SparseTransformLbs(int device_idx, dh::CubMemory *temp_memory,
offset_t count, segments_iter segments, OffsetT count, SegmentIterT segments,
offset_t num_segments, func_t f) { OffsetT num_segments, FunctionT f) {
typedef typename cub::CubVector<offset_t, 2>::Type coordinate_t; typedef typename cub::CubVector<OffsetT, 2>::Type CoordinateT;
dh::safe_cuda(cudaSetDevice(device_idx)); dh::safe_cuda(cudaSetDevice(device_idx));
const int BLOCK_THREADS = 256; const int BLOCK_THREADS = 256;
const int ITEMS_PER_THREAD = 1; const int ITEMS_PER_THREAD = 1;
const int TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD; const int TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD;
auto num_tiles = dh::div_round_up(count + num_segments, BLOCK_THREADS); auto num_tiles = dh::DivRoundUp(count + num_segments, BLOCK_THREADS);
CHECK(num_tiles < std::numeric_limits<unsigned int>::max()); CHECK(num_tiles < std::numeric_limits<unsigned int>::max());
temp_memory->LazyAllocate(sizeof(coordinate_t) * (num_tiles + 1)); temp_memory->LazyAllocate(sizeof(CoordinateT) * (num_tiles + 1));
coordinate_t *tmp_tile_coordinates = CoordinateT *tmp_tile_coordinates =
reinterpret_cast<coordinate_t *>(temp_memory->d_temp_storage); reinterpret_cast<CoordinateT *>(temp_memory->d_temp_storage);
FindMergePartitions(device_idx, tmp_tile_coordinates, num_tiles, FindMergePartitions(device_idx, tmp_tile_coordinates, num_tiles,
BLOCK_THREADS, segments, num_segments, count); BLOCK_THREADS, segments, num_segments, count);
LbsKernel<TILE_SIZE, ITEMS_PER_THREAD, BLOCK_THREADS, offset_t> LbsKernel<TILE_SIZE, ITEMS_PER_THREAD, BLOCK_THREADS, OffsetT>
<<<uint32_t(num_tiles), BLOCK_THREADS>>>(tmp_tile_coordinates, <<<uint32_t(num_tiles), BLOCK_THREADS>>>(tmp_tile_coordinates,
segments + 1, f, num_segments); segments + 1, f, num_segments);
} }
template <typename func_t, typename offset_t> template <typename FunctionT, typename OffsetT>
void DenseTransformLbs(int device_idx, offset_t count, offset_t num_segments, void DenseTransformLbs(int device_idx, OffsetT count, OffsetT num_segments,
func_t f) { FunctionT f) {
CHECK(count % num_segments == 0) << "Data is not dense."; CHECK(count % num_segments == 0) << "Data is not dense.";
launch_n(device_idx, count, [=] __device__(offset_t idx) { LaunchN(device_idx, count, [=] __device__(OffsetT idx) {
offset_t segment = idx / (count / num_segments); OffsetT segment = idx / (count / num_segments);
f(idx, segment); f(idx, segment);
}); });
} }
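The dense index-to-segment mapping deserves a concrete example:

  // count = 6, num_segments = 2  =>  segment = idx / 3
  //   idx:     0 1 2 3 4 5
  //   segment: 0 0 0 1 1 1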
/** /**
* \fn template <typename func_t, typename segments_iter, typename offset_t> * \fn template <typename FunctionT, typename SegmentIterT, typename OffsetT>
* void TransformLbs(int device_idx, dh::CubMemory *temp_memory, offset_t count, * void TransformLbs(int device_idx, dh::CubMemory *temp_memory, OffsetT count,
* segments_iter segments, offset_t num_segments, bool is_dense, func_t f) * SegmentIterT segments, OffsetT num_segments, bool is_dense, FunctionT f)
* *
* \brief Load balancing search function. Reads a CSR type matrix description * \brief Load balancing search function. Reads a CSR type matrix description
* and allows a function to be executed on each element. Search 'modern GPU load * and allows a function to be executed on each element. Search 'modern GPU load
@ -731,9 +733,9 @@ void DenseTransformLbs(int device_idx, offset_t count, offset_t num_segments,
* \author Rory * \author Rory
* \date 7/9/2017 * \date 7/9/2017
* *
* \tparam func_t Type of the function t. * \tparam FunctionT Type of the function t.
* \tparam segments_iter Type of the segments iterator. * \tparam SegmentIterT Type of the segments iterator.
* \tparam offset_t Type of the offset. * \tparam OffsetT Type of the offset.
* \param device_idx Zero-based index of the device. * \param device_idx Zero-based index of the device.
* \param [in,out] temp_memory Temporary memory allocator. * \param [in,out] temp_memory Temporary memory allocator.
* \param count Number of elements. * \param count Number of elements.
@ -743,10 +745,10 @@ void DenseTransformLbs(int device_idx, offset_t count, offset_t num_segments,
* \param f Lambda to be executed on matrix elements. * \param f Lambda to be executed on matrix elements.
*/ */
template <typename func_t, typename segments_iter, typename offset_t> template <typename FunctionT, typename SegmentIterT, typename OffsetT>
void TransformLbs(int device_idx, dh::CubMemory *temp_memory, offset_t count, void TransformLbs(int device_idx, dh::CubMemory *temp_memory, OffsetT count,
segments_iter segments, offset_t num_segments, bool is_dense, SegmentIterT segments, OffsetT num_segments, bool is_dense,
func_t f) { FunctionT f) {
if (is_dense) { if (is_dense) {
DenseTransformLbs(device_idx, count, num_segments, f); DenseTransformLbs(device_idx, count, num_segments, f);
} else { } else {
@ -765,18 +767,18 @@ void TransformLbs(int device_idx, dh::CubMemory *temp_memory, offset_t count,
* @param offsets the segments * @param offsets the segments
*/ */
template <typename T1, typename T2> template <typename T1, typename T2>
void segmentedSort(dh::CubMemory *tmp_mem, dh::dvec2<T1> *keys, void SegmentedSort(dh::CubMemory *tmp_mem, dh::DVec2<T1> *keys,
dh::dvec2<T2> *vals, int nVals, int nSegs, dh::DVec2<T2> *vals, int nVals, int nSegs,
const dh::dvec<int> &offsets, int start = 0, const dh::DVec<int> &offsets, int start = 0,
int end = sizeof(T1) * 8) { int end = sizeof(T1) * 8) {
size_t tmpSize; size_t tmpSize;
dh::safe_cuda(cub::DeviceSegmentedRadixSort::SortPairs( dh::safe_cuda(cub::DeviceSegmentedRadixSort::SortPairs(
NULL, tmpSize, keys->buff(), vals->buff(), nVals, nSegs, offsets.data(), NULL, tmpSize, keys->buff(), vals->buff(), nVals, nSegs, offsets.Data(),
offsets.data() + 1, start, end)); offsets.Data() + 1, start, end));
tmp_mem->LazyAllocate(tmpSize); tmp_mem->LazyAllocate(tmpSize);
dh::safe_cuda(cub::DeviceSegmentedRadixSort::SortPairs( dh::safe_cuda(cub::DeviceSegmentedRadixSort::SortPairs(
tmp_mem->d_temp_storage, tmpSize, keys->buff(), vals->buff(), nVals, tmp_mem->d_temp_storage, tmpSize, keys->buff(), vals->buff(), nVals,
nSegs, offsets.data(), offsets.data() + 1, start, end)); nSegs, offsets.Data(), offsets.Data() + 1, start, end));
} }
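A hedged usage sketch, with keys (DVec2<float>), vals (DVec2<int>) and offsets assumed already allocated; offsets must hold nSegs + 1 segment boundaries, as CUB's segmented sort expects:

  dh::CubMemory tmp;
  // Sorts key/value pairs within each segment; the double buffers let CUB ping-pong in place.
  dh::SegmentedSort(&tmp, &keys, &vals, n_vals, n_segs, offsets);
  float *sorted_keys = keys.Current();  // buffer selected by the sort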
/** /**
@ -787,14 +789,14 @@ void segmentedSort(dh::CubMemory *tmp_mem, dh::dvec2<T1> *keys,
* @param nVals number of elements in the input array * @param nVals number of elements in the input array
*/ */
template <typename T> template <typename T>
void sumReduction(dh::CubMemory &tmp_mem, dh::dvec<T> &in, dh::dvec<T> &out, void SumReduction(dh::CubMemory &tmp_mem, dh::DVec<T> &in, dh::DVec<T> &out,
int nVals) { int nVals) {
size_t tmpSize; size_t tmpSize;
dh::safe_cuda( dh::safe_cuda(
cub::DeviceReduce::Sum(NULL, tmpSize, in.data(), out.data(), nVals)); cub::DeviceReduce::Sum(NULL, tmpSize, in.Data(), out.Data(), nVals));
tmp_mem.LazyAllocate(tmpSize); tmp_mem.LazyAllocate(tmpSize);
dh::safe_cuda(cub::DeviceReduce::Sum(tmp_mem.d_temp_storage, tmpSize, dh::safe_cuda(cub::DeviceReduce::Sum(tmp_mem.d_temp_storage, tmpSize,
in.data(), out.data(), nVals)); in.Data(), out.Data(), nVals));
} }
/** /**
@ -805,7 +807,7 @@ void sumReduction(dh::CubMemory &tmp_mem, dh::dvec<T> &in, dh::dvec<T> &out,
* @param nVals number of elements in the input array * @param nVals number of elements in the input array
*/ */
template <typename T> template <typename T>
T sumReduction(dh::CubMemory &tmp_mem, T *in, int nVals) { T SumReduction(dh::CubMemory &tmp_mem, T *in, int nVals) {
size_t tmpSize; size_t tmpSize;
dh::safe_cuda(cub::DeviceReduce::Sum(nullptr, tmpSize, in, in, nVals)); dh::safe_cuda(cub::DeviceReduce::Sum(nullptr, tmpSize, in, in, nVals));
// Allocate small extra memory for the return value // Allocate small extra memory for the return value
@ -827,8 +829,8 @@ T sumReduction(dh::CubMemory &tmp_mem, T *in, int nVals) {
* @param def default value to be filled * @param def default value to be filled
*/ */
template <typename T, int BlkDim = 256, int ItemsPerThread = 4> template <typename T, int BlkDim = 256, int ItemsPerThread = 4>
void fillConst(int device_idx, T *out, int len, T def) { void FillConst(int device_idx, T *out, int len, T def) {
dh::launch_n<ItemsPerThread, BlkDim>(device_idx, len, dh::LaunchN<ItemsPerThread, BlkDim>(device_idx, len,
[=] __device__(int i) { out[i] = def; }); [=] __device__(int i) { out[i] = def; });
} }
@ -842,9 +844,9 @@ void fillConst(int device_idx, T *out, int len, T def) {
* @param nVals length of the buffers * @param nVals length of the buffers
*/ */
template <typename T1, typename T2, int BlkDim = 256, int ItemsPerThread = 4> template <typename T1, typename T2, int BlkDim = 256, int ItemsPerThread = 4>
void gather(int device_idx, T1 *out1, const T1 *in1, T2 *out2, const T2 *in2, void Gather(int device_idx, T1 *out1, const T1 *in1, T2 *out2, const T2 *in2,
const int *instId, int nVals) { const int *instId, int nVals) {
dh::launch_n<ItemsPerThread, BlkDim>(device_idx, nVals, dh::LaunchN<ItemsPerThread, BlkDim>(device_idx, nVals,
[=] __device__(int i) { [=] __device__(int i) {
int iid = instId[i]; int iid = instId[i];
T1 v1 = in1[iid]; T1 v1 = in1[iid];
@ -862,8 +864,8 @@ void gather(int device_idx, T1 *out1, const T1 *in1, T2 *out2, const T2 *in2,
* @param nVals length of the buffers * @param nVals length of the buffers
*/ */
template <typename T, int BlkDim = 256, int ItemsPerThread = 4> template <typename T, int BlkDim = 256, int ItemsPerThread = 4>
void gather(int device_idx, T *out, const T *in, const int *instId, int nVals) { void Gather(int device_idx, T *out, const T *in, const int *instId, int nVals) {
dh::launch_n<ItemsPerThread, BlkDim>(device_idx, nVals, dh::LaunchN<ItemsPerThread, BlkDim>(device_idx, nVals,
[=] __device__(int i) { [=] __device__(int i) {
int iid = instId[i]; int iid = instId[i];
out[i] = in[iid]; out[i] = in[iid];

@ -29,12 +29,12 @@ struct ParallelGroupBuilder {
// parallel group builder of data // parallel group builder of data
ParallelGroupBuilder(std::vector<SizeType> *p_rptr, ParallelGroupBuilder(std::vector<SizeType> *p_rptr,
std::vector<ValueType> *p_data) std::vector<ValueType> *p_data)
: rptr(*p_rptr), data(*p_data), thread_rptr(tmp_thread_rptr) { : rptr_(*p_rptr), data_(*p_data), thread_rptr_(tmp_thread_rptr_) {
} }
ParallelGroupBuilder(std::vector<SizeType> *p_rptr, ParallelGroupBuilder(std::vector<SizeType> *p_rptr,
std::vector<ValueType> *p_data, std::vector<ValueType> *p_data,
std::vector< std::vector<SizeType> > *p_thread_rptr) std::vector< std::vector<SizeType> > *p_thread_rptr)
: rptr(*p_rptr), data(*p_data), thread_rptr(*p_thread_rptr) { : rptr_(*p_rptr), data_(*p_data), thread_rptr_(*p_thread_rptr) {
} }
public: public:
@ -45,10 +45,10 @@ struct ParallelGroupBuilder {
* \param nthread number of thread that will be used in construction * \param nthread number of thread that will be used in construction
*/ */
inline void InitBudget(size_t nkeys, int nthread) { inline void InitBudget(size_t nkeys, int nthread) {
thread_rptr.resize(nthread); thread_rptr_.resize(nthread);
for (size_t i = 0; i < thread_rptr.size(); ++i) { for (size_t i = 0; i < thread_rptr_.size(); ++i) {
thread_rptr[i].resize(nkeys); thread_rptr_[i].resize(nkeys);
std::fill(thread_rptr[i].begin(), thread_rptr[i].end(), 0); std::fill(thread_rptr_[i].begin(), thread_rptr_[i].end(), 0);
} }
} }
/*! /*!
@ -58,34 +58,34 @@ struct ParallelGroupBuilder {
* \param nelem number of element budget add to this row * \param nelem number of element budget add to this row
*/ */
inline void AddBudget(size_t key, int threadid, SizeType nelem = 1) { inline void AddBudget(size_t key, int threadid, SizeType nelem = 1) {
std::vector<SizeType> &trptr = thread_rptr[threadid]; std::vector<SizeType> &trptr = thread_rptr_[threadid];
if (trptr.size() < key + 1) { if (trptr.size() < key + 1) {
trptr.resize(key + 1, 0); trptr.resize(key + 1, 0);
} }
trptr[key] += nelem; trptr[key] += nelem;
} }
/*! \brief step 3: initialize the necessary storage */ /*! \brief step 3: initialize the necessary storage */
inline void InitStorage(void) { inline void InitStorage() {
// set rptr to correct size // set rptr to correct size
for (size_t tid = 0; tid < thread_rptr.size(); ++tid) { for (size_t tid = 0; tid < thread_rptr_.size(); ++tid) {
if (rptr.size() <= thread_rptr[tid].size()) { if (rptr_.size() <= thread_rptr_[tid].size()) {
rptr.resize(thread_rptr[tid].size() + 1); rptr_.resize(thread_rptr_[tid].size() + 1);
} }
} }
// initialize rptr to be beginning of each segment // initialize rptr to be beginning of each segment
size_t start = 0; size_t start = 0;
for (size_t i = 0; i + 1 < rptr.size(); ++i) { for (size_t i = 0; i + 1 < rptr_.size(); ++i) {
for (size_t tid = 0; tid < thread_rptr.size(); ++tid) { for (size_t tid = 0; tid < thread_rptr_.size(); ++tid) {
std::vector<SizeType> &trptr = thread_rptr[tid]; std::vector<SizeType> &trptr = thread_rptr_[tid];
if (i < trptr.size()) { if (i < trptr.size()) {
size_t ncnt = trptr[i]; size_t ncnt = trptr[i];
trptr[i] = start; trptr[i] = start;
start += ncnt; start += ncnt;
} }
} }
rptr[i + 1] = start; rptr_[i + 1] = start;
} }
data.resize(start); data_.resize(start);
} }
/*! /*!
* \brief step 4: add data to the allocated space, * \brief step 4: add data to the allocated space,
@ -96,19 +96,19 @@ struct ParallelGroupBuilder {
* \param threadid the id of thread that calls this function * \param threadid the id of thread that calls this function
*/ */
inline void Push(size_t key, ValueType value, int threadid) { inline void Push(size_t key, ValueType value, int threadid) {
SizeType &rp = thread_rptr[threadid][key]; SizeType &rp = thread_rptr_[threadid][key];
data[rp++] = value; data_[rp++] = value;
} }
private: private:
/*! \brief pointer to the beginning and end of each continuous key */ /*! \brief pointer to the beginning and end of each continuous key */
std::vector<SizeType> &rptr; std::vector<SizeType> &rptr_;
/*! \brief index of nonzero entries in each row */ /*! \brief index of nonzero entries in each row */
std::vector<ValueType> &data; std::vector<ValueType> &data_;
/*! \brief thread local data structure */ /*! \brief thread local data structure */
std::vector<std::vector<SizeType> > &thread_rptr; std::vector<std::vector<SizeType> > &thread_rptr_;
/*! \brief local temp thread ptr, use this if not specified by the constructor */ /*! \brief local temp thread ptr, use this if not specified by the constructor */
std::vector<std::vector<SizeType> > tmp_thread_rptr; std::vector<std::vector<SizeType> > tmp_thread_rptr_;
}; };
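A hedged single-threaded walk-through of the four-step protocol, instantiated as ParallelGroupBuilder<int> (SizeType defaulting to std::size_t is assumed):

  std::vector<std::size_t> rptr;
  std::vector<int> data;
  xgboost::common::ParallelGroupBuilder<int> builder(&rptr, &data);
  builder.InitBudget(/*nkeys=*/3, /*nthread=*/1);  // step 1: one counter row per thread
  builder.AddBudget(/*key=*/0, /*threadid=*/0);    // step 2: group 0 reserves one slot
  builder.AddBudget(2, 0, /*nelem=*/2);            //         group 2 reserves two slots
  builder.InitStorage();                           // step 3: prefix-sum budgets into rptr
  builder.Push(0, 42, 0);                          // step 4: fill the reserved slots
  builder.Push(2, 7, 0);
  builder.Push(2, 8, 0);
  // Result: rptr == {0, 1, 1, 3}, data == {42, 7, 8}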
} // namespace common } // namespace common
} // namespace xgboost } // namespace xgboost

@ -17,20 +17,20 @@ namespace xgboost {
namespace common { namespace common {
void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) { void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
typedef common::WXQuantileSketch<bst_float, bst_float> WXQSketch; using WXQSketch = common::WXQuantileSketch<bst_float, bst_float>;
const MetaInfo& info = p_fmat->info(); const MetaInfo& info = p_fmat->Info();
// safe factor for better accuracy // safe factor for better accuracy
const int kFactor = 8; constexpr int kFactor = 8;
std::vector<WXQSketch> sketchs; std::vector<WXQSketch> sketchs;
const int nthread = omp_get_max_threads(); const int nthread = omp_get_max_threads();
unsigned nstep = static_cast<unsigned>((info.num_col + nthread - 1) / nthread); auto nstep = static_cast<unsigned>((info.num_col_ + nthread - 1) / nthread);
unsigned ncol = static_cast<unsigned>(info.num_col); auto ncol = static_cast<unsigned>(info.num_col_);
sketchs.resize(info.num_col); sketchs.resize(info.num_col_);
for (auto& s : sketchs) { for (auto& s : sketchs) {
s.Init(info.num_row, 1.0 / (max_num_bins * kFactor)); s.Init(info.num_row_, 1.0 / (max_num_bins * kFactor));
} }
dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator(); dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator();
@ -40,7 +40,7 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
#pragma omp parallel num_threads(nthread) #pragma omp parallel num_threads(nthread)
{ {
CHECK_EQ(nthread, omp_get_num_threads()); CHECK_EQ(nthread, omp_get_num_threads());
unsigned tid = static_cast<unsigned>(omp_get_thread_num()); auto tid = static_cast<unsigned>(omp_get_thread_num());
unsigned begin = std::min(nstep * tid, ncol); unsigned begin = std::min(nstep * tid, ncol);
unsigned end = std::min(nstep * (tid + 1), ncol); unsigned end = std::min(nstep * (tid + 1), ncol);
for (size_t i = 0; i < batch.size; ++i) { // NOLINT(*) for (size_t i = 0; i < batch.size; ++i) { // NOLINT(*)
@ -68,7 +68,7 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_num_bins * kFactor); size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_num_bins * kFactor);
sreducer.Allreduce(dmlc::BeginPtr(summary_array), nbytes, summary_array.size()); sreducer.Allreduce(dmlc::BeginPtr(summary_array), nbytes, summary_array.size());
this->min_val.resize(info.num_col); this->min_val.resize(info.num_col_);
row_ptr.push_back(0); row_ptr.push_back(0);
for (size_t fid = 0; fid < summary_array.size(); ++fid) { for (size_t fid = 0; fid < summary_array.size(); ++fid) {
WXQSketch::SummaryContainer a; WXQSketch::SummaryContainer a;
@ -105,7 +105,7 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
} }
void GHistIndexMatrix::Init(DMatrix* p_fmat) { void GHistIndexMatrix::Init(DMatrix* p_fmat) {
CHECK(cut != nullptr); CHECK(cut != nullptr); // NOLINT
dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator(); dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator();
const int nthread = omp_get_max_threads(); const int nthread = omp_get_max_threads();
@ -126,7 +126,7 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat) {
CHECK_GT(cut->cut.size(), 0U); CHECK_GT(cut->cut.size(), 0U);
CHECK_EQ(cut->row_ptr.back(), cut->cut.size()); CHECK_EQ(cut->row_ptr.back(), cut->cut.size());
omp_ulong bsize = static_cast<omp_ulong>(batch.size); auto bsize = static_cast<omp_ulong>(batch.size);
#pragma omp parallel for num_threads(nthread) schedule(static) #pragma omp parallel for num_threads(nthread) schedule(static)
for (omp_ulong i = 0; i < bsize; ++i) { // NOLINT(*) for (omp_ulong i = 0; i < bsize; ++i) { // NOLINT(*)
const int tid = omp_get_thread_num(); const int tid = omp_get_thread_num();
@ -217,7 +217,7 @@ FindGroups_(const std::vector<unsigned>& feature_list,
std::vector<std::vector<bool>> conflict_marks; std::vector<std::vector<bool>> conflict_marks;
std::vector<size_t> group_nnz; std::vector<size_t> group_nnz;
std::vector<size_t> group_conflict_cnt; std::vector<size_t> group_conflict_cnt;
const size_t max_conflict_cnt const auto max_conflict_cnt
= static_cast<size_t>(param.max_conflict_rate * nrow); = static_cast<size_t>(param.max_conflict_rate * nrow);
for (auto fid : feature_list) { for (auto fid : feature_list) {
@ -336,14 +336,14 @@ FastFeatureGrouping(const GHistIndexMatrix& gmat,
void GHistIndexBlockMatrix::Init(const GHistIndexMatrix& gmat, void GHistIndexBlockMatrix::Init(const GHistIndexMatrix& gmat,
const ColumnMatrix& colmat, const ColumnMatrix& colmat,
const FastHistParam& param) { const FastHistParam& param) {
cut = gmat.cut; cut_ = gmat.cut;
const size_t nrow = gmat.row_ptr.size() - 1; const size_t nrow = gmat.row_ptr.size() - 1;
const uint32_t nbins = gmat.cut->row_ptr.back(); const uint32_t nbins = gmat.cut->row_ptr.back();
/* step 1: form feature groups */ /* step 1: form feature groups */
auto groups = FastFeatureGrouping(gmat, colmat, param); auto groups = FastFeatureGrouping(gmat, colmat, param);
const uint32_t nblock = static_cast<uint32_t>(groups.size()); const auto nblock = static_cast<uint32_t>(groups.size());
/* step 2: build a new CSR matrix for each feature group */ /* step 2: build a new CSR matrix for each feature group */
std::vector<uint32_t> bin2block(nbins); // lookup table [bin id] => [block id] std::vector<uint32_t> bin2block(nbins); // lookup table [bin id] => [block id]
@ -380,24 +380,24 @@ void GHistIndexBlockMatrix::Init(const GHistIndexMatrix& gmat,
index_blk_ptr.push_back(0); index_blk_ptr.push_back(0);
row_ptr_blk_ptr.push_back(0); row_ptr_blk_ptr.push_back(0);
for (uint32_t block_id = 0; block_id < nblock; ++block_id) { for (uint32_t block_id = 0; block_id < nblock; ++block_id) {
index.insert(index.end(), index_temp[block_id].begin(), index_temp[block_id].end()); index_.insert(index_.end(), index_temp[block_id].begin(), index_temp[block_id].end());
row_ptr.insert(row_ptr.end(), row_ptr_temp[block_id].begin(), row_ptr_temp[block_id].end()); row_ptr_.insert(row_ptr_.end(), row_ptr_temp[block_id].begin(), row_ptr_temp[block_id].end());
index_blk_ptr.push_back(index.size()); index_blk_ptr.push_back(index_.size());
row_ptr_blk_ptr.push_back(row_ptr.size()); row_ptr_blk_ptr.push_back(row_ptr_.size());
} }
// save shortcut for each block // save shortcut for each block
for (uint32_t block_id = 0; block_id < nblock; ++block_id) { for (uint32_t block_id = 0; block_id < nblock; ++block_id) {
Block blk; Block blk;
blk.index_begin = &index[index_blk_ptr[block_id]]; blk.index_begin = &index_[index_blk_ptr[block_id]];
blk.row_ptr_begin = &row_ptr[row_ptr_blk_ptr[block_id]]; blk.row_ptr_begin = &row_ptr_[row_ptr_blk_ptr[block_id]];
blk.index_end = &index[index_blk_ptr[block_id + 1]]; blk.index_end = &index_[index_blk_ptr[block_id + 1]];
blk.row_ptr_end = &row_ptr[row_ptr_blk_ptr[block_id + 1]]; blk.row_ptr_end = &row_ptr_[row_ptr_blk_ptr[block_id + 1]];
blocks.push_back(blk); blocks_.push_back(blk);
} }
} }
void GHistBuilder::BuildHist(const std::vector<bst_gpair>& gpair, void GHistBuilder::BuildHist(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices, const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat, const GHistIndexMatrix& gmat,
const std::vector<bst_uint>& feat_set, const std::vector<bst_uint>& feat_set,
@ -405,30 +405,30 @@ void GHistBuilder::BuildHist(const std::vector<bst_gpair>& gpair,
data_.resize(nbins_ * nthread_, GHistEntry()); data_.resize(nbins_ * nthread_, GHistEntry());
std::fill(data_.begin(), data_.end(), GHistEntry()); std::fill(data_.begin(), data_.end(), GHistEntry());
const int K = 8; // loop unrolling factor constexpr int kUnroll = 8; // loop unrolling factor
const bst_omp_uint nthread = static_cast<bst_omp_uint>(this->nthread_); const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
const size_t nrows = row_indices.end - row_indices.begin; const size_t nrows = row_indices.end - row_indices.begin;
const size_t rest = nrows % K; const size_t rest = nrows % kUnroll;
#pragma omp parallel for num_threads(nthread) schedule(guided) #pragma omp parallel for num_threads(nthread) schedule(guided)
for (bst_omp_uint i = 0; i < nrows - rest; i += K) { for (bst_omp_uint i = 0; i < nrows - rest; i += kUnroll) {
const bst_omp_uint tid = omp_get_thread_num(); const bst_omp_uint tid = omp_get_thread_num();
const size_t off = tid * nbins_; const size_t off = tid * nbins_;
size_t rid[K]; size_t rid[kUnroll];
size_t ibegin[K]; size_t ibegin[kUnroll];
size_t iend[K]; size_t iend[kUnroll];
bst_gpair stat[K]; GradientPair stat[kUnroll];
for (int k = 0; k < K; ++k) { for (int k = 0; k < kUnroll; ++k) {
rid[k] = row_indices.begin[i + k]; rid[k] = row_indices.begin[i + k];
} }
for (int k = 0; k < K; ++k) { for (int k = 0; k < kUnroll; ++k) {
ibegin[k] = gmat.row_ptr[rid[k]]; ibegin[k] = gmat.row_ptr[rid[k]];
iend[k] = gmat.row_ptr[rid[k] + 1]; iend[k] = gmat.row_ptr[rid[k] + 1];
} }
for (int k = 0; k < K; ++k) { for (int k = 0; k < kUnroll; ++k) {
stat[k] = gpair[rid[k]]; stat[k] = gpair[rid[k]];
} }
for (int k = 0; k < K; ++k) { for (int k = 0; k < kUnroll; ++k) {
for (size_t j = ibegin[k]; j < iend[k]; ++j) { for (size_t j = ibegin[k]; j < iend[k]; ++j) {
const uint32_t bin = gmat.index[j]; const uint32_t bin = gmat.index[j];
data_[off + bin].Add(stat[k]); data_[off + bin].Add(stat[k]);
@ -439,7 +439,7 @@ void GHistBuilder::BuildHist(const std::vector<bst_gpair>& gpair,
const size_t rid = row_indices.begin[i]; const size_t rid = row_indices.begin[i];
const size_t ibegin = gmat.row_ptr[rid]; const size_t ibegin = gmat.row_ptr[rid];
const size_t iend = gmat.row_ptr[rid + 1]; const size_t iend = gmat.row_ptr[rid + 1];
const bst_gpair stat = gpair[rid]; const GradientPair stat = gpair[rid];
for (size_t j = ibegin; j < iend; ++j) { for (size_t j = ibegin; j < iend; ++j) {
const uint32_t bin = gmat.index[j]; const uint32_t bin = gmat.index[j];
data_[bin].Add(stat); data_[bin].Add(stat);
@ -456,37 +456,40 @@ void GHistBuilder::BuildHist(const std::vector<bst_gpair>& gpair,
} }
} }
void GHistBuilder::BuildBlockHist(const std::vector<bst_gpair>& gpair, void GHistBuilder::BuildBlockHist(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices, const RowSetCollection::Elem row_indices,
const GHistIndexBlockMatrix& gmatb, const GHistIndexBlockMatrix& gmatb,
const std::vector<bst_uint>& feat_set, const std::vector<bst_uint>& feat_set,
GHistRow hist) { GHistRow hist) {
const int K = 8; // loop unrolling factor constexpr int kUnroll = 8; // loop unrolling factor
const bst_omp_uint nthread = static_cast<bst_omp_uint>(this->nthread_);
const size_t nblock = gmatb.GetNumBlock(); const size_t nblock = gmatb.GetNumBlock();
const size_t nrows = row_indices.end - row_indices.begin; const size_t nrows = row_indices.end - row_indices.begin;
const size_t rest = nrows % K; const size_t rest = nrows % kUnroll;
#if defined(_OPENMP)
const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
#endif
#pragma omp parallel for num_threads(nthread) schedule(guided) #pragma omp parallel for num_threads(nthread) schedule(guided)
for (bst_omp_uint bid = 0; bid < nblock; ++bid) { for (bst_omp_uint bid = 0; bid < nblock; ++bid) {
auto gmat = gmatb[bid]; auto gmat = gmatb[bid];
for (size_t i = 0; i < nrows - rest; i += K) { for (size_t i = 0; i < nrows - rest; i += kUnroll) {
size_t rid[K]; size_t rid[kUnroll];
size_t ibegin[K]; size_t ibegin[kUnroll];
size_t iend[K]; size_t iend[kUnroll];
bst_gpair stat[K]; GradientPair stat[kUnroll];
for (int k = 0; k < K; ++k) { for (int k = 0; k < kUnroll; ++k) {
rid[k] = row_indices.begin[i + k]; rid[k] = row_indices.begin[i + k];
} }
for (int k = 0; k < K; ++k) { for (int k = 0; k < kUnroll; ++k) {
ibegin[k] = gmat.row_ptr[rid[k]]; ibegin[k] = gmat.row_ptr[rid[k]];
iend[k] = gmat.row_ptr[rid[k] + 1]; iend[k] = gmat.row_ptr[rid[k] + 1];
} }
for (int k = 0; k < K; ++k) { for (int k = 0; k < kUnroll; ++k) {
stat[k] = gpair[rid[k]]; stat[k] = gpair[rid[k]];
} }
for (int k = 0; k < K; ++k) { for (int k = 0; k < kUnroll; ++k) {
for (size_t j = ibegin[k]; j < iend[k]; ++j) { for (size_t j = ibegin[k]; j < iend[k]; ++j) {
const uint32_t bin = gmat.index[j]; const uint32_t bin = gmat.index[j];
hist.begin[bin].Add(stat[k]); hist.begin[bin].Add(stat[k]);
@ -497,7 +500,7 @@ void GHistBuilder::BuildBlockHist(const std::vector<bst_gpair>& gpair,
const size_t rid = row_indices.begin[i]; const size_t rid = row_indices.begin[i];
const size_t ibegin = gmat.row_ptr[rid]; const size_t ibegin = gmat.row_ptr[rid];
const size_t iend = gmat.row_ptr[rid + 1]; const size_t iend = gmat.row_ptr[rid + 1];
const bst_gpair stat = gpair[rid]; const GradientPair stat = gpair[rid];
for (size_t j = ibegin; j < iend; ++j) { for (size_t j = ibegin; j < iend; ++j) {
const uint32_t bin = gmat.index[j]; const uint32_t bin = gmat.index[j];
hist.begin[bin].Add(stat); hist.begin[bin].Add(stat);
@ -507,21 +510,26 @@ void GHistBuilder::BuildBlockHist(const std::vector<bst_gpair>& gpair,
} }
void GHistBuilder::SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow parent) { void GHistBuilder::SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow parent) {
const bst_omp_uint nthread = static_cast<bst_omp_uint>(this->nthread_);
const uint32_t nbins = static_cast<bst_omp_uint>(nbins_); const uint32_t nbins = static_cast<bst_omp_uint>(nbins_);
const int K = 8; // loop unrolling factor constexpr int kUnroll = 8; // loop unrolling factor
const uint32_t rest = nbins % K; const uint32_t rest = nbins % kUnroll;
#if defined(_OPENMP)
const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
#endif
#pragma omp parallel for num_threads(nthread) schedule(static) #pragma omp parallel for num_threads(nthread) schedule(static)
for (bst_omp_uint bin_id = 0; bin_id < static_cast<bst_omp_uint>(nbins - rest); bin_id += K) { for (bst_omp_uint bin_id = 0; bin_id < static_cast<bst_omp_uint>(nbins - rest); bin_id += kUnroll) {
GHistEntry pb[K]; GHistEntry pb[kUnroll];
GHistEntry sb[K]; GHistEntry sb[kUnroll];
for (int k = 0; k < K; ++k) { for (int k = 0; k < kUnroll; ++k) {
pb[k] = parent.begin[bin_id + k]; pb[k] = parent.begin[bin_id + k];
} }
for (int k = 0; k < K; ++k) { for (int k = 0; k < kUnroll; ++k) {
sb[k] = sibling.begin[bin_id + k]; sb[k] = sibling.begin[bin_id + k];
} }
for (int k = 0; k < K; ++k) { for (int k = 0; k < kUnroll; ++k) {
self.begin[bin_id + k].SetSubtract(pb[k], sb[k]); self.begin[bin_id + k].SetSubtract(pb[k], sb[k]);
} }
} }
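The identity being exploited: for any split, child = parent - sibling holds bin-by-bin for both gradient sums, so only one child per split needs a full BuildHist pass. Per bin, SetSubtract presumably reduces to:

  // self = parent - sibling, elementwise over the histogram
  self.begin[bin].sum_grad = parent.begin[bin].sum_grad - sibling.begin[bin].sum_grad;
  self.begin[bin].sum_hess = parent.begin[bin].sum_hess - sibling.begin[bin].sum_hess;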

@ -13,26 +13,26 @@
#include "row_set.h" #include "row_set.h"
#include "../tree/fast_hist_param.h" #include "../tree/fast_hist_param.h"
using xgboost::tree::FastHistParam;
namespace xgboost { namespace xgboost {
namespace common { namespace common {
using tree::FastHistParam;
/*! \brief sums of gradient statistics corresponding to a histogram bin */ /*! \brief sums of gradient statistics corresponding to a histogram bin */
struct GHistEntry { struct GHistEntry {
/*! \brief sum of first-order gradient statistics */ /*! \brief sum of first-order gradient statistics */
double sum_grad; double sum_grad{0};
/*! \brief sum of second-order gradient statistics */ /*! \brief sum of second-order gradient statistics */
double sum_hess; double sum_hess{0};
GHistEntry() : sum_grad(0), sum_hess(0) {} GHistEntry() = default;
inline void Clear() { inline void Clear() {
sum_grad = sum_hess = 0; sum_grad = sum_hess = 0;
} }
/*! \brief add a bst_gpair to the sum */ /*! \brief add a GradientPair to the sum */
inline void Add(const bst_gpair& e) { inline void Add(const GradientPair& e) {
sum_grad += e.GetGrad(); sum_grad += e.GetGrad();
sum_hess += e.GetHess(); sum_hess += e.GetHess();
} }
@ -58,7 +58,7 @@ struct HistCutUnit {
/*! \brief number of cutting point, containing the maximum point */ /*! \brief number of cutting point, containing the maximum point */
uint32_t size; uint32_t size;
// default constructor // default constructor
HistCutUnit() {} HistCutUnit() = default;
// constructor // constructor
HistCutUnit(const bst_float* cut, uint32_t size) HistCutUnit(const bst_float* cut, uint32_t size)
: cut(cut), size(size) {} : cut(cut), size(size) {}
@ -74,8 +74,8 @@ struct HistCutMatrix {
std::vector<bst_float> cut; std::vector<bst_float> cut;
/*! \brief Get histogram bound for fid */ /*! \brief Get histogram bound for fid */
inline HistCutUnit operator[](bst_uint fid) const { inline HistCutUnit operator[](bst_uint fid) const {
return HistCutUnit(dmlc::BeginPtr(cut) + row_ptr[fid], return {dmlc::BeginPtr(cut) + row_ptr[fid],
row_ptr[fid + 1] - row_ptr[fid]); row_ptr[fid + 1] - row_ptr[fid]};
} }
// create histogram cut matrix given statistics from data // create histogram cut matrix given statistics from data
// using approximate quantile sketch approach // using approximate quantile sketch approach
@ -92,7 +92,7 @@ struct GHistIndexRow {
const uint32_t* index; const uint32_t* index;
/*! \brief The size of the histogram */ /*! \brief The size of the histogram */
size_t size; size_t size;
GHistIndexRow() {} GHistIndexRow() = default;
GHistIndexRow(const uint32_t* index, size_t size) GHistIndexRow(const uint32_t* index, size_t size)
: index(index), size(size) {} : index(index), size(size) {}
}; };
@ -115,7 +115,7 @@ struct GHistIndexMatrix {
void Init(DMatrix* p_fmat); void Init(DMatrix* p_fmat);
// get i-th row // get i-th row
inline GHistIndexRow operator[](size_t i) const { inline GHistIndexRow operator[](size_t i) const {
return GHistIndexRow(&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]); return {&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]};
} }
inline void GetFeatureCounts(size_t* counts) const { inline void GetFeatureCounts(size_t* counts) const {
auto nfeature = cut->row_ptr.size() - 1; auto nfeature = cut->row_ptr.size() - 1;
@ -141,7 +141,7 @@ struct GHistIndexBlock {
// get i-th row // get i-th row
inline GHistIndexRow operator[](size_t i) const { inline GHistIndexRow operator[](size_t i) const {
return GHistIndexRow(&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]); return {&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]};
} }
}; };
@ -154,24 +154,24 @@ class GHistIndexBlockMatrix {
const FastHistParam& param); const FastHistParam& param);
inline GHistIndexBlock operator[](size_t i) const { inline GHistIndexBlock operator[](size_t i) const {
return GHistIndexBlock(blocks[i].row_ptr_begin, blocks[i].index_begin); return {blocks_[i].row_ptr_begin, blocks_[i].index_begin};
} }
inline size_t GetNumBlock() const { inline size_t GetNumBlock() const {
return blocks.size(); return blocks_.size();
} }
private: private:
std::vector<size_t> row_ptr; std::vector<size_t> row_ptr_;
std::vector<uint32_t> index; std::vector<uint32_t> index_;
const HistCutMatrix* cut; const HistCutMatrix* cut_;
struct Block { struct Block {
const size_t* row_ptr_begin; const size_t* row_ptr_begin;
const size_t* row_ptr_end; const size_t* row_ptr_end;
const uint32_t* index_begin; const uint32_t* index_begin;
const uint32_t* index_end; const uint32_t* index_end;
}; };
std::vector<Block> blocks; std::vector<Block> blocks_;
}; };
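The blocks → blocks_ renames apply the PrivateMemberSuffix: '_' option from the .clang-tidy configuration: private and protected data members gain a trailing underscore, while public fields such as Block::row_ptr_begin keep plain names. A toy illustration (hypothetical class):

#include <cstddef>
#include <vector>

class BlockIndex {
 public:
  std::size_t GetNumBlock() const { return blocks_.size(); }

 private:
  std::vector<int> blocks_;  // renamed from `blocks`: private member -> trailing '_'
};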
/*! /*!
@ -186,7 +186,7 @@ struct GHistRow {
/*! \brief number of entries */ /*! \brief number of entries */
uint32_t size; uint32_t size;
GHistRow() {} GHistRow() = default;
GHistRow(GHistEntry* begin, uint32_t size) GHistRow(GHistEntry* begin, uint32_t size)
: begin(begin), size(size) {} : begin(begin), size(size) {}
}; };
@ -198,15 +198,15 @@ class HistCollection {
public: public:
// access histogram for i-th node // access histogram for i-th node
inline GHistRow operator[](bst_uint nid) const { inline GHistRow operator[](bst_uint nid) const {
const uint32_t kMax = std::numeric_limits<uint32_t>::max(); constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
CHECK_NE(row_ptr_[nid], kMax); CHECK_NE(row_ptr_[nid], kMax);
return GHistRow(const_cast<GHistEntry*>(dmlc::BeginPtr(data_) + row_ptr_[nid]), nbins_); return {const_cast<GHistEntry*>(dmlc::BeginPtr(data_) + row_ptr_[nid]), nbins_};
} }
// have we computed a histogram for i-th node? // have we computed a histogram for i-th node?
inline bool RowExists(bst_uint nid) const { inline bool RowExists(bst_uint nid) const {
const uint32_t kMax = std::numeric_limits<uint32_t>::max(); const uint32_t k_max = std::numeric_limits<uint32_t>::max();
return (nid < row_ptr_.size() && row_ptr_[nid] != kMax); return (nid < row_ptr_.size() && row_ptr_[nid] != k_max);
} }
// initialize histogram collection // initialize histogram collection
@ -218,7 +218,7 @@ class HistCollection {
// create an empty histogram for i-th node // create an empty histogram for i-th node
inline void AddHistRow(bst_uint nid) { inline void AddHistRow(bst_uint nid) {
const uint32_t kMax = std::numeric_limits<uint32_t>::max(); constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
if (nid >= row_ptr_.size()) { if (nid >= row_ptr_.size()) {
row_ptr_.resize(nid + 1, kMax); row_ptr_.resize(nid + 1, kMax);
} }
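Upgrading the kMax sentinel from const to constexpr guarantees a compile-time constant with no runtime initialization. In isolation:

#include <cstdint>
#include <limits>

// std::numeric_limits<uint32_t>::max() is constexpr since C++11, so the
// sentinel can be a true compile-time constant.
constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
static_assert(kMax == 0xFFFFFFFFu, "sentinel spans the full 32-bit range");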
@ -250,13 +250,13 @@ class GHistBuilder {
} }
// construct a histogram via histogram aggregation // construct a histogram via histogram aggregation
void BuildHist(const std::vector<bst_gpair>& gpair, void BuildHist(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices, const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat, const GHistIndexMatrix& gmat,
const std::vector<bst_uint>& feat_set, const std::vector<bst_uint>& feat_set,
GHistRow hist); GHistRow hist);
// same, with feature grouping // same, with feature grouping
void BuildBlockHist(const std::vector<bst_gpair>& gpair, void BuildBlockHist(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices, const RowSetCollection::Elem row_indices,
const GHistIndexBlockMatrix& gmatb, const GHistIndexBlockMatrix& gmatb,
const std::vector<bst_uint>& feat_set, const std::vector<bst_uint>& feat_set,
@ -6,6 +6,8 @@
// dummy implementation of HostDeviceVector in case CUDA is not used // dummy implementation of HostDeviceVector in case CUDA is not used
#include <xgboost/base.h> #include <xgboost/base.h>
#include <utility>
#include "./host_device_vector.h" #include "./host_device_vector.h"
namespace xgboost { namespace xgboost {
@ -13,8 +15,8 @@ namespace xgboost {
template <typename T> template <typename T>
struct HostDeviceVectorImpl { struct HostDeviceVectorImpl {
explicit HostDeviceVectorImpl(size_t size, T v) : data_h_(size, v) {} explicit HostDeviceVectorImpl(size_t size, T v) : data_h_(size, v) {}
explicit HostDeviceVectorImpl(std::initializer_list<T> init) : data_h_(init) {} HostDeviceVectorImpl(std::initializer_list<T> init) : data_h_(init) {}
explicit HostDeviceVectorImpl(const std::vector<T>& init) : data_h_(init) {} explicit HostDeviceVectorImpl(std::vector<T> init) : data_h_(std::move(init)) {}
std::vector<T> data_h_; std::vector<T> data_h_;
}; };
@ -43,25 +45,25 @@ HostDeviceVector<T>::~HostDeviceVector() {
} }
template <typename T> template <typename T>
size_t HostDeviceVector<T>::size() const { return impl_->data_h_.size(); } size_t HostDeviceVector<T>::Size() const { return impl_->data_h_.size(); }
template <typename T> template <typename T>
int HostDeviceVector<T>::device() const { return -1; } int HostDeviceVector<T>::DeviceIdx() const { return -1; }
template <typename T> template <typename T>
T* HostDeviceVector<T>::ptr_d(int device) { return nullptr; } T* HostDeviceVector<T>::DevicePointer(int device) { return nullptr; }
template <typename T> template <typename T>
std::vector<T>& HostDeviceVector<T>::data_h() { return impl_->data_h_; } std::vector<T>& HostDeviceVector<T>::HostVector() { return impl_->data_h_; }
template <typename T> template <typename T>
void HostDeviceVector<T>::resize(size_t new_size, T v, int new_device) { void HostDeviceVector<T>::Resize(size_t new_size, T v, int new_device) {
impl_->data_h_.resize(new_size, v); impl_->data_h_.resize(new_size, v);
} }
// explicit instantiations are required, as HostDeviceVector isn't header-only // explicit instantiations are required, as HostDeviceVector isn't header-only
template class HostDeviceVector<bst_float>; template class HostDeviceVector<bst_float>;
template class HostDeviceVector<bst_gpair>; template class HostDeviceVector<GradientPair>;
} // namespace xgboost } // namespace xgboost
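The closing template class lines matter because HostDeviceVector's member definitions live in this source file rather than the header: every element type used elsewhere must be instantiated explicitly, or linking fails. A generic sketch of the idiom (hypothetical names):

#include <cstddef>

template <typename T>
class MyVector {
 public:
  std::size_t Size() const;

 private:
  std::size_t size_{0};
};

// Out-of-line definition: visible only in this translation unit.
template <typename T>
std::size_t MyVector<T>::Size() const { return size_; }

// Explicit instantiations; without these, other translation units using
// MyVector<float> would hit "undefined reference" errors at link time.
template class MyVector<float>;
template class MyVector<int>;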
@ -35,27 +35,27 @@ struct HostDeviceVectorImpl {
void operator=(const HostDeviceVectorImpl<T>&) = delete; void operator=(const HostDeviceVectorImpl<T>&) = delete;
void operator=(HostDeviceVectorImpl<T>&&) = delete; void operator=(HostDeviceVectorImpl<T>&&) = delete;
size_t size() const { return on_d_ ? data_d_.size() : data_h_.size(); } size_t Size() const { return on_d_ ? data_d_.size() : data_h_.size(); }
int device() const { return device_; } int DeviceIdx() const { return device_; }
T* ptr_d(int device) { T* DevicePointer(int device) {
lazy_sync_device(device); LazySyncDevice(device);
return data_d_.data().get(); return data_d_.data().get();
} }
thrust::device_ptr<T> tbegin(int device) { thrust::device_ptr<T> tbegin(int device) { // NOLINT
return thrust::device_ptr<T>(ptr_d(device)); return thrust::device_ptr<T>(DevicePointer(device));
} }
thrust::device_ptr<T> tend(int device) { thrust::device_ptr<T> tend(int device) { // NOLINT
auto begin = tbegin(device); auto begin = tbegin(device);
return begin + size(); return begin + Size();
} }
std::vector<T>& data_h() { std::vector<T>& HostVector() {
lazy_sync_host(); LazySyncHost();
return data_h_; return data_h_;
} }
void resize(size_t new_size, T v, int new_device) { void Resize(size_t new_size, T v, int new_device) {
if (new_size == this->size() && new_device == device_) if (new_size == this->Size() && new_device == device_)
return; return;
if (new_device != -1) if (new_device != -1)
device_ = new_device; device_ = new_device;
@ -70,26 +70,26 @@ struct HostDeviceVectorImpl {
} }
} }
void lazy_sync_host() { void LazySyncHost() {
if (!on_d_) if (!on_d_)
return; return;
if (data_h_.size() != this->size()) if (data_h_.size() != this->Size())
data_h_.resize(this->size()); data_h_.resize(this->Size());
dh::safe_cuda(cudaSetDevice(device_)); dh::safe_cuda(cudaSetDevice(device_));
thrust::copy(data_d_.begin(), data_d_.end(), data_h_.begin()); thrust::copy(data_d_.begin(), data_d_.end(), data_h_.begin());
on_d_ = false; on_d_ = false;
} }
void lazy_sync_device(int device) { void LazySyncDevice(int device) {
if (on_d_) if (on_d_)
return; return;
if (device != device_) { if (device != device_) {
CHECK_EQ(device_, -1); CHECK_EQ(device_, -1);
device_ = device; device_ = device;
} }
if (data_d_.size() != this->size()) { if (data_d_.size() != this->Size()) {
dh::safe_cuda(cudaSetDevice(device_)); dh::safe_cuda(cudaSetDevice(device_));
data_d_.resize(this->size()); data_d_.resize(this->Size());
} }
dh::safe_cuda(cudaSetDevice(device_)); dh::safe_cuda(cudaSetDevice(device_));
thrust::copy(data_h_.begin(), data_h_.end(), data_d_.begin()); thrust::copy(data_h_.begin(), data_h_.end(), data_d_.begin());
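The LazySyncHost/LazySyncDevice pair implements a dirty-flag protocol: the data is authoritative on exactly one side (on_d_), and accessors copy across only on demand. A CUDA-free sketch of the same idea (hypothetical class, copies elided):

#include <vector>

class MirroredBuffer {
 public:
  // Host access: if the device copy is authoritative, pull it back first.
  std::vector<float>& HostVector() {
    if (on_device_) {
      CopyDeviceToHost();  // stands in for cudaSetDevice + thrust::copy above
      on_device_ = false;
    }
    return host_;
  }

 private:
  void CopyDeviceToHost() { /* elided: device-to-host copy */ }
  std::vector<float> host_;
  bool on_device_{false};
};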
@ -128,34 +128,34 @@ HostDeviceVector<T>::~HostDeviceVector() {
} }
template <typename T> template <typename T>
size_t HostDeviceVector<T>::size() const { return impl_->size(); } size_t HostDeviceVector<T>::Size() const { return impl_->Size(); }
template <typename T> template <typename T>
int HostDeviceVector<T>::device() const { return impl_->device(); } int HostDeviceVector<T>::DeviceIdx() const { return impl_->DeviceIdx(); }
template <typename T> template <typename T>
T* HostDeviceVector<T>::ptr_d(int device) { return impl_->ptr_d(device); } T* HostDeviceVector<T>::DevicePointer(int device) { return impl_->DevicePointer(device); }
template <typename T> template <typename T>
thrust::device_ptr<T> HostDeviceVector<T>::tbegin(int device) { thrust::device_ptr<T> HostDeviceVector<T>::tbegin(int device) { // NOLINT
return impl_->tbegin(device); return impl_->tbegin(device);
} }
template <typename T> template <typename T>
thrust::device_ptr<T> HostDeviceVector<T>::tend(int device) { thrust::device_ptr<T> HostDeviceVector<T>::tend(int device) { // NOLINT
return impl_->tend(device); return impl_->tend(device);
} }
template <typename T> template <typename T>
std::vector<T>& HostDeviceVector<T>::data_h() { return impl_->data_h(); } std::vector<T>& HostDeviceVector<T>::HostVector() { return impl_->HostVector(); }
template <typename T> template <typename T>
void HostDeviceVector<T>::resize(size_t new_size, T v, int new_device) { void HostDeviceVector<T>::Resize(size_t new_size, T v, int new_device) {
impl_->resize(new_size, v, new_device); impl_->Resize(new_size, v, new_device);
} }
// explicit instantiations are required, as HostDeviceVector isn't header-only // explicit instantiations are required, as HostDeviceVector isn't header-only
template class HostDeviceVector<bst_float>; template class HostDeviceVector<bst_float>;
template class HostDeviceVector<bst_gpair>; template class HostDeviceVector<GradientPair>;
} // namespace xgboost } // namespace xgboost
@ -70,10 +70,10 @@ class HostDeviceVector {
HostDeviceVector(HostDeviceVector<T>&&) = delete; HostDeviceVector(HostDeviceVector<T>&&) = delete;
void operator=(const HostDeviceVector<T>&) = delete; void operator=(const HostDeviceVector<T>&) = delete;
void operator=(HostDeviceVector<T>&&) = delete; void operator=(HostDeviceVector<T>&&) = delete;
size_t size() const; size_t Size() const;
int device() const; int DeviceIdx() const;
T* ptr_d(int device); T* DevicePointer(int device);
T* ptr_h() { return data_h().data(); } T* HostPointer() { return HostVector().data(); }
// only define functions returning device_ptr // only define functions returning device_ptr
// if HostDeviceVector.h is included from a .cu file // if HostDeviceVector.h is included from a .cu file
@ -82,10 +82,10 @@ class HostDeviceVector {
thrust::device_ptr<T> tend(int device); thrust::device_ptr<T> tend(int device);
#endif #endif
std::vector<T>& data_h(); std::vector<T>& HostVector();
// passing in new_device == -1 keeps the device as is // passing in new_device == -1 keeps the device as is
void resize(size_t new_size, T v = T(), int new_device = -1); void Resize(size_t new_size, T v = T(), int new_device = -1);
private: private:
HostDeviceVectorImpl<T>* impl_; HostDeviceVectorImpl<T>* impl_;
@ -15,8 +15,8 @@
namespace xgboost { namespace xgboost {
namespace common { namespace common {
typedef rabit::utils::MemoryFixSizeBuffer MemoryFixSizeBuffer; using MemoryFixSizeBuffer = rabit::utils::MemoryFixSizeBuffer;
typedef rabit::utils::MemoryBufferStream MemoryBufferStream; using MemoryBufferStream = rabit::utils::MemoryBufferStream;
/*! /*!
 * \brief Input stream that supports additional PeekRead * \brief Input stream that supports additional PeekRead
@ -39,12 +39,12 @@ inline void Softmax(std::vector<float>* p_rec) {
wmax = std::max(rec[i], wmax); wmax = std::max(rec[i], wmax);
} }
double wsum = 0.0f; double wsum = 0.0f;
for (size_t i = 0; i < rec.size(); ++i) { for (float & elem : rec) {
rec[i] = std::exp(rec[i] - wmax); elem = std::exp(elem - wmax);
wsum += rec[i]; wsum += elem;
} }
for (size_t i = 0; i < rec.size(); ++i) { for (float & elem : rec) {
rec[i] /= static_cast<float>(wsum); elem /= static_cast<float>(wsum);
} }
} }
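The Softmax rewrite is modernize-loop-convert: an index loop whose body only touches rec[i] becomes a range-based loop over mutable references. The same shape in isolation:

#include <vector>

void ScaleInPlace(std::vector<float>* p_vec, float factor) {
  std::vector<float>& vec = *p_vec;
  // Was: for (size_t i = 0; i < vec.size(); ++i) vec[i] *= factor;
  for (float& elem : vec) {
    elem *= factor;
  }
}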
@ -35,7 +35,7 @@ struct WQSummary {
/*! \brief the value of data */ /*! \brief the value of data */
DType value; DType value;
// constructor // constructor
Entry() {} Entry() = default;
// constructor // constructor
Entry(RType rmin, RType rmax, RType wmin, DType value) Entry(RType rmin, RType rmax, RType wmin, DType value)
: rmin(rmin), rmax(rmax), wmin(wmin), value(value) {} : rmin(rmin), rmax(rmax), wmin(wmin), value(value) {}
@ -48,11 +48,11 @@ struct WQSummary {
CHECK(rmax- rmin - wmin > -eps) << "relation constraint: min/max"; CHECK(rmax- rmin - wmin > -eps) << "relation constraint: min/max";
} }
/*! \return rmin estimation for v strictly bigger than value */ /*! \return rmin estimation for v strictly bigger than value */
inline RType rmin_next() const { inline RType RMinNext() const {
return rmin + wmin; return rmin + wmin;
} }
/*! \return rmax estimation for v strictly smaller than value */ /*! \return rmax estimation for v strictly smaller than value */
inline RType rmax_prev() const { inline RType RMaxPrev() const {
return rmax - wmin; return rmax - wmin;
} }
}; };
@ -65,7 +65,7 @@ struct WQSummary {
// weight of instance // weight of instance
RType weight; RType weight;
// default constructor // default constructor
QEntry() {} QEntry() = default;
// constructor // constructor
QEntry(DType value, RType weight) QEntry(DType value, RType weight)
: value(value), weight(weight) {} : value(value), weight(weight) {}
@ -116,7 +116,7 @@ struct WQSummary {
inline RType MaxError() const { inline RType MaxError() const {
RType res = data[0].rmax - data[0].rmin - data[0].wmin; RType res = data[0].rmax - data[0].rmin - data[0].wmin;
for (size_t i = 1; i < size; ++i) { for (size_t i = 1; i < size; ++i) {
res = std::max(data[i].rmax_prev() - data[i - 1].rmin_next(), res); res = std::max(data[i].RMaxPrev() - data[i - 1].RMinNext(), res);
res = std::max(data[i].rmax - data[i].rmin - data[i].wmin, res); res = std::max(data[i].rmax - data[i].rmin - data[i].wmin, res);
} }
return res; return res;
@ -140,8 +140,8 @@ struct WQSummary {
if (istart == 0) { if (istart == 0) {
return Entry(0.0f, 0.0f, 0.0f, qvalue); return Entry(0.0f, 0.0f, 0.0f, qvalue);
} else { } else {
return Entry(data[istart - 1].rmin_next(), return Entry(data[istart - 1].RMinNext(),
data[istart].rmax_prev(), data[istart].RMaxPrev(),
0.0f, qvalue); 0.0f, qvalue);
} }
} }
@ -197,7 +197,7 @@ struct WQSummary {
while (i < src.size - 1 while (i < src.size - 1
&& dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i; && dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i;
CHECK(i != src.size - 1); CHECK(i != src.size - 1);
if (dx2 < src.data[i].rmin_next() + src.data[i + 1].rmax_prev()) { if (dx2 < src.data[i].RMinNext() + src.data[i + 1].RMaxPrev()) {
if (i != lastidx) { if (i != lastidx) {
data[size++] = src.data[i]; lastidx = i; data[size++] = src.data[i]; lastidx = i;
} }
@ -236,20 +236,20 @@ struct WQSummary {
*dst = Entry(a->rmin + b->rmin, *dst = Entry(a->rmin + b->rmin,
a->rmax + b->rmax, a->rmax + b->rmax,
a->wmin + b->wmin, a->value); a->wmin + b->wmin, a->value);
aprev_rmin = a->rmin_next(); aprev_rmin = a->RMinNext();
bprev_rmin = b->rmin_next(); bprev_rmin = b->RMinNext();
++dst; ++a; ++b; ++dst; ++a; ++b;
} else if (a->value < b->value) { } else if (a->value < b->value) {
*dst = Entry(a->rmin + bprev_rmin, *dst = Entry(a->rmin + bprev_rmin,
a->rmax + b->rmax_prev(), a->rmax + b->RMaxPrev(),
a->wmin, a->value); a->wmin, a->value);
aprev_rmin = a->rmin_next(); aprev_rmin = a->RMinNext();
++dst; ++a; ++dst; ++a;
} else { } else {
*dst = Entry(b->rmin + aprev_rmin, *dst = Entry(b->rmin + aprev_rmin,
b->rmax + a->rmax_prev(), b->rmax + a->RMaxPrev(),
b->wmin, b->value); b->wmin, b->value);
bprev_rmin = b->rmin_next(); bprev_rmin = b->RMinNext();
++dst; ++b; ++dst; ++b;
} }
} }
@ -307,7 +307,7 @@ struct WQSummary {
data[i].rmax = prev_rmax; data[i].rmax = prev_rmax;
*err_maxgap = std::max(*err_maxgap, prev_rmax - data[i].rmax); *err_maxgap = std::max(*err_maxgap, prev_rmax - data[i].rmax);
} }
RType rmin_next = data[i].rmin_next(); RType rmin_next = data[i].RMinNext();
if (data[i].rmax < rmin_next) { if (data[i].rmax < rmin_next) {
data[i].rmax = rmin_next; data[i].rmax = rmin_next;
*err_wgap = std::max(*err_wgap, data[i].rmax - rmin_next); *err_wgap = std::max(*err_wgap, data[i].rmax - rmin_next);
@ -334,13 +334,13 @@ struct WQSummary {
template<typename DType, typename RType> template<typename DType, typename RType>
struct WXQSummary : public WQSummary<DType, RType> { struct WXQSummary : public WQSummary<DType, RType> {
// redefine entry type // redefine entry type
typedef typename WQSummary<DType, RType>::Entry Entry; using Entry = typename WQSummary<DType, RType>::Entry;
// constructor // constructor
WXQSummary(Entry *data, size_t size) WXQSummary(Entry *data, size_t size)
: WQSummary<DType, RType>(data, size) {} : WQSummary<DType, RType>(data, size) {}
// check if the block is large chunk // check if the block is large chunk
inline static bool CheckLarge(const Entry &e, RType chunk) { inline static bool CheckLarge(const Entry &e, RType chunk) {
return e.rmin_next() > e.rmax_prev() + chunk; return e.RMinNext() > e.RMaxPrev() + chunk;
} }
// set prune // set prune
inline void SetPrune(const WQSummary<DType, RType> &src, size_t maxsize) { inline void SetPrune(const WQSummary<DType, RType> &src, size_t maxsize) {
@ -377,13 +377,13 @@ struct WXQSummary : public WQSummary<DType, RType> {
if (CheckLarge(src.data[i], chunk)) { if (CheckLarge(src.data[i], chunk)) {
if (bid != i - 1) { if (bid != i - 1) {
// accumulate the range of the rest points // accumulate the range of the rest points
mrange += src.data[i].rmax_prev() - src.data[bid].rmin_next(); mrange += src.data[i].RMaxPrev() - src.data[bid].RMinNext();
} }
bid = i; ++nbig; bid = i; ++nbig;
} }
} }
if (bid != src.size - 2) { if (bid != src.size - 2) {
mrange += src.data[src.size-1].rmax_prev() - src.data[bid].rmin_next(); mrange += src.data[src.size-1].RMaxPrev() - src.data[bid].RMinNext();
} }
} }
// assert: there cannot be more than n big data points // assert: there cannot be more than n big data points
@ -405,14 +405,14 @@ struct WXQSummary : public WQSummary<DType, RType> {
if (end == src.size - 1 || CheckLarge(src.data[end], chunk)) { if (end == src.size - 1 || CheckLarge(src.data[end], chunk)) {
if (bid != end - 1) { if (bid != end - 1) {
size_t i = bid; size_t i = bid;
RType maxdx2 = src.data[end].rmax_prev() * 2; RType maxdx2 = src.data[end].RMaxPrev() * 2;
for (; k < n; ++k) { for (; k < n; ++k) {
RType dx2 = 2 * ((k * mrange) / n + begin); RType dx2 = 2 * ((k * mrange) / n + begin);
if (dx2 >= maxdx2) break; if (dx2 >= maxdx2) break;
while (i < end && while (i < end &&
dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i; dx2 >= src.data[i + 1].rmax + src.data[i + 1].rmin) ++i;
if (i == end) break; if (i == end) break;
if (dx2 < src.data[i].rmin_next() + src.data[i + 1].rmax_prev()) { if (dx2 < src.data[i].RMinNext() + src.data[i + 1].RMaxPrev()) {
if (i != lastidx) { if (i != lastidx) {
this->data[this->size++] = src.data[i]; lastidx = i; this->data[this->size++] = src.data[i]; lastidx = i;
} }
@ -429,7 +429,7 @@ struct WXQSummary : public WQSummary<DType, RType> {
} }
bid = end; bid = end;
// shift base by the gap // shift base by the gap
begin += src.data[bid].rmin_next() - src.data[bid].rmax_prev(); begin += src.data[bid].RMinNext() - src.data[bid].RMaxPrev();
} }
} }
} }
@ -448,7 +448,7 @@ struct GKSummary {
/*! \brief the value of data */ /*! \brief the value of data */
DType value; DType value;
// constructor // constructor
Entry() {} Entry() = default;
// constructor // constructor
Entry(RType rmin, RType rmax, DType value) Entry(RType rmin, RType rmax, DType value)
: rmin(rmin), rmax(rmax), value(value) {} : rmin(rmin), rmax(rmax), value(value) {}
@ -591,17 +591,17 @@ template<typename DType, typename RType, class TSummary>
class QuantileSketchTemplate { class QuantileSketchTemplate {
public: public:
/*! \brief type of summary type */ /*! \brief type of summary type */
typedef TSummary Summary; using Summary = TSummary;
/*! \brief the entry type */ /*! \brief the entry type */
typedef typename Summary::Entry Entry; using Entry = typename Summary::Entry;
/*! \brief same as summary, but use STL to backup the space */ /*! \brief same as summary, but use STL to backup the space */
struct SummaryContainer : public Summary { struct SummaryContainer : public Summary {
std::vector<Entry> space; std::vector<Entry> space;
SummaryContainer(const SummaryContainer &src) : Summary(NULL, src.size) { SummaryContainer(const SummaryContainer &src) : Summary(nullptr, src.size) {
this->space = src.space; this->space = src.space;
this->data = dmlc::BeginPtr(this->space); this->data = dmlc::BeginPtr(this->space);
} }
SummaryContainer() : Summary(NULL, 0) { SummaryContainer() : Summary(nullptr, 0) {
} }
/*! \brief reserve space for summary */ /*! \brief reserve space for summary */
inline void Reserve(size_t size) { inline void Reserve(size_t size) {
@ -775,7 +775,7 @@ class QuantileSketchTemplate {
inline void InitLevel(size_t nlevel) { inline void InitLevel(size_t nlevel) {
if (level.size() >= nlevel) return; if (level.size() >= nlevel) return;
data.resize(limit_size * nlevel); data.resize(limit_size * nlevel);
level.resize(nlevel, Summary(NULL, 0)); level.resize(nlevel, Summary(nullptr, 0));
for (size_t l = 0; l < level.size(); ++l) { for (size_t l = 0; l < level.size(); ++l) {
level[l].data = dmlc::BeginPtr(data) + l * limit_size; level[l].data = dmlc::BeginPtr(data) + l * limit_size;
} }
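The NULL → nullptr conversions (modernize-use-nullptr) also affect overload resolution: NULL is an integer constant, whereas nullptr only converts to pointer types. A quick demonstration:

#include <iostream>

void Report(int) { std::cout << "int overload\n"; }
void Report(const char*) { std::cout << "pointer overload\n"; }

int main() {
  Report(nullptr);  // unambiguously selects the pointer overload
  // Report(NULL); // with NULL defined as 0, this would pick Report(int)
}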
@ -15,7 +15,7 @@ namespace common {
/*! /*!
* \brief Define mt19937 as default type Random Engine. * \brief Define mt19937 as default type Random Engine.
*/ */
typedef std::mt19937 RandomEngine; using RandomEngine = std::mt19937;
#if XGBOOST_CUSTOMIZE_GLOBAL_PRNG #if XGBOOST_CUSTOMIZE_GLOBAL_PRNG
/*! /*!
@ -56,7 +56,7 @@ typedef CustomGlobalRandomEngine GlobalRandomEngine;
/*! /*!
* \brief global random engine * \brief global random engine
*/ */
typedef RandomEngine GlobalRandomEngine; using GlobalRandomEngine = RandomEngine;
#endif #endif
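The typedef → using rewrites (modernize-use-using) read left-to-right and, unlike typedef, extend to alias templates. A brief sketch:

#include <random>
#include <vector>

using RandomEngine = std::mt19937;  // was: typedef std::mt19937 RandomEngine;

// Alias templates have no typedef equivalent at all:
template <typename T>
using Buffer = std::vector<T>;

Buffer<float> MakeBuffer() { return Buffer<float>(8, 0.0f); }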
/*! /*!
@ -21,18 +21,18 @@ class RowSetCollection {
* rows (instances) associated with a particular node in a decision * rows (instances) associated with a particular node in a decision
* tree. */ * tree. */
struct Elem { struct Elem {
const size_t* begin; const size_t* begin{nullptr};
const size_t* end; const size_t* end{nullptr};
int node_id; int node_id{-1};
// id of node associated with this instance set; -1 means uninitialized // id of node associated with this instance set; -1 means uninitialized
Elem(void) Elem()
: begin(nullptr), end(nullptr), node_id(-1) {} = default;
Elem(const size_t* begin, Elem(const size_t* begin,
const size_t* end, const size_t* end,
int node_id) int node_id)
: begin(begin), end(end), node_id(node_id) {} : begin(begin), end(end), node_id(node_id) {}
inline size_t size() const { inline size_t Size() const {
return end - begin; return end - begin;
} }
}; };
@ -42,11 +42,11 @@ class RowSetCollection {
std::vector<size_t> right; std::vector<size_t> right;
}; };
inline std::vector<Elem>::const_iterator begin() const { inline std::vector<Elem>::const_iterator begin() const { // NOLINT
return elem_of_each_node_.begin(); return elem_of_each_node_.begin();
} }
inline std::vector<Elem>::const_iterator end() const { inline std::vector<Elem>::const_iterator end() const { // NOLINT
return elem_of_each_node_.end(); return elem_of_each_node_.end();
} }
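begin() and end() keep lower_case names despite FunctionCase: CamelCase because range-based for looks up members with exactly those spellings; the naming check is therefore suppressed per line with NOLINT. A minimal container that stays range-for compatible:

#include <vector>

class RowSet {
 public:
  // Must be spelled `begin`/`end` for `for (int r : row_set)` to compile,
  // hence the per-line suppression of readability-identifier-naming.
  std::vector<int>::const_iterator begin() const { return rows_.begin(); }  // NOLINT
  std::vector<int>::const_iterator end() const { return rows_.end(); }  // NOLINT

 private:
  std::vector<int> rows_{1, 2, 3};
};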
@ -88,7 +88,7 @@ class RowSetCollection {
unsigned left_node_id, unsigned left_node_id,
unsigned right_node_id) { unsigned right_node_id) {
const Elem e = elem_of_each_node_[node_id]; const Elem e = elem_of_each_node_[node_id];
const bst_omp_uint nthread = static_cast<bst_omp_uint>(row_split_tloc.size()); const auto nthread = static_cast<bst_omp_uint>(row_split_tloc.size());
CHECK(e.begin != nullptr); CHECK(e.begin != nullptr);
size_t* all_begin = dmlc::BeginPtr(row_indices_); size_t* all_begin = dmlc::BeginPtr(row_indices_);
size_t* begin = all_begin + (e.begin - all_begin); size_t* begin = all_begin + (e.begin - all_begin);
@ -12,10 +12,10 @@
namespace xgboost { namespace xgboost {
namespace common { namespace common {
struct Timer { struct Timer {
typedef std::chrono::high_resolution_clock ClockT; using ClockT = std::chrono::high_resolution_clock;
typedef std::chrono::high_resolution_clock::time_point TimePointT; using TimePointT = std::chrono::high_resolution_clock::time_point;
typedef std::chrono::high_resolution_clock::duration DurationT; using DurationT = std::chrono::high_resolution_clock::duration;
typedef std::chrono::duration<double> SecondsT; using SecondsT = std::chrono::duration<double>;
TimePointT start; TimePointT start;
DurationT elapsed; DurationT elapsed;
@ -70,7 +70,7 @@ struct Monitor {
if (debug_verbose) { if (debug_verbose) {
#ifdef __CUDACC__ #ifdef __CUDACC__
#include "device_helpers.cuh" #include "device_helpers.cuh"
dh::synchronize_n_devices(dList.size(), dList); dh::SynchronizeNDevices(dList.size(), dList);
#endif #endif
} }
timer_map[name].Start(); timer_map[name].Start();
@ -80,7 +80,7 @@ struct Monitor {
if (debug_verbose) { if (debug_verbose) {
#ifdef __CUDACC__ #ifdef __CUDACC__
#include "device_helpers.cuh" #include "device_helpers.cuh"
dh::synchronize_n_devices(dList.size(), dList); dh::SynchronizeNDevices(dList.size(), dList);
#endif #endif
} }
timer_map[name].Stop(); timer_map[name].Stop();
@ -24,51 +24,51 @@ DMLC_REGISTRY_ENABLE(::xgboost::data::SparsePageFormatReg);
namespace xgboost { namespace xgboost {
// implementation of inline functions // implementation of inline functions
void MetaInfo::Clear() { void MetaInfo::Clear() {
num_row = num_col = num_nonzero = 0; num_row_ = num_col_ = num_nonzero_ = 0;
labels.clear(); labels_.clear();
root_index.clear(); root_index_.clear();
group_ptr.clear(); group_ptr_.clear();
weights.clear(); weights_.clear();
base_margin.clear(); base_margin_.clear();
} }
void MetaInfo::SaveBinary(dmlc::Stream *fo) const { void MetaInfo::SaveBinary(dmlc::Stream *fo) const {
int32_t version = kVersion; int32_t version = kVersion;
fo->Write(&version, sizeof(version)); fo->Write(&version, sizeof(version));
fo->Write(&num_row, sizeof(num_row)); fo->Write(&num_row_, sizeof(num_row_));
fo->Write(&num_col, sizeof(num_col)); fo->Write(&num_col_, sizeof(num_col_));
fo->Write(&num_nonzero, sizeof(num_nonzero)); fo->Write(&num_nonzero_, sizeof(num_nonzero_));
fo->Write(labels); fo->Write(labels_);
fo->Write(group_ptr); fo->Write(group_ptr_);
fo->Write(weights); fo->Write(weights_);
fo->Write(root_index); fo->Write(root_index_);
fo->Write(base_margin); fo->Write(base_margin_);
} }
void MetaInfo::LoadBinary(dmlc::Stream *fi) { void MetaInfo::LoadBinary(dmlc::Stream *fi) {
int version; int version;
CHECK(fi->Read(&version, sizeof(version)) == sizeof(version)) << "MetaInfo: invalid version"; CHECK(fi->Read(&version, sizeof(version)) == sizeof(version)) << "MetaInfo: invalid version";
CHECK_EQ(version, kVersion) << "MetaInfo: invalid format"; CHECK_EQ(version, kVersion) << "MetaInfo: invalid format";
CHECK(fi->Read(&num_row, sizeof(num_row)) == sizeof(num_row)) << "MetaInfo: invalid format"; CHECK(fi->Read(&num_row_, sizeof(num_row_)) == sizeof(num_row_)) << "MetaInfo: invalid format";
CHECK(fi->Read(&num_col, sizeof(num_col)) == sizeof(num_col)) << "MetaInfo: invalid format"; CHECK(fi->Read(&num_col_, sizeof(num_col_)) == sizeof(num_col_)) << "MetaInfo: invalid format";
CHECK(fi->Read(&num_nonzero, sizeof(num_nonzero)) == sizeof(num_nonzero)) CHECK(fi->Read(&num_nonzero_, sizeof(num_nonzero_)) == sizeof(num_nonzero_))
<< "MetaInfo: invalid format"; << "MetaInfo: invalid format";
CHECK(fi->Read(&labels)) << "MetaInfo: invalid format"; CHECK(fi->Read(&labels_)) << "MetaInfo: invalid format";
CHECK(fi->Read(&group_ptr)) << "MetaInfo: invalid format"; CHECK(fi->Read(&group_ptr_)) << "MetaInfo: invalid format";
CHECK(fi->Read(&weights)) << "MetaInfo: invalid format"; CHECK(fi->Read(&weights_)) << "MetaInfo: invalid format";
CHECK(fi->Read(&root_index)) << "MetaInfo: invalid format"; CHECK(fi->Read(&root_index_)) << "MetaInfo: invalid format";
CHECK(fi->Read(&base_margin)) << "MetaInfo: invalid format"; CHECK(fi->Read(&base_margin_)) << "MetaInfo: invalid format";
} }
// try to load group information from file, if it exists // try to load group information from file, if it exists
inline bool MetaTryLoadGroup(const std::string& fname, inline bool MetaTryLoadGroup(const std::string& fname,
std::vector<unsigned>* group) { std::vector<unsigned>* group) {
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r", true)); std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r", true));
if (fi.get() == nullptr) return false; if (fi == nullptr) return false;
dmlc::istream is(fi.get()); dmlc::istream is(fi.get());
group->clear(); group->clear();
group->push_back(0); group->push_back(0);
unsigned nline; unsigned nline = 0;
while (is >> nline) { while (is >> nline) {
group->push_back(group->back() + nline); group->push_back(group->back() + nline);
} }
@ -79,7 +79,7 @@ inline bool MetaTryLoadGroup(const std::string& fname,
inline bool MetaTryLoadFloatInfo(const std::string& fname, inline bool MetaTryLoadFloatInfo(const std::string& fname,
std::vector<bst_float>* data) { std::vector<bst_float>* data) {
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r", true)); std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r", true));
if (fi.get() == nullptr) return false; if (fi == nullptr) return false;
dmlc::istream is(fi.get()); dmlc::istream is(fi.get());
data->clear(); data->clear();
bst_float value; bst_float value;
@ -93,16 +93,16 @@ inline bool MetaTryLoadFloatInfo(const std::string& fname,
#define DISPATCH_CONST_PTR(dtype, old_ptr, cast_ptr, proc) \ #define DISPATCH_CONST_PTR(dtype, old_ptr, cast_ptr, proc) \
switch (dtype) { \ switch (dtype) { \
case kFloat32: { \ case kFloat32: { \
const float* cast_ptr = reinterpret_cast<const float*>(old_ptr); proc; break; \ auto cast_ptr = reinterpret_cast<const float*>(old_ptr); proc; break; \
} \ } \
case kDouble: { \ case kDouble: { \
const double* cast_ptr = reinterpret_cast<const double*>(old_ptr); proc; break; \ auto cast_ptr = reinterpret_cast<const double*>(old_ptr); proc; break; \
} \ } \
case kUInt32: { \ case kUInt32: { \
const uint32_t* cast_ptr = reinterpret_cast<const uint32_t*>(old_ptr); proc; break; \ auto cast_ptr = reinterpret_cast<const uint32_t*>(old_ptr); proc; break; \
} \ } \
case kUInt64: { \ case kUInt64: { \
const uint64_t* cast_ptr = reinterpret_cast<const uint64_t*>(old_ptr); proc; break; \ auto cast_ptr = reinterpret_cast<const uint64_t*>(old_ptr); proc; break; \
} \ } \
    default: LOG(FATAL) << "Unknown data type " << dtype; \     default: LOG(FATAL) << "Unknown data type " << dtype; \
} \ } \
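The macro change applies modernize-use-auto: when the initializer is a cast that already spells out the type, auto on the left drops the duplication without losing information. In isolation:

void Inspect(const void* old_ptr) {
  // Was: const float* cast_ptr = reinterpret_cast<const float*>(old_ptr);
  auto cast_ptr = reinterpret_cast<const float*>(old_ptr);
  static_cast<void>(cast_ptr);  // silence unused-variable warnings in this sketch
}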
@ -110,28 +110,28 @@ inline bool MetaTryLoadFloatInfo(const std::string& fname,
void MetaInfo::SetInfo(const char* key, const void* dptr, DataType dtype, size_t num) { void MetaInfo::SetInfo(const char* key, const void* dptr, DataType dtype, size_t num) {
if (!std::strcmp(key, "root_index")) { if (!std::strcmp(key, "root_index")) {
root_index.resize(num); root_index_.resize(num);
DISPATCH_CONST_PTR(dtype, dptr, cast_dptr, DISPATCH_CONST_PTR(dtype, dptr, cast_dptr,
std::copy(cast_dptr, cast_dptr + num, root_index.begin())); std::copy(cast_dptr, cast_dptr + num, root_index_.begin()));
} else if (!std::strcmp(key, "label")) { } else if (!std::strcmp(key, "label")) {
labels.resize(num); labels_.resize(num);
DISPATCH_CONST_PTR(dtype, dptr, cast_dptr, DISPATCH_CONST_PTR(dtype, dptr, cast_dptr,
std::copy(cast_dptr, cast_dptr + num, labels.begin())); std::copy(cast_dptr, cast_dptr + num, labels_.begin()));
} else if (!std::strcmp(key, "weight")) { } else if (!std::strcmp(key, "weight")) {
weights.resize(num); weights_.resize(num);
DISPATCH_CONST_PTR(dtype, dptr, cast_dptr, DISPATCH_CONST_PTR(dtype, dptr, cast_dptr,
std::copy(cast_dptr, cast_dptr + num, weights.begin())); std::copy(cast_dptr, cast_dptr + num, weights_.begin()));
} else if (!std::strcmp(key, "base_margin")) { } else if (!std::strcmp(key, "base_margin")) {
base_margin.resize(num); base_margin_.resize(num);
DISPATCH_CONST_PTR(dtype, dptr, cast_dptr, DISPATCH_CONST_PTR(dtype, dptr, cast_dptr,
std::copy(cast_dptr, cast_dptr + num, base_margin.begin())); std::copy(cast_dptr, cast_dptr + num, base_margin_.begin()));
} else if (!std::strcmp(key, "group")) { } else if (!std::strcmp(key, "group")) {
group_ptr.resize(num + 1); group_ptr_.resize(num + 1);
DISPATCH_CONST_PTR(dtype, dptr, cast_dptr, DISPATCH_CONST_PTR(dtype, dptr, cast_dptr,
std::copy(cast_dptr, cast_dptr + num, group_ptr.begin() + 1)); std::copy(cast_dptr, cast_dptr + num, group_ptr_.begin() + 1));
group_ptr[0] = 0; group_ptr_[0] = 0;
for (size_t i = 1; i < group_ptr.size(); ++i) { for (size_t i = 1; i < group_ptr_.size(); ++i) {
group_ptr[i] = group_ptr[i - 1] + group_ptr[i]; group_ptr_[i] = group_ptr_[i - 1] + group_ptr_[i];
} }
} }
} }
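The "group" branch stores per-group sizes at offsets 1..num and converts them in place to cumulative boundaries; sizes {3, 2, 4} become group_ptr {0, 3, 5, 9}. The same prefix sum as a standalone helper (hypothetical function mirroring the loop above):

#include <cstddef>
#include <vector>

std::vector<unsigned> ToGroupPtr(const std::vector<unsigned>& sizes) {
  std::vector<unsigned> ptr(sizes.size() + 1, 0);
  for (std::size_t i = 0; i < sizes.size(); ++i) {
    ptr[i + 1] = ptr[i] + sizes[i];  // {3, 2, 4} -> {0, 3, 5, 9}
  }
  return ptr;
}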
@ -163,7 +163,9 @@ DMatrix* DMatrix::Load(const std::string& uri,
<< "-" << rabit::GetWorldSize() << "-" << rabit::GetWorldSize()
<< cache_shards[i].substr(pos, cache_shards[i].length()); << cache_shards[i].substr(pos, cache_shards[i].length());
} }
if (i + 1 != cache_shards.size()) os << ':'; if (i + 1 != cache_shards.size()) {
os << ':';
}
} }
cache_file = os.str(); cache_file = os.str();
} }
@ -187,7 +189,7 @@ DMatrix* DMatrix::Load(const std::string& uri,
if (file_format == "auto" && npart == 1) { if (file_format == "auto" && npart == 1) {
int magic; int magic;
std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r", true)); std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r", true));
if (fi.get() != nullptr) { if (fi != nullptr) {
common::PeekableInStream is(fi.get()); common::PeekableInStream is(fi.get());
if (is.PeekRead(&magic, sizeof(magic)) == sizeof(magic) && if (is.PeekRead(&magic, sizeof(magic)) == sizeof(magic) &&
magic == data::SimpleCSRSource::kMagic) { magic == data::SimpleCSRSource::kMagic) {
@ -195,8 +197,8 @@ DMatrix* DMatrix::Load(const std::string& uri,
source->LoadBinary(&is); source->LoadBinary(&is);
DMatrix* dmat = DMatrix::Create(std::move(source), cache_file); DMatrix* dmat = DMatrix::Create(std::move(source), cache_file);
if (!silent) { if (!silent) {
LOG(CONSOLE) << dmat->info().num_row << 'x' << dmat->info().num_col << " matrix with " LOG(CONSOLE) << dmat->Info().num_row_ << 'x' << dmat->Info().num_col_ << " matrix with "
<< dmat->info().num_nonzero << " entries loaded from " << uri; << dmat->Info().num_nonzero_ << " entries loaded from " << uri;
} }
return dmat; return dmat;
} }
@ -207,26 +209,26 @@ DMatrix* DMatrix::Load(const std::string& uri,
dmlc::Parser<uint32_t>::Create(fname.c_str(), partid, npart, file_format.c_str())); dmlc::Parser<uint32_t>::Create(fname.c_str(), partid, npart, file_format.c_str()));
DMatrix* dmat = DMatrix::Create(parser.get(), cache_file); DMatrix* dmat = DMatrix::Create(parser.get(), cache_file);
if (!silent) { if (!silent) {
LOG(CONSOLE) << dmat->info().num_row << 'x' << dmat->info().num_col << " matrix with " LOG(CONSOLE) << dmat->Info().num_row_ << 'x' << dmat->Info().num_col_ << " matrix with "
<< dmat->info().num_nonzero << " entries loaded from " << uri; << dmat->Info().num_nonzero_ << " entries loaded from " << uri;
} }
  /* sync up the number of features after the matrix is loaded;   /* sync up the number of features after the matrix is loaded;
   * partitioned data will fail the train/val validation check    * partitioned data will fail the train/val validation check
   * since partitioned data does not know the real number of features. */    * since partitioned data does not know the real number of features. */
rabit::Allreduce<rabit::op::Max>(&dmat->info().num_col, 1); rabit::Allreduce<rabit::op::Max>(&dmat->Info().num_col_, 1);
  // backward compatibility code.   // backward compatibility code.
if (!load_row_split) { if (!load_row_split) {
MetaInfo& info = dmat->info(); MetaInfo& info = dmat->Info();
if (MetaTryLoadGroup(fname + ".group", &info.group_ptr) && !silent) { if (MetaTryLoadGroup(fname + ".group", &info.group_ptr_) && !silent) {
LOG(CONSOLE) << info.group_ptr.size() - 1 LOG(CONSOLE) << info.group_ptr_.size() - 1
<< " groups are loaded from " << fname << ".group"; << " groups are loaded from " << fname << ".group";
} }
if (MetaTryLoadFloatInfo(fname + ".base_margin", &info.base_margin) && !silent) { if (MetaTryLoadFloatInfo(fname + ".base_margin", &info.base_margin_) && !silent) {
LOG(CONSOLE) << info.base_margin.size() LOG(CONSOLE) << info.base_margin_.size()
<< " base_margin are loaded from " << fname << ".base_margin"; << " base_margin are loaded from " << fname << ".base_margin";
} }
if (MetaTryLoadFloatInfo(fname + ".weight", &info.weights) && !silent) { if (MetaTryLoadFloatInfo(fname + ".weight", &info.weights_) && !silent) {
LOG(CONSOLE) << info.weights.size() LOG(CONSOLE) << info.weights_.size()
<< " weights are loaded from " << fname << ".weight"; << " weights are loaded from " << fname << ".weight";
} }
} }
@ -18,7 +18,7 @@ void SimpleCSRSource::Clear() {
void SimpleCSRSource::CopyFrom(DMatrix* src) { void SimpleCSRSource::CopyFrom(DMatrix* src) {
this->Clear(); this->Clear();
this->info = src->info(); this->info = src->Info();
dmlc::DataIter<RowBatch>* iter = src->RowIterator(); dmlc::DataIter<RowBatch>* iter = src->RowIterator();
iter->BeforeFirst(); iter->BeforeFirst();
while (iter->Next()) { while (iter->Next()) {
@ -36,10 +36,10 @@ void SimpleCSRSource::CopyFrom(dmlc::Parser<uint32_t>* parser) {
while (parser->Next()) { while (parser->Next()) {
const dmlc::RowBlock<uint32_t>& batch = parser->Value(); const dmlc::RowBlock<uint32_t>& batch = parser->Value();
if (batch.label != nullptr) { if (batch.label != nullptr) {
info.labels.insert(info.labels.end(), batch.label, batch.label + batch.size); info.labels_.insert(info.labels_.end(), batch.label, batch.label + batch.size);
} }
if (batch.weight != nullptr) { if (batch.weight != nullptr) {
info.weights.insert(info.weights.end(), batch.weight, batch.weight + batch.size); info.weights_.insert(info.weights_.end(), batch.weight, batch.weight + batch.size);
} }
// Remove the assertion on batch.index, which can be null in the case that the data in this // Remove the assertion on batch.index, which can be null in the case that the data in this
// batch is entirely sparse. Although it's true that this indicates a likely issue with the // batch is entirely sparse. Although it's true that this indicates a likely issue with the
@ -48,13 +48,13 @@ void SimpleCSRSource::CopyFrom(dmlc::Parser<uint32_t>* parser) {
// CHECK(batch.index != nullptr); // CHECK(batch.index != nullptr);
// update information // update information
this->info.num_row += batch.size; this->info.num_row_ += batch.size;
// copy the data over // copy the data over
for (size_t i = batch.offset[0]; i < batch.offset[batch.size]; ++i) { for (size_t i = batch.offset[0]; i < batch.offset[batch.size]; ++i) {
uint32_t index = batch.index[i]; uint32_t index = batch.index[i];
bst_float fvalue = batch.value == nullptr ? 1.0f : batch.value[i]; bst_float fvalue = batch.value == nullptr ? 1.0f : batch.value[i];
row_data_.push_back(SparseBatch::Entry(index, fvalue)); row_data_.emplace_back(index, fvalue);
this->info.num_col = std::max(this->info.num_col, this->info.num_col_ = std::max(this->info.num_col_,
static_cast<uint64_t>(index + 1)); static_cast<uint64_t>(index + 1));
} }
size_t top = row_ptr_.size(); size_t top = row_ptr_.size();
@ -62,7 +62,7 @@ void SimpleCSRSource::CopyFrom(dmlc::Parser<uint32_t>* parser) {
row_ptr_.push_back(row_ptr_[top - 1] + batch.offset[i + 1] - batch.offset[0]); row_ptr_.push_back(row_ptr_[top - 1] + batch.offset[i + 1] - batch.offset[0]);
} }
} }
this->info.num_nonzero = static_cast<uint64_t>(row_data_.size()); this->info.num_nonzero_ = static_cast<uint64_t>(row_data_.size());
} }
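row_data_.emplace_back(index, fvalue) is modernize-use-emplace: the Entry is constructed directly in the vector's storage rather than built as a temporary and copied. Sketch:

#include <cstdint>
#include <vector>

struct Entry {
  uint32_t index;
  float fvalue;
  Entry(uint32_t index, float fvalue) : index(index), fvalue(fvalue) {}
};

int main() {
  std::vector<Entry> data;
  // Was: data.push_back(Entry(3, 0.5f));
  data.emplace_back(3, 0.5f);  // constructs in place, no temporary Entry
}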
void SimpleCSRSource::LoadBinary(dmlc::Stream* fi) { void SimpleCSRSource::LoadBinary(dmlc::Stream* fi) {
@ -35,9 +35,9 @@ class SimpleCSRSource : public DataSource {
std::vector<RowBatch::Entry> row_data_; std::vector<RowBatch::Entry> row_data_;
// functions // functions
/*! \brief default constructor */ /*! \brief default constructor */
SimpleCSRSource() : row_ptr_(1, 0), at_first_(true) {} SimpleCSRSource() : row_ptr_(1, 0) {}
/*! \brief destructor */ /*! \brief destructor */
virtual ~SimpleCSRSource() {} ~SimpleCSRSource() override = default;
/*! \brief clear the data structure */ /*! \brief clear the data structure */
void Clear(); void Clear();
/*! /*!
@ -72,7 +72,7 @@ class SimpleCSRSource : public DataSource {
private: private:
/*! \brief internal variable, used to support iterator interface */ /*! \brief internal variable, used to support iterator interface */
bool at_first_; bool at_first_{true};
/*! \brief */ /*! \brief */
RowBatch batch_; RowBatch batch_;
}; };
@ -20,7 +20,7 @@ bool SimpleDMatrix::ColBatchIter::Next() {
data_ptr_ += 1; data_ptr_ += 1;
SparsePage* pcol = cpages_[data_ptr_ - 1].get(); SparsePage* pcol = cpages_[data_ptr_ - 1].get();
batch_.size = col_index_.size(); batch_.size = col_index_.size();
col_data_.resize(col_index_.size(), SparseBatch::Inst(NULL, 0)); col_data_.resize(col_index_.size(), SparseBatch::Inst(nullptr, 0));
for (size_t i = 0; i < col_data_.size(); ++i) { for (size_t i = 0; i < col_data_.size(); ++i) {
const bst_uint ridx = col_index_[i]; const bst_uint ridx = col_index_[i];
col_data_[i] = SparseBatch::Inst col_data_[i] = SparseBatch::Inst
@ -33,7 +33,7 @@ bool SimpleDMatrix::ColBatchIter::Next() {
} }
dmlc::DataIter<ColBatch>* SimpleDMatrix::ColIterator() { dmlc::DataIter<ColBatch>* SimpleDMatrix::ColIterator() {
size_t ncol = this->info().num_col; size_t ncol = this->Info().num_col_;
col_iter_.col_index_.resize(ncol); col_iter_.col_index_.resize(ncol);
for (size_t i = 0; i < ncol; ++i) { for (size_t i = 0; i < ncol; ++i) {
col_iter_.col_index_[i] = static_cast<bst_uint>(i); col_iter_.col_index_[i] = static_cast<bst_uint>(i);
@ -43,10 +43,10 @@ dmlc::DataIter<ColBatch>* SimpleDMatrix::ColIterator() {
} }
dmlc::DataIter<ColBatch>* SimpleDMatrix::ColIterator(const std::vector<bst_uint>&fset) { dmlc::DataIter<ColBatch>* SimpleDMatrix::ColIterator(const std::vector<bst_uint>&fset) {
size_t ncol = this->info().num_col; size_t ncol = this->Info().num_col_;
col_iter_.col_index_.resize(0); col_iter_.col_index_.resize(0);
for (size_t i = 0; i < fset.size(); ++i) { for (auto fidx : fset) {
if (fset[i] < ncol) col_iter_.col_index_.push_back(fset[i]); if (fidx < ncol) col_iter_.col_index_.push_back(fidx);
} }
col_iter_.BeforeFirst(); col_iter_.BeforeFirst();
return &col_iter_; return &col_iter_;
@ -56,9 +56,9 @@ void SimpleDMatrix::InitColAccess(const std::vector<bool> &enabled,
float pkeep, float pkeep,
size_t max_row_perbatch, bool sorted) { size_t max_row_perbatch, bool sorted) {
if (this->HaveColAccess(sorted)) return; if (this->HaveColAccess(sorted)) return;
col_iter_.sorted = sorted; col_iter_.sorted_ = sorted;
col_iter_.cpages_.clear(); col_iter_.cpages_.clear();
if (info().num_row < max_row_perbatch) { if (Info().num_row_ < max_row_perbatch) {
std::unique_ptr<SparsePage> page(new SparsePage()); std::unique_ptr<SparsePage> page(new SparsePage());
this->MakeOneBatch(enabled, pkeep, page.get(), sorted); this->MakeOneBatch(enabled, pkeep, page.get(), sorted);
col_iter_.cpages_.push_back(std::move(page)); col_iter_.cpages_.push_back(std::move(page));
@ -66,10 +66,10 @@ void SimpleDMatrix::InitColAccess(const std::vector<bool> &enabled,
this->MakeManyBatch(enabled, pkeep, max_row_perbatch, sorted); this->MakeManyBatch(enabled, pkeep, max_row_perbatch, sorted);
} }
// setup col-size // setup col-size
col_size_.resize(info().num_col); col_size_.resize(Info().num_col_);
std::fill(col_size_.begin(), col_size_.end(), 0); std::fill(col_size_.begin(), col_size_.end(), 0);
for (size_t i = 0; i < col_iter_.cpages_.size(); ++i) { for (auto & cpage : col_iter_.cpages_) {
SparsePage *pcol = col_iter_.cpages_[i].get(); SparsePage *pcol = cpage.get();
for (size_t j = 0; j < pcol->Size(); ++j) { for (size_t j = 0; j < pcol->Size(); ++j) {
col_size_[j] += pcol->offset[j + 1] - pcol->offset[j]; col_size_[j] += pcol->offset[j + 1] - pcol->offset[j];
} }
@ -80,14 +80,14 @@ void SimpleDMatrix::InitColAccess(const std::vector<bool> &enabled,
void SimpleDMatrix::MakeOneBatch(const std::vector<bool>& enabled, float pkeep, void SimpleDMatrix::MakeOneBatch(const std::vector<bool>& enabled, float pkeep,
SparsePage* pcol, bool sorted) { SparsePage* pcol, bool sorted) {
// clear rowset // clear rowset
buffered_rowset_.clear(); buffered_rowset_.Clear();
// bit map // bit map
const int nthread = omp_get_max_threads(); const int nthread = omp_get_max_threads();
std::vector<bool> bmap; std::vector<bool> bmap;
pcol->Clear(); pcol->Clear();
common::ParallelGroupBuilder<SparseBatch::Entry> common::ParallelGroupBuilder<SparseBatch::Entry>
builder(&pcol->offset, &pcol->data); builder(&pcol->offset, &pcol->data);
builder.InitBudget(info().num_col, nthread); builder.InitBudget(Info().num_col_, nthread);
// start working // start working
dmlc::DataIter<RowBatch>* iter = this->RowIterator(); dmlc::DataIter<RowBatch>* iter = this->RowIterator();
iter->BeforeFirst(); iter->BeforeFirst();
@ -99,9 +99,9 @@ void SimpleDMatrix::MakeOneBatch(const std::vector<bool>& enabled, float pkeep,
long batch_size = static_cast<long>(batch.size); // NOLINT(*) long batch_size = static_cast<long>(batch.size); // NOLINT(*)
for (long i = 0; i < batch_size; ++i) { // NOLINT(*) for (long i = 0; i < batch_size; ++i) { // NOLINT(*)
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i); auto ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (pkeep == 1.0f || coin_flip(rnd)) { if (pkeep == 1.0f || coin_flip(rnd)) {
buffered_rowset_.push_back(ridx); buffered_rowset_.PushBack(ridx);
} else { } else {
bmap[i] = false; bmap[i] = false;
} }
@ -109,7 +109,7 @@ void SimpleDMatrix::MakeOneBatch(const std::vector<bool>& enabled, float pkeep,
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (long i = 0; i < batch_size; ++i) { // NOLINT(*) for (long i = 0; i < batch_size; ++i) { // NOLINT(*)
int tid = omp_get_thread_num(); int tid = omp_get_thread_num();
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i); auto ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (bmap[ridx]) { if (bmap[ridx]) {
RowBatch::Inst inst = batch[i]; RowBatch::Inst inst = batch[i];
for (bst_uint j = 0; j < inst.length; ++j) { for (bst_uint j = 0; j < inst.length; ++j) {
@ -128,13 +128,13 @@ void SimpleDMatrix::MakeOneBatch(const std::vector<bool>& enabled, float pkeep,
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (long i = 0; i < static_cast<long>(batch.size); ++i) { // NOLINT(*) for (long i = 0; i < static_cast<long>(batch.size); ++i) { // NOLINT(*)
int tid = omp_get_thread_num(); int tid = omp_get_thread_num();
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i); auto ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (bmap[ridx]) { if (bmap[ridx]) {
RowBatch::Inst inst = batch[i]; RowBatch::Inst inst = batch[i];
for (bst_uint j = 0; j < inst.length; ++j) { for (bst_uint j = 0; j < inst.length; ++j) {
if (enabled[inst[j].index]) { if (enabled[inst[j].index]) {
builder.Push(inst[j].index, builder.Push(inst[j].index,
SparseBatch::Entry((bst_uint)(batch.base_rowid+i), SparseBatch::Entry(static_cast<bst_uint>(batch.base_rowid+i),
inst[j].fvalue), tid); inst[j].fvalue), tid);
} }
} }
@ -142,11 +142,11 @@ void SimpleDMatrix::MakeOneBatch(const std::vector<bool>& enabled, float pkeep,
} }
} }
CHECK_EQ(pcol->Size(), info().num_col); CHECK_EQ(pcol->Size(), Info().num_col_);
if (sorted) { if (sorted) {
// sort columns // sort columns
bst_omp_uint ncol = static_cast<bst_omp_uint>(pcol->Size()); auto ncol = static_cast<bst_omp_uint>(pcol->Size());
#pragma omp parallel for schedule(dynamic, 1) num_threads(nthread) #pragma omp parallel for schedule(dynamic, 1) num_threads(nthread)
for (bst_omp_uint i = 0; i < ncol; ++i) { for (bst_omp_uint i = 0; i < ncol; ++i) {
if (pcol->offset[i] < pcol->offset[i + 1]) { if (pcol->offset[i] < pcol->offset[i + 1]) {
@ -164,7 +164,7 @@ void SimpleDMatrix::MakeManyBatch(const std::vector<bool>& enabled,
size_t btop = 0; size_t btop = 0;
std::bernoulli_distribution coin_flip(pkeep); std::bernoulli_distribution coin_flip(pkeep);
auto& rnd = common::GlobalRandom(); auto& rnd = common::GlobalRandom();
buffered_rowset_.clear(); buffered_rowset_.Clear();
// internal temp cache // internal temp cache
SparsePage tmp; tmp.Clear(); SparsePage tmp; tmp.Clear();
// start working // start working
@ -174,16 +174,16 @@ void SimpleDMatrix::MakeManyBatch(const std::vector<bool>& enabled,
while (iter->Next()) { while (iter->Next()) {
const RowBatch &batch = iter->Value(); const RowBatch &batch = iter->Value();
for (size_t i = 0; i < batch.size; ++i) { for (size_t i = 0; i < batch.size; ++i) {
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i); auto ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (pkeep == 1.0f || coin_flip(rnd)) { if (pkeep == 1.0f || coin_flip(rnd)) {
buffered_rowset_.push_back(ridx); buffered_rowset_.PushBack(ridx);
tmp.Push(batch[i]); tmp.Push(batch[i]);
} }
if (tmp.Size() >= max_row_perbatch) { if (tmp.Size() >= max_row_perbatch) {
std::unique_ptr<SparsePage> page(new SparsePage()); std::unique_ptr<SparsePage> page(new SparsePage());
this->MakeColPage(tmp.GetRowBatch(0), btop, enabled, page.get(), sorted); this->MakeColPage(tmp.GetRowBatch(0), btop, enabled, page.get(), sorted);
col_iter_.cpages_.push_back(std::move(page)); col_iter_.cpages_.push_back(std::move(page));
btop = buffered_rowset_.size(); btop = buffered_rowset_.Size();
tmp.Clear(); tmp.Clear();
} }
} }
@ -205,7 +205,7 @@ void SimpleDMatrix::MakeColPage(const RowBatch& batch,
pcol->Clear(); pcol->Clear();
common::ParallelGroupBuilder<SparseBatch::Entry> common::ParallelGroupBuilder<SparseBatch::Entry>
builder(&pcol->offset, &pcol->data); builder(&pcol->offset, &pcol->data);
builder.InitBudget(info().num_col, nthread); builder.InitBudget(Info().num_col_, nthread);
bst_omp_uint ndata = static_cast<bst_uint>(batch.size); bst_omp_uint ndata = static_cast<bst_uint>(batch.size);
#pragma omp parallel for schedule(static) num_threads(nthread) #pragma omp parallel for schedule(static) num_threads(nthread)
for (bst_omp_uint i = 0; i < ndata; ++i) { for (bst_omp_uint i = 0; i < ndata; ++i) {
@ -231,10 +231,10 @@ void SimpleDMatrix::MakeColPage(const RowBatch& batch,
tid); tid);
} }
} }
CHECK_EQ(pcol->Size(), info().num_col); CHECK_EQ(pcol->Size(), Info().num_col_);
// sort columns // sort columns
if (sorted) { if (sorted) {
bst_omp_uint ncol = static_cast<bst_omp_uint>(pcol->Size()); auto ncol = static_cast<bst_omp_uint>(pcol->Size());
#pragma omp parallel for schedule(dynamic, 1) num_threads(nthread) #pragma omp parallel for schedule(dynamic, 1) num_threads(nthread)
for (bst_omp_uint i = 0; i < ncol; ++i) { for (bst_omp_uint i = 0; i < ncol; ++i) {
if (pcol->offset[i] < pcol->offset[i + 1]) { if (pcol->offset[i] < pcol->offset[i + 1]) {
@ -22,11 +22,11 @@ class SimpleDMatrix : public DMatrix {
explicit SimpleDMatrix(std::unique_ptr<DataSource>&& source) explicit SimpleDMatrix(std::unique_ptr<DataSource>&& source)
: source_(std::move(source)) {} : source_(std::move(source)) {}
MetaInfo& info() override { MetaInfo& Info() override {
return source_->info; return source_->info;
} }
const MetaInfo& info() const override { const MetaInfo& Info() const override {
return source_->info; return source_->info;
} }
@ -37,10 +37,10 @@ class SimpleDMatrix : public DMatrix {
} }
bool HaveColAccess(bool sorted) const override { bool HaveColAccess(bool sorted) const override {
return col_size_.size() != 0 && col_iter_.sorted == sorted; return col_size_.size() != 0 && col_iter_.sorted_ == sorted;
} }
const RowSet& buffered_rowset() const override { const RowSet& BufferedRowset() const override {
return buffered_rowset_; return buffered_rowset_;
} }
@ -49,8 +49,8 @@ class SimpleDMatrix : public DMatrix {
} }
float GetColDensity(size_t cidx) const override { float GetColDensity(size_t cidx) const override {
size_t nmiss = buffered_rowset_.size() - col_size_[cidx]; size_t nmiss = buffered_rowset_.Size() - col_size_[cidx];
return 1.0f - (static_cast<float>(nmiss)) / buffered_rowset_.size(); return 1.0f - (static_cast<float>(nmiss)) / buffered_rowset_.Size();
} }
dmlc::DataIter<ColBatch>* ColIterator() override; dmlc::DataIter<ColBatch>* ColIterator() override;
@ -67,7 +67,7 @@ class SimpleDMatrix : public DMatrix {
// in-memory column batch iterator. // in-memory column batch iterator.
struct ColBatchIter: dmlc::DataIter<ColBatch> { struct ColBatchIter: dmlc::DataIter<ColBatch> {
public: public:
ColBatchIter() : data_ptr_(0), sorted(false) {} ColBatchIter() = default;
void BeforeFirst() override { void BeforeFirst() override {
data_ptr_ = 0; data_ptr_ = 0;
} }
@ -86,11 +86,11 @@ class SimpleDMatrix : public DMatrix {
// column sparse pages // column sparse pages
std::vector<std::unique_ptr<SparsePage> > cpages_; std::vector<std::unique_ptr<SparsePage> > cpages_;
// data pointer // data pointer
size_t data_ptr_; size_t data_ptr_{0};
// temporal space for batch // temporal space for batch
ColBatch batch_; ColBatch batch_;
// Is column sorted? // Is column sorted?
bool sorted; bool sorted_{false};
}; };
// source data pointer. // source data pointer.
@ -51,11 +51,11 @@ class SparsePage {
return offset.size() - 1; return offset.size() - 1;
} }
/*! \return estimation of memory cost of this page */ /*! \return estimation of memory cost of this page */
inline size_t MemCostBytes(void) const { inline size_t MemCostBytes() const {
return offset.size() * sizeof(size_t) + data.size() * sizeof(SparseBatch::Entry); return offset.size() * sizeof(size_t) + data.size() * sizeof(SparseBatch::Entry);
} }
/*! \brief clear the page */ /*! \brief clear the page */
inline void Clear(void) { inline void Clear() {
min_index = 0; min_index = 0;
offset.clear(); offset.clear();
offset.push_back(0); offset.push_back(0);
@ -92,7 +92,7 @@ class SparsePage {
for (size_t i = batch.offset[0]; i < batch.offset[batch.size]; ++i) { for (size_t i = batch.offset[0]; i < batch.offset[batch.size]; ++i) {
uint32_t index = batch.index[i]; uint32_t index = batch.index[i];
bst_float fvalue = batch.value == nullptr ? 1.0f : batch.value[i]; bst_float fvalue = batch.value == nullptr ? 1.0f : batch.value[i];
data.push_back(SparseBatch::Entry(index, fvalue)); data.emplace_back(index, fvalue);
} }
CHECK_EQ(offset.back(), data.size()); CHECK_EQ(offset.back(), data.size());
} }
@ -145,7 +145,7 @@ class SparsePage {
class SparsePage::Format { class SparsePage::Format {
public: public:
/*! \brief virtual destructor */ /*! \brief virtual destructor */
virtual ~Format() {} virtual ~Format() = default;
/*! /*!
* \brief Load all the segments into page, advance fi to end of the block. * \brief Load all the segments into page, advance fi to end of the block.
* \param page The data to read page into. * \param page The data to read page into.
@ -94,9 +94,9 @@ void SparsePageDMatrix::ColPageIter::Init(const std::vector<bst_uint>& index_set
} }
dmlc::DataIter<ColBatch>* SparsePageDMatrix::ColIterator() { dmlc::DataIter<ColBatch>* SparsePageDMatrix::ColIterator() {
CHECK(col_iter_.get() != nullptr); CHECK(col_iter_ != nullptr);
std::vector<bst_uint> col_index; std::vector<bst_uint> col_index;
size_t ncol = this->info().num_col; size_t ncol = this->Info().num_col_;
for (size_t i = 0; i < ncol; ++i) { for (size_t i = 0; i < ncol; ++i) {
col_index.push_back(static_cast<bst_uint>(i)); col_index.push_back(static_cast<bst_uint>(i));
} }
@ -106,12 +106,12 @@ dmlc::DataIter<ColBatch>* SparsePageDMatrix::ColIterator() {
dmlc::DataIter<ColBatch>* SparsePageDMatrix:: dmlc::DataIter<ColBatch>* SparsePageDMatrix::
ColIterator(const std::vector<bst_uint>& fset) { ColIterator(const std::vector<bst_uint>& fset) {
CHECK(col_iter_.get() != nullptr); CHECK(col_iter_ != nullptr);
std::vector<bst_uint> col_index; std::vector<bst_uint> col_index;
size_t ncol = this->info().num_col; size_t ncol = this->Info().num_col_;
for (size_t i = 0; i < fset.size(); ++i) { for (auto fidx : fset) {
if (fset[i] < ncol) { if (fidx < ncol) {
col_index.push_back(fset[i]); col_index.push_back(fidx);
} }
} }
col_iter_->Init(col_index, false); col_iter_->Init(col_index, false);
@ -126,7 +126,7 @@ bool SparsePageDMatrix::TryInitColData(bool sorted) {
std::string col_meta_name = cache_shards[0] + ".col.meta"; std::string col_meta_name = cache_shards[0] + ".col.meta";
std::unique_ptr<dmlc::Stream> fmeta( std::unique_ptr<dmlc::Stream> fmeta(
dmlc::Stream::Create(col_meta_name.c_str(), "r", true)); dmlc::Stream::Create(col_meta_name.c_str(), "r", true));
if (fmeta.get() == nullptr) return false; if (fmeta == nullptr) return false;
CHECK(fmeta->Read(&buffered_rowset_)) << "invalid col.meta file"; CHECK(fmeta->Read(&buffered_rowset_)) << "invalid col.meta file";
CHECK(fmeta->Read(&col_size_)) << "invalid col.meta file"; CHECK(fmeta->Read(&col_size_)) << "invalid col.meta file";
} }
@ -136,7 +136,7 @@ bool SparsePageDMatrix::TryInitColData(bool sorted) {
std::string col_data_name = prefix + ".col.page"; std::string col_data_name = prefix + ".col.page";
std::unique_ptr<dmlc::SeekStream> fdata( std::unique_ptr<dmlc::SeekStream> fdata(
dmlc::SeekStream::CreateForRead(col_data_name.c_str(), true)); dmlc::SeekStream::CreateForRead(col_data_name.c_str(), true));
if (fdata.get() == nullptr) return false; if (fdata == nullptr) return false;
files.push_back(std::move(fdata)); files.push_back(std::move(fdata));
} }
col_iter_.reset(new ColPageIter(std::move(files))); col_iter_.reset(new ColPageIter(std::move(files)));
@ -150,12 +150,12 @@ void SparsePageDMatrix::InitColAccess(const std::vector<bool>& enabled,
size_t max_row_perbatch, bool sorted) { size_t max_row_perbatch, bool sorted) {
if (HaveColAccess(sorted)) return; if (HaveColAccess(sorted)) return;
if (TryInitColData(sorted)) return; if (TryInitColData(sorted)) return;
const MetaInfo& info = this->info(); const MetaInfo& info = this->Info();
if (max_row_perbatch == std::numeric_limits<size_t>::max()) { if (max_row_perbatch == std::numeric_limits<size_t>::max()) {
max_row_perbatch = kMaxRowPerBatch; max_row_perbatch = kMaxRowPerBatch;
} }
buffered_rowset_.clear(); buffered_rowset_.Clear();
col_size_.resize(info.num_col); col_size_.resize(info.num_col_);
std::fill(col_size_.begin(), col_size_.end(), 0); std::fill(col_size_.begin(), col_size_.end(), 0);
dmlc::DataIter<RowBatch>* iter = this->RowIterator(); dmlc::DataIter<RowBatch>* iter = this->RowIterator();
std::bernoulli_distribution coin_flip(pkeep); std::bernoulli_distribution coin_flip(pkeep);
@ -173,7 +173,7 @@ void SparsePageDMatrix::InitColAccess(const std::vector<bool>& enabled,
const int nthread = std::max(omp_get_max_threads(), std::max(omp_get_num_procs() / 2 - 1, 1)); const int nthread = std::max(omp_get_max_threads(), std::max(omp_get_num_procs() / 2 - 1, 1));
common::ParallelGroupBuilder<SparseBatch::Entry> common::ParallelGroupBuilder<SparseBatch::Entry>
builder(&pcol->offset, &pcol->data); builder(&pcol->offset, &pcol->data);
builder.InitBudget(info.num_col, nthread); builder.InitBudget(info.num_col_, nthread);
bst_omp_uint ndata = static_cast<bst_uint>(prow.Size()); bst_omp_uint ndata = static_cast<bst_uint>(prow.Size());
#pragma omp parallel for schedule(static) num_threads(nthread) #pragma omp parallel for schedule(static) num_threads(nthread)
for (bst_omp_uint i = 0; i < ndata; ++i) { for (bst_omp_uint i = 0; i < ndata; ++i) {
@ -196,10 +196,10 @@ void SparsePageDMatrix::InitColAccess(const std::vector<bool>& enabled,
tid); tid);
} }
} }
CHECK_EQ(pcol->Size(), info.num_col); CHECK_EQ(pcol->Size(), info.num_col_);
// sort columns // sort columns
if (sorted) { if (sorted) {
bst_omp_uint ncol = static_cast<bst_omp_uint>(pcol->Size()); auto ncol = static_cast<bst_omp_uint>(pcol->Size());
#pragma omp parallel for schedule(dynamic, 1) num_threads(nthread) #pragma omp parallel for schedule(dynamic, 1) num_threads(nthread)
for (bst_omp_uint i = 0; i < ncol; ++i) { for (bst_omp_uint i = 0; i < ncol; ++i) {
if (pcol->offset[i] < pcol->offset[i + 1]) { if (pcol->offset[i] < pcol->offset[i + 1]) {
@ -213,16 +213,16 @@ void SparsePageDMatrix::InitColAccess(const std::vector<bool>& enabled,
auto make_next_col = [&] (SparsePage* dptr) { auto make_next_col = [&] (SparsePage* dptr) {
tmp.Clear(); tmp.Clear();
size_t btop = buffered_rowset_.size(); size_t btop = buffered_rowset_.Size();
while (true) { while (true) {
if (batch_ptr != batch_top) { if (batch_ptr != batch_top) {
const RowBatch& batch = iter->Value(); const RowBatch& batch = iter->Value();
CHECK_EQ(batch_top, batch.size); CHECK_EQ(batch_top, batch.size);
for (size_t i = batch_ptr; i < batch_top; ++i) { for (size_t i = batch_ptr; i < batch_top; ++i) {
bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i); auto ridx = static_cast<bst_uint>(batch.base_rowid + i);
if (pkeep == 1.0f || coin_flip(rnd)) { if (pkeep == 1.0f || coin_flip(rnd)) {
buffered_rowset_.push_back(ridx); buffered_rowset_.PushBack(ridx);
tmp.Push(batch[i]); tmp.Push(batch[i]);
} }
@ -263,7 +263,7 @@ void SparsePageDMatrix::InitColAccess(const std::vector<bool>& enabled,
double tstart = dmlc::GetTime(); double tstart = dmlc::GetTime();
size_t bytes_write = 0; size_t bytes_write = 0;
// print every 4 sec. // print every 4 sec.
const double kStep = 4.0; constexpr double kStep = 4.0;
size_t tick_expected = kStep; size_t tick_expected = kStep;
while (make_next_col(page.get())) { while (make_next_col(page.get())) {
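The fmeta.get() == nullptr and fdata.get() == nullptr checks in this file become direct comparisons against nullptr; std::unique_ptr supplies those comparison operators, so the .get() call is redundant (the same reasoning covers CHECK(col_iter_ != nullptr) above). Illustrative only:

#include <memory>

int main() {
  std::unique_ptr<int> stream;           // stands in for the dmlc stream handles
  bool old_style = (stream.get() == nullptr);
  bool new_style = (stream == nullptr);  // equivalent, and what clang-tidy suggests
  return old_style == new_style ? 0 : 1;
}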
View File
@ -10,6 +10,7 @@
#include <xgboost/base.h> #include <xgboost/base.h>
#include <xgboost/data.h> #include <xgboost/data.h>
#include <dmlc/threadediter.h> #include <dmlc/threadediter.h>
#include <utility>
#include <vector> #include <vector>
#include <algorithm> #include <algorithm>
#include <string> #include <string>
@ -22,15 +23,15 @@ namespace data {
class SparsePageDMatrix : public DMatrix { class SparsePageDMatrix : public DMatrix {
public: public:
explicit SparsePageDMatrix(std::unique_ptr<DataSource>&& source, explicit SparsePageDMatrix(std::unique_ptr<DataSource>&& source,
const std::string& cache_info) std::string cache_info)
: source_(std::move(source)), cache_info_(cache_info) { : source_(std::move(source)), cache_info_(std::move(cache_info)) {
} }
MetaInfo& info() override { MetaInfo& Info() override {
return source_->info; return source_->info;
} }
const MetaInfo& info() const override { const MetaInfo& Info() const override {
return source_->info; return source_->info;
} }
@ -41,10 +42,10 @@ class SparsePageDMatrix : public DMatrix {
} }
bool HaveColAccess(bool sorted) const override { bool HaveColAccess(bool sorted) const override {
return col_iter_.get() != nullptr && col_iter_->sorted == sorted; return col_iter_ != nullptr && col_iter_->sorted == sorted;
} }
const RowSet& buffered_rowset() const override { const RowSet& BufferedRowset() const override {
return buffered_rowset_; return buffered_rowset_;
} }
@ -53,8 +54,8 @@ class SparsePageDMatrix : public DMatrix {
} }
float GetColDensity(size_t cidx) const override { float GetColDensity(size_t cidx) const override {
size_t nmiss = buffered_rowset_.size() - col_size_[cidx]; size_t nmiss = buffered_rowset_.Size() - col_size_[cidx];
return 1.0f - (static_cast<float>(nmiss)) / buffered_rowset_.size(); return 1.0f - (static_cast<float>(nmiss)) / buffered_rowset_.Size();
} }
bool SingleColBlock() const override { bool SingleColBlock() const override {
@ -79,7 +80,7 @@ class SparsePageDMatrix : public DMatrix {
class ColPageIter : public dmlc::DataIter<ColBatch> { class ColPageIter : public dmlc::DataIter<ColBatch> {
public: public:
explicit ColPageIter(std::vector<std::unique_ptr<dmlc::SeekStream> >&& files); explicit ColPageIter(std::vector<std::unique_ptr<dmlc::SeekStream> >&& files);
virtual ~ColPageIter(); ~ColPageIter() override;
void BeforeFirst() override; void BeforeFirst() override;
const ColBatch &Value() const override { const ColBatch &Value() const override {
return out_; return out_;
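Changing the constructor to take cache_info by value and std::move it into the member is modernize-pass-by-value: rvalue arguments are moved all the way through with no copy, while lvalue callers pay the same single copy as before. A minimal sketch, not the actual SparsePageDMatrix:

#include <string>
#include <utility>

class PageCache {
 public:
  explicit PageCache(std::string cache_info)   // by value at the boundary...
      : cache_info_(std::move(cache_info)) {}  // ...then moved into the member

 private:
  std::string cache_info_;
};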
View File
@ -34,8 +34,7 @@ class SparsePageRawFormat : public SparsePage::Format {
// setup the offset // setup the offset
page->offset.clear(); page->offset.clear();
page->offset.push_back(0); page->offset.push_back(0);
for (size_t i = 0; i < sorted_index_set.size(); ++i) { for (unsigned int fid : sorted_index_set) {
bst_uint fid = sorted_index_set[i];
CHECK_LT(fid + 1, disk_offset_.size()); CHECK_LT(fid + 1, disk_offset_.size());
size_t size = disk_offset_[fid + 1] - disk_offset_[fid]; size_t size = disk_offset_[fid + 1] - disk_offset_[fid];
page->offset.push_back(page->offset.back() + size); page->offset.push_back(page->offset.back() + size);
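modernize-loop-convert rewrites the index loop over sorted_index_set as a range-for, dropping the manual subscripting and the separate fid assignment; note the converted loop spells out the element type (unsigned int fid) rather than using auto. The shape of the transformation, on a throwaway function:

#include <cstddef>
#include <vector>

std::size_t CountBelow(const std::vector<unsigned int>& ids, unsigned int limit) {
  std::size_t n = 0;
  // before: for (size_t i = 0; i < ids.size(); ++i) { unsigned int fid = ids[i]; ... }
  for (unsigned int fid : ids) {
    if (fid < limit) {
      ++n;
    }
  }
  return n;
}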
View File
@ -89,12 +89,12 @@ bool SparsePageSource::CacheExist(const std::string& cache_info) {
{ {
std::string name_info = cache_shards[0]; std::string name_info = cache_shards[0];
std::unique_ptr<dmlc::Stream> finfo(dmlc::Stream::Create(name_info.c_str(), "r", true)); std::unique_ptr<dmlc::Stream> finfo(dmlc::Stream::Create(name_info.c_str(), "r", true));
if (finfo.get() == nullptr) return false; if (finfo == nullptr) return false;
} }
for (const std::string& prefix : cache_shards) { for (const std::string& prefix : cache_shards) {
std::string name_row = prefix + ".row.page"; std::string name_row = prefix + ".row.page";
std::unique_ptr<dmlc::Stream> frow(dmlc::Stream::Create(name_row.c_str(), "r", true)); std::unique_ptr<dmlc::Stream> frow(dmlc::Stream::Create(name_row.c_str(), "r", true));
if (frow.get() == nullptr) return false; if (frow == nullptr) return false;
} }
return true; return true;
} }
@ -119,22 +119,22 @@ void SparsePageSource::Create(dmlc::Parser<uint32_t>* src,
size_t bytes_write = 0; size_t bytes_write = 0;
double tstart = dmlc::GetTime(); double tstart = dmlc::GetTime();
// print every 4 sec. // print every 4 sec.
const double kStep = 4.0; constexpr double kStep = 4.0;
size_t tick_expected = static_cast<double>(kStep); size_t tick_expected = static_cast<double>(kStep);
while (src->Next()) { while (src->Next()) {
const dmlc::RowBlock<uint32_t>& batch = src->Value(); const dmlc::RowBlock<uint32_t>& batch = src->Value();
if (batch.label != nullptr) { if (batch.label != nullptr) {
info.labels.insert(info.labels.end(), batch.label, batch.label + batch.size); info.labels_.insert(info.labels_.end(), batch.label, batch.label + batch.size);
} }
if (batch.weight != nullptr) { if (batch.weight != nullptr) {
info.weights.insert(info.weights.end(), batch.weight, batch.weight + batch.size); info.weights_.insert(info.weights_.end(), batch.weight, batch.weight + batch.size);
} }
info.num_row += batch.size; info.num_row_ += batch.size;
info.num_nonzero += batch.offset[batch.size] - batch.offset[0]; info.num_nonzero_ += batch.offset[batch.size] - batch.offset[0];
for (size_t i = batch.offset[0]; i < batch.offset[batch.size]; ++i) { for (size_t i = batch.offset[0]; i < batch.offset[batch.size]; ++i) {
uint32_t index = batch.index[i]; uint32_t index = batch.index[i];
info.num_col = std::max(info.num_col, info.num_col_ = std::max(info.num_col_,
static_cast<uint64_t>(index + 1)); static_cast<uint64_t>(index + 1));
} }
page->Push(batch); page->Push(batch);
@ -183,7 +183,7 @@ void SparsePageSource::Create(DMatrix* src,
std::shared_ptr<SparsePage> page; std::shared_ptr<SparsePage> page;
writer.Alloc(&page); page->Clear(); writer.Alloc(&page); page->Clear();
MetaInfo info = src->info(); MetaInfo info = src->Info();
size_t bytes_write = 0; size_t bytes_write = 0;
double tstart = dmlc::GetTime(); double tstart = dmlc::GetTime();
dmlc::DataIter<RowBatch>* iter = src->RowIterator(); dmlc::DataIter<RowBatch>* iter = src->RowIterator();
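const double kStep = 4.0 becoming constexpr double kStep = 4.0 in this file is a small but typical change: the value is known at compile time, and constexpr both documents that and makes the constant usable in constant expressions. The k-prefixed CamelCase name is the Google constant-naming rule the renaming check enforces. For instance:

constexpr double kStep = 4.0;  // compile-time constant; k-prefixed per Google style
static_assert(kStep > 0.0, "kStep participates in constant expressions");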
View File
@ -33,7 +33,7 @@ class SparsePageSource : public DataSource {
*/ */
explicit SparsePageSource(const std::string& cache_prefix) noexcept(false); explicit SparsePageSource(const std::string& cache_prefix) noexcept(false);
/*! \brief destructor */ /*! \brief destructor */
virtual ~SparsePageSource(); ~SparsePageSource() override;
// implement Next // implement Next
bool Next() override; bool Next() override;
// implement BeforeFirst // implement BeforeFirst
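virtual ~SparsePageSource() becoming ~SparsePageSource() override is modernize-use-override: when the base destructor is virtual, spelling override lets the compiler verify the relationship, and repeating virtual is redundant. A sketch against a stand-in base:

struct Source {
  virtual ~Source() = default;
};

struct PageSource : Source {
  ~PageSource() override = default;  // checked against Source; 'virtual' omitted
};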
View File
@ -34,7 +34,7 @@ SparsePage::Writer::Writer(
fo->Write(format_shard); fo->Write(format_shard);
std::shared_ptr<SparsePage> page; std::shared_ptr<SparsePage> page;
while (wqueue->Pop(&page)) { while (wqueue->Pop(&page)) {
if (page.get() == nullptr) break; if (page == nullptr) break;
fmt->Write(*page, fo.get()); fmt->Write(*page, fo.get());
qrecycle_.Push(std::move(page)); qrecycle_.Push(std::move(page));
} }
@ -61,7 +61,7 @@ void SparsePage::Writer::PushWrite(std::shared_ptr<SparsePage>&& page) {
} }
void SparsePage::Writer::Alloc(std::shared_ptr<SparsePage>* out_page) { void SparsePage::Writer::Alloc(std::shared_ptr<SparsePage>* out_page) {
CHECK(out_page->get() == nullptr); CHECK(*out_page == nullptr);
if (num_free_buffer_ != 0) { if (num_free_buffer_ != 0) {
out_page->reset(new SparsePage()); out_page->reset(new SparsePage());
--num_free_buffer_; --num_free_buffer_;
View File
@ -52,9 +52,9 @@ class GBLinear : public GradientBooster {
explicit GBLinear(const std::vector<std::shared_ptr<DMatrix> > &cache, explicit GBLinear(const std::vector<std::shared_ptr<DMatrix> > &cache,
bst_float base_margin) bst_float base_margin)
: base_margin_(base_margin), : base_margin_(base_margin),
sum_instance_weight(0), sum_instance_weight_(0),
sum_weight_complete(false), sum_weight_complete_(false),
is_converged(false) { is_converged_(false) {
// Add matrices to the prediction cache // Add matrices to the prediction cache
for (auto &d : cache) { for (auto &d : cache) {
PredictionCacheEntry e; PredictionCacheEntry e;
@ -63,46 +63,46 @@ class GBLinear : public GradientBooster {
} }
} }
void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) override { void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) override {
if (model.weight.size() == 0) { if (model_.weight.size() == 0) {
model.param.InitAllowUnknown(cfg); model_.param.InitAllowUnknown(cfg);
} }
param.InitAllowUnknown(cfg); param_.InitAllowUnknown(cfg);
updater.reset(LinearUpdater::Create(param.updater)); updater_.reset(LinearUpdater::Create(param_.updater));
updater->Init(cfg); updater_->Init(cfg);
monitor.Init("GBLinear ", param.debug_verbose); monitor_.Init("GBLinear ", param_.debug_verbose);
} }
void Load(dmlc::Stream* fi) override { void Load(dmlc::Stream* fi) override {
model.Load(fi); model_.Load(fi);
} }
void Save(dmlc::Stream* fo) const override { void Save(dmlc::Stream* fo) const override {
model.Save(fo); model_.Save(fo);
} }
void DoBoost(DMatrix *p_fmat, void DoBoost(DMatrix *p_fmat,
HostDeviceVector<bst_gpair> *in_gpair, HostDeviceVector<GradientPair> *in_gpair,
ObjFunction* obj) override { ObjFunction* obj) override {
monitor.Start("DoBoost"); monitor_.Start("DoBoost");
if (!p_fmat->HaveColAccess(false)) { if (!p_fmat->HaveColAccess(false)) {
std::vector<bool> enabled(p_fmat->info().num_col, true); std::vector<bool> enabled(p_fmat->Info().num_col_, true);
p_fmat->InitColAccess(enabled, 1.0f, param.max_row_perbatch, false); p_fmat->InitColAccess(enabled, 1.0f, param_.max_row_perbatch, false);
} }
model.LazyInitModel(); model_.LazyInitModel();
this->LazySumWeights(p_fmat); this->LazySumWeights(p_fmat);
if (!this->CheckConvergence()) { if (!this->CheckConvergence()) {
updater->Update(&in_gpair->data_h(), p_fmat, &model, sum_instance_weight); updater_->Update(&in_gpair->HostVector(), p_fmat, &model_, sum_instance_weight_);
} }
this->UpdatePredictionCache(); this->UpdatePredictionCache();
monitor.Stop("DoBoost"); monitor_.Stop("DoBoost");
} }
void PredictBatch(DMatrix *p_fmat, void PredictBatch(DMatrix *p_fmat,
HostDeviceVector<bst_float> *out_preds, HostDeviceVector<bst_float> *out_preds,
unsigned ntree_limit) override { unsigned ntree_limit) override {
monitor.Start("PredictBatch"); monitor_.Start("PredictBatch");
CHECK_EQ(ntree_limit, 0U) CHECK_EQ(ntree_limit, 0U)
<< "GBLinear::Predict ntrees is only valid for gbtree predictor"; << "GBLinear::Predict ntrees is only valid for gbtree predictor";
@ -110,19 +110,19 @@ class GBLinear : public GradientBooster {
auto it = cache_.find(p_fmat); auto it = cache_.find(p_fmat);
if (it != cache_.end() && it->second.predictions.size() != 0) { if (it != cache_.end() && it->second.predictions.size() != 0) {
std::vector<bst_float> &y = it->second.predictions; std::vector<bst_float> &y = it->second.predictions;
out_preds->resize(y.size()); out_preds->Resize(y.size());
std::copy(y.begin(), y.end(), out_preds->data_h().begin()); std::copy(y.begin(), y.end(), out_preds->HostVector().begin());
} else { } else {
this->PredictBatchInternal(p_fmat, &out_preds->data_h()); this->PredictBatchInternal(p_fmat, &out_preds->HostVector());
} }
monitor.Stop("PredictBatch"); monitor_.Stop("PredictBatch");
} }
// add base margin // add base margin
void PredictInstance(const SparseBatch::Inst &inst, void PredictInstance(const SparseBatch::Inst &inst,
std::vector<bst_float> *out_preds, std::vector<bst_float> *out_preds,
unsigned ntree_limit, unsigned ntree_limit,
unsigned root_index) override { unsigned root_index) override {
const int ngroup = model.param.num_output_group; const int ngroup = model_.param.num_output_group;
for (int gid = 0; gid < ngroup; ++gid) { for (int gid = 0; gid < ngroup; ++gid) {
this->Pred(inst, dmlc::BeginPtr(*out_preds), gid, base_margin_); this->Pred(inst, dmlc::BeginPtr(*out_preds), gid, base_margin_);
} }
@ -138,15 +138,15 @@ class GBLinear : public GradientBooster {
std::vector<bst_float>* out_contribs, std::vector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate, int condition = 0, unsigned ntree_limit, bool approximate, int condition = 0,
unsigned condition_feature = 0) override { unsigned condition_feature = 0) override {
model.LazyInitModel(); model_.LazyInitModel();
CHECK_EQ(ntree_limit, 0U) CHECK_EQ(ntree_limit, 0U)
<< "GBLinear::PredictContribution: ntrees is only valid for gbtree predictor"; << "GBLinear::PredictContribution: ntrees is only valid for gbtree predictor";
const std::vector<bst_float>& base_margin = p_fmat->info().base_margin; const std::vector<bst_float>& base_margin = p_fmat->Info().base_margin_;
const int ngroup = model.param.num_output_group; const int ngroup = model_.param.num_output_group;
const size_t ncolumns = model.param.num_feature + 1; const size_t ncolumns = model_.param.num_feature + 1;
// allocate space for (#features + bias) times #groups times #rows // allocate space for (#features + bias) times #groups times #rows
std::vector<bst_float>& contribs = *out_contribs; std::vector<bst_float>& contribs = *out_contribs;
contribs.resize(p_fmat->info().num_row * ncolumns * ngroup); contribs.resize(p_fmat->Info().num_row_ * ncolumns * ngroup);
// make sure contributions is zeroed, we could be reusing a previously allocated one // make sure contributions is zeroed, we could be reusing a previously allocated one
std::fill(contribs.begin(), contribs.end(), 0); std::fill(contribs.begin(), contribs.end(), 0);
// start collecting the contributions // start collecting the contributions
@ -155,21 +155,21 @@ class GBLinear : public GradientBooster {
while (iter->Next()) { while (iter->Next()) {
const RowBatch& batch = iter->Value(); const RowBatch& batch = iter->Value();
// parallel over local batch // parallel over local batch
const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size); const auto nsize = static_cast<bst_omp_uint>(batch.size);
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nsize; ++i) { for (bst_omp_uint i = 0; i < nsize; ++i) {
const RowBatch::Inst &inst = batch[i]; const RowBatch::Inst &inst = batch[i];
size_t row_idx = static_cast<size_t>(batch.base_rowid + i); auto row_idx = static_cast<size_t>(batch.base_rowid + i);
// loop over output groups // loop over output groups
for (int gid = 0; gid < ngroup; ++gid) { for (int gid = 0; gid < ngroup; ++gid) {
bst_float *p_contribs = &contribs[(row_idx * ngroup + gid) * ncolumns]; bst_float *p_contribs = &contribs[(row_idx * ngroup + gid) * ncolumns];
// calculate linear terms' contributions // calculate linear terms' contributions
for (bst_uint c = 0; c < inst.length; ++c) { for (bst_uint c = 0; c < inst.length; ++c) {
if (inst[c].index >= model.param.num_feature) continue; if (inst[c].index >= model_.param.num_feature) continue;
p_contribs[inst[c].index] = inst[c].fvalue * model[inst[c].index][gid]; p_contribs[inst[c].index] = inst[c].fvalue * model_[inst[c].index][gid];
} }
// add base margin to BIAS // add base margin to BIAS
p_contribs[ncolumns - 1] = model.bias()[gid] + p_contribs[ncolumns - 1] = model_.bias()[gid] +
((base_margin.size() != 0) ? base_margin[row_idx * ngroup + gid] : base_margin_); ((base_margin.size() != 0) ? base_margin[row_idx * ngroup + gid] : base_margin_);
} }
} }
@ -182,34 +182,34 @@ class GBLinear : public GradientBooster {
std::vector<bst_float>& contribs = *out_contribs; std::vector<bst_float>& contribs = *out_contribs;
// linear models have no interaction effects // linear models have no interaction effects
const size_t nelements = model.param.num_feature*model.param.num_feature; const size_t nelements = model_.param.num_feature*model_.param.num_feature;
contribs.resize(p_fmat->info().num_row * nelements * model.param.num_output_group); contribs.resize(p_fmat->Info().num_row_ * nelements * model_.param.num_output_group);
std::fill(contribs.begin(), contribs.end(), 0); std::fill(contribs.begin(), contribs.end(), 0);
} }
std::vector<std::string> DumpModel(const FeatureMap& fmap, std::vector<std::string> DumpModel(const FeatureMap& fmap,
bool with_stats, bool with_stats,
std::string format) const override { std::string format) const override {
return model.DumpModel(fmap, with_stats, format); return model_.DumpModel(fmap, with_stats, format);
} }
protected: protected:
void PredictBatchInternal(DMatrix *p_fmat, void PredictBatchInternal(DMatrix *p_fmat,
std::vector<bst_float> *out_preds) { std::vector<bst_float> *out_preds) {
monitor.Start("PredictBatchInternal"); monitor_.Start("PredictBatchInternal");
model.LazyInitModel(); model_.LazyInitModel();
std::vector<bst_float> &preds = *out_preds; std::vector<bst_float> &preds = *out_preds;
const std::vector<bst_float>& base_margin = p_fmat->info().base_margin; const std::vector<bst_float>& base_margin = p_fmat->Info().base_margin_;
// start collecting the prediction // start collecting the prediction
dmlc::DataIter<RowBatch> *iter = p_fmat->RowIterator(); dmlc::DataIter<RowBatch> *iter = p_fmat->RowIterator();
const int ngroup = model.param.num_output_group; const int ngroup = model_.param.num_output_group;
preds.resize(p_fmat->info().num_row * ngroup); preds.resize(p_fmat->Info().num_row_ * ngroup);
while (iter->Next()) { while (iter->Next()) {
const RowBatch &batch = iter->Value(); const RowBatch &batch = iter->Value();
// output convention: nrow * k, where nrow is number of rows // output convention: nrow * k, where nrow is number of rows
// k is number of group // k is number of group
// parallel over local batch // parallel over local batch
const omp_ulong nsize = static_cast<omp_ulong>(batch.size); const auto nsize = static_cast<omp_ulong>(batch.size);
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (omp_ulong i = 0; i < nsize; ++i) { for (omp_ulong i = 0; i < nsize; ++i) {
const size_t ridx = batch.base_rowid + i; const size_t ridx = batch.base_rowid + i;
@ -221,14 +221,14 @@ class GBLinear : public GradientBooster {
} }
} }
} }
monitor.Stop("PredictBatchInternal"); monitor_.Stop("PredictBatchInternal");
} }
void UpdatePredictionCache() { void UpdatePredictionCache() {
// update cache entry // update cache entry
for (auto &kv : cache_) { for (auto &kv : cache_) {
PredictionCacheEntry &e = kv.second; PredictionCacheEntry &e = kv.second;
if (e.predictions.size() == 0) { if (e.predictions.size() == 0) {
size_t n = model.param.num_output_group * e.data->info().num_row; size_t n = model_.param.num_output_group * e.data->Info().num_row_;
e.predictions.resize(n); e.predictions.resize(n);
} }
this->PredictBatchInternal(e.data.get(), &e.predictions); this->PredictBatchInternal(e.data.get(), &e.predictions);
@ -236,53 +236,53 @@ class GBLinear : public GradientBooster {
} }
bool CheckConvergence() { bool CheckConvergence() {
if (param.tolerance == 0.0f) return false; if (param_.tolerance == 0.0f) return false;
if (is_converged) return true; if (is_converged_) return true;
if (previous_model.weight.size() != model.weight.size()) { if (previous_model_.weight.size() != model_.weight.size()) {
previous_model = model; previous_model_ = model_;
return false; return false;
} }
float largest_dw = 0.0; float largest_dw = 0.0;
for (size_t i = 0; i < model.weight.size(); i++) { for (size_t i = 0; i < model_.weight.size(); i++) {
largest_dw = std::max( largest_dw = std::max(
largest_dw, std::abs(model.weight[i] - previous_model.weight[i])); largest_dw, std::abs(model_.weight[i] - previous_model_.weight[i]));
} }
previous_model = model; previous_model_ = model_;
is_converged = largest_dw <= param.tolerance; is_converged_ = largest_dw <= param_.tolerance;
return is_converged; return is_converged_;
} }
void LazySumWeights(DMatrix *p_fmat) { void LazySumWeights(DMatrix *p_fmat) {
if (!sum_weight_complete) { if (!sum_weight_complete_) {
auto &info = p_fmat->info(); auto &info = p_fmat->Info();
for (size_t i = 0; i < info.num_row; i++) { for (size_t i = 0; i < info.num_row_; i++) {
sum_instance_weight += info.GetWeight(i); sum_instance_weight_ += info.GetWeight(i);
} }
sum_weight_complete = true; sum_weight_complete_ = true;
} }
} }
inline void Pred(const RowBatch::Inst &inst, bst_float *preds, int gid, inline void Pred(const RowBatch::Inst &inst, bst_float *preds, int gid,
bst_float base) { bst_float base) {
bst_float psum = model.bias()[gid] + base; bst_float psum = model_.bias()[gid] + base;
for (bst_uint i = 0; i < inst.length; ++i) { for (bst_uint i = 0; i < inst.length; ++i) {
if (inst[i].index >= model.param.num_feature) continue; if (inst[i].index >= model_.param.num_feature) continue;
psum += inst[i].fvalue * model[inst[i].index][gid]; psum += inst[i].fvalue * model_[inst[i].index][gid];
} }
preds[gid] = psum; preds[gid] = psum;
} }
// base margin score // base margin score
bst_float base_margin_; bst_float base_margin_;
// model field // model field
GBLinearModel model; GBLinearModel model_;
GBLinearModel previous_model; GBLinearModel previous_model_;
GBLinearTrainParam param; GBLinearTrainParam param_;
std::unique_ptr<LinearUpdater> updater; std::unique_ptr<LinearUpdater> updater_;
double sum_instance_weight; double sum_instance_weight_;
bool sum_weight_complete; bool sum_weight_complete_;
common::Monitor monitor; common::Monitor monitor_;
bool is_converged; bool is_converged_;
/** /**
* \struct PredictionCacheEntry * \struct PredictionCacheEntry
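Most of the GBLinear diff is readability-identifier-naming at work: private data members pick up the trailing-underscore suffix (model becomes model_, param becomes param_, monitor becomes monitor_), so member accesses are distinguishable from locals at a glance, and the check rewrites every use site consistently. The convention in miniature, on an invented class:

class Booster {
 public:
  double Tolerance() const { return tolerance_; }  // member read via suffixed name

 private:
  double tolerance_{0.0};  // was 'tolerance'; the '_' marks it as a member
};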
View File
@ -40,7 +40,7 @@ class GBLinearModel {
// weight for each of feature, bias is the last one // weight for each of feature, bias is the last one
std::vector<bst_float> weight; std::vector<bst_float> weight;
// initialize the model parameter // initialize the model parameter
inline void LazyInitModel(void) { inline void LazyInitModel() {
if (!weight.empty()) return; if (!weight.empty()) return;
// bias is the last weight // bias is the last weight
weight.resize((param.num_feature + 1) * param.num_output_group); weight.resize((param.num_feature + 1) * param.num_output_group);
View File
@ -143,32 +143,32 @@ class GBTree : public GradientBooster {
} }
void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) override { void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) override {
this->cfg = cfg; this->cfg_ = cfg;
model_.Configure(cfg); model_.Configure(cfg);
// initialize the updaters only when needed. // initialize the updaters only when needed.
std::string updater_seq = tparam.updater_seq; std::string updater_seq = tparam_.updater_seq;
tparam.InitAllowUnknown(cfg); tparam_.InitAllowUnknown(cfg);
if (updater_seq != tparam.updater_seq) updaters.clear(); if (updater_seq != tparam_.updater_seq) updaters_.clear();
for (const auto& up : updaters) { for (const auto& up : updaters_) {
up->Init(cfg); up->Init(cfg);
} }
// for the 'update' process_type, move trees into trees_to_update // for the 'update' process_type, move trees into trees_to_update
if (tparam.process_type == kUpdate) { if (tparam_.process_type == kUpdate) {
model_.InitTreesToUpdate(); model_.InitTreesToUpdate();
} }
// configure predictor // configure predictor
predictor = std::unique_ptr<Predictor>(Predictor::Create(tparam.predictor)); predictor_ = std::unique_ptr<Predictor>(Predictor::Create(tparam_.predictor));
predictor->Init(cfg, cache_); predictor_->Init(cfg, cache_);
monitor.Init("GBTree", tparam.debug_verbose); monitor_.Init("GBTree", tparam_.debug_verbose);
} }
void Load(dmlc::Stream* fi) override { void Load(dmlc::Stream* fi) override {
model_.Load(fi); model_.Load(fi);
this->cfg.clear(); this->cfg_.clear();
this->cfg.push_back(std::make_pair(std::string("num_feature"), this->cfg_.emplace_back(std::string("num_feature"),
common::ToString(model_.param.num_feature))); common::ToString(model_.param.num_feature));
} }
void Save(dmlc::Stream* fo) const override { void Save(dmlc::Stream* fo) const override {
@ -177,29 +177,29 @@ class GBTree : public GradientBooster {
bool AllowLazyCheckPoint() const override { bool AllowLazyCheckPoint() const override {
return model_.param.num_output_group == 1 || return model_.param.num_output_group == 1 ||
tparam.updater_seq.find("distcol") != std::string::npos; tparam_.updater_seq.find("distcol") != std::string::npos;
} }
void DoBoost(DMatrix* p_fmat, void DoBoost(DMatrix* p_fmat,
HostDeviceVector<bst_gpair>* in_gpair, HostDeviceVector<GradientPair>* in_gpair,
ObjFunction* obj) override { ObjFunction* obj) override {
std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees; std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees;
const int ngroup = model_.param.num_output_group; const int ngroup = model_.param.num_output_group;
monitor.Start("BoostNewTrees"); monitor_.Start("BoostNewTrees");
if (ngroup == 1) { if (ngroup == 1) {
std::vector<std::unique_ptr<RegTree> > ret; std::vector<std::unique_ptr<RegTree> > ret;
BoostNewTrees(in_gpair, p_fmat, 0, &ret); BoostNewTrees(in_gpair, p_fmat, 0, &ret);
new_trees.push_back(std::move(ret)); new_trees.push_back(std::move(ret));
} else { } else {
CHECK_EQ(in_gpair->size() % ngroup, 0U) CHECK_EQ(in_gpair->Size() % ngroup, 0U)
<< "must have exactly ngroup*nrow gpairs"; << "must have exactly ngroup*nrow gpairs";
// TODO(canonizer): perform this on GPU if HostDeviceVector has device set. // TODO(canonizer): perform this on GPU if HostDeviceVector has device set.
HostDeviceVector<bst_gpair> tmp(in_gpair->size() / ngroup, HostDeviceVector<GradientPair> tmp(in_gpair->Size() / ngroup,
bst_gpair(), in_gpair->device()); GradientPair(), in_gpair->DeviceIdx());
std::vector<bst_gpair>& gpair_h = in_gpair->data_h(); std::vector<GradientPair>& gpair_h = in_gpair->HostVector();
bst_omp_uint nsize = static_cast<bst_omp_uint>(tmp.size()); auto nsize = static_cast<bst_omp_uint>(tmp.Size());
for (int gid = 0; gid < ngroup; ++gid) { for (int gid = 0; gid < ngroup; ++gid) {
std::vector<bst_gpair>& tmp_h = tmp.data_h(); std::vector<GradientPair>& tmp_h = tmp.HostVector();
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nsize; ++i) { for (bst_omp_uint i = 0; i < nsize; ++i) {
tmp_h[i] = gpair_h[i * ngroup + gid]; tmp_h[i] = gpair_h[i * ngroup + gid];
@ -209,43 +209,43 @@ class GBTree : public GradientBooster {
new_trees.push_back(std::move(ret)); new_trees.push_back(std::move(ret));
} }
} }
monitor.Stop("BoostNewTrees"); monitor_.Stop("BoostNewTrees");
monitor.Start("CommitModel"); monitor_.Start("CommitModel");
this->CommitModel(std::move(new_trees)); this->CommitModel(std::move(new_trees));
monitor.Stop("CommitModel"); monitor_.Stop("CommitModel");
} }
void PredictBatch(DMatrix* p_fmat, void PredictBatch(DMatrix* p_fmat,
HostDeviceVector<bst_float>* out_preds, HostDeviceVector<bst_float>* out_preds,
unsigned ntree_limit) override { unsigned ntree_limit) override {
predictor->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit); predictor_->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
} }
void PredictInstance(const SparseBatch::Inst& inst, void PredictInstance(const SparseBatch::Inst& inst,
std::vector<bst_float>* out_preds, std::vector<bst_float>* out_preds,
unsigned ntree_limit, unsigned ntree_limit,
unsigned root_index) override { unsigned root_index) override {
predictor->PredictInstance(inst, out_preds, model_, predictor_->PredictInstance(inst, out_preds, model_,
ntree_limit, root_index); ntree_limit, root_index);
} }
void PredictLeaf(DMatrix* p_fmat, void PredictLeaf(DMatrix* p_fmat,
std::vector<bst_float>* out_preds, std::vector<bst_float>* out_preds,
unsigned ntree_limit) override { unsigned ntree_limit) override {
predictor->PredictLeaf(p_fmat, out_preds, model_, ntree_limit); predictor_->PredictLeaf(p_fmat, out_preds, model_, ntree_limit);
} }
void PredictContribution(DMatrix* p_fmat, void PredictContribution(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs, std::vector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate, int condition, unsigned ntree_limit, bool approximate, int condition,
unsigned condition_feature) override { unsigned condition_feature) override {
predictor->PredictContribution(p_fmat, out_contribs, model_, ntree_limit, approximate); predictor_->PredictContribution(p_fmat, out_contribs, model_, ntree_limit, approximate);
} }
void PredictInteractionContributions(DMatrix* p_fmat, void PredictInteractionContributions(DMatrix* p_fmat,
std::vector<bst_float>* out_contribs, std::vector<bst_float>* out_contribs,
unsigned ntree_limit, bool approximate) override { unsigned ntree_limit, bool approximate) override {
predictor->PredictInteractionContributions(p_fmat, out_contribs, model_, predictor_->PredictInteractionContributions(p_fmat, out_contribs, model_,
ntree_limit, approximate); ntree_limit, approximate);
} }
@ -258,18 +258,18 @@ class GBTree : public GradientBooster {
protected: protected:
// initialize updater before using them // initialize updater before using them
inline void InitUpdater() { inline void InitUpdater() {
if (updaters.size() != 0) return; if (updaters_.size() != 0) return;
std::string tval = tparam.updater_seq; std::string tval = tparam_.updater_seq;
std::vector<std::string> ups = common::Split(tval, ','); std::vector<std::string> ups = common::Split(tval, ',');
for (const std::string& pstr : ups) { for (const std::string& pstr : ups) {
std::unique_ptr<TreeUpdater> up(TreeUpdater::Create(pstr.c_str())); std::unique_ptr<TreeUpdater> up(TreeUpdater::Create(pstr.c_str()));
up->Init(this->cfg); up->Init(this->cfg_);
updaters.push_back(std::move(up)); updaters_.push_back(std::move(up));
} }
} }
// boost new trees for a specific output group // boost new trees for a specific output group
inline void BoostNewTrees(HostDeviceVector<bst_gpair>* gpair, inline void BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
int bst_group, int bst_group,
std::vector<std::unique_ptr<RegTree> >* ret) { std::vector<std::unique_ptr<RegTree> >* ret) {
@ -277,26 +277,27 @@ class GBTree : public GradientBooster {
std::vector<RegTree*> new_trees; std::vector<RegTree*> new_trees;
ret->clear(); ret->clear();
// create the trees // create the trees
for (int i = 0; i < tparam.num_parallel_tree; ++i) { for (int i = 0; i < tparam_.num_parallel_tree; ++i) {
if (tparam.process_type == kDefault) { if (tparam_.process_type == kDefault) {
// create new tree // create new tree
std::unique_ptr<RegTree> ptr(new RegTree()); std::unique_ptr<RegTree> ptr(new RegTree());
ptr->param.InitAllowUnknown(this->cfg); ptr->param.InitAllowUnknown(this->cfg_);
ptr->InitModel(); ptr->InitModel();
new_trees.push_back(ptr.get()); new_trees.push_back(ptr.get());
ret->push_back(std::move(ptr)); ret->push_back(std::move(ptr));
} else if (tparam.process_type == kUpdate) { } else if (tparam_.process_type == kUpdate) {
CHECK_LT(model_.trees.size(), model_.trees_to_update.size()); CHECK_LT(model_.trees.size(), model_.trees_to_update.size());
// move an existing tree from trees_to_update // move an existing tree from trees_to_update
auto t = std::move(model_.trees_to_update[model_.trees.size() + auto t = std::move(model_.trees_to_update[model_.trees.size() +
bst_group * tparam.num_parallel_tree + i]); bst_group * tparam_.num_parallel_tree + i]);
new_trees.push_back(t.get()); new_trees.push_back(t.get());
ret->push_back(std::move(t)); ret->push_back(std::move(t));
} }
} }
// update the trees // update the trees
for (auto& up : updaters) for (auto& up : updaters_) {
up->Update(gpair, p_fmat, new_trees); up->Update(gpair, p_fmat, new_trees);
}
} }
// commit new trees all at once // commit new trees all at once
@ -307,22 +308,22 @@ class GBTree : public GradientBooster {
num_new_trees += new_trees[gid].size(); num_new_trees += new_trees[gid].size();
model_.CommitModel(std::move(new_trees[gid]), gid); model_.CommitModel(std::move(new_trees[gid]), gid);
} }
predictor->UpdatePredictionCache(model_, &updaters, num_new_trees); predictor_->UpdatePredictionCache(model_, &updaters_, num_new_trees);
} }
// --- data structure --- // --- data structure ---
GBTreeModel model_; GBTreeModel model_;
// training parameter // training parameter
GBTreeTrainParam tparam; GBTreeTrainParam tparam_;
// ----training fields---- // ----training fields----
// configurations for tree // configurations for tree
std::vector<std::pair<std::string, std::string> > cfg; std::vector<std::pair<std::string, std::string> > cfg_;
// the updaters that can be applied to each of tree // the updaters that can be applied to each of tree
std::vector<std::unique_ptr<TreeUpdater>> updaters; std::vector<std::unique_ptr<TreeUpdater>> updaters_;
// Cached matrices // Cached matrices
std::vector<std::shared_ptr<DMatrix>> cache_; std::vector<std::shared_ptr<DMatrix>> cache_;
std::unique_ptr<Predictor> predictor; std::unique_ptr<Predictor> predictor_;
common::Monitor monitor; common::Monitor monitor_;
}; };
// dart // dart
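Two more recurring fixes appear in the GBTree hunks above: cfg_.push_back(std::make_pair(...)) collapses to cfg_.emplace_back(...), and declarations whose initializer is a cast switch to auto (auto nsize = static_cast<bst_omp_uint>(...)), since the type already appears on the right-hand side. Both in one toy example:

#include <string>
#include <utility>
#include <vector>

int main() {
  std::vector<std::pair<std::string, std::string>> cfg;
  cfg.emplace_back("num_feature", "128");           // pair built in place, no make_pair
  auto n = static_cast<unsigned int>(cfg.size());   // type named once, in the cast
  return n == 1u ? 0 : 1;
}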
@ -333,22 +334,22 @@ class Dart : public GBTree {
void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) override { void Configure(const std::vector<std::pair<std::string, std::string> >& cfg) override {
GBTree::Configure(cfg); GBTree::Configure(cfg);
if (model_.trees.size() == 0) { if (model_.trees.size() == 0) {
dparam.InitAllowUnknown(cfg); dparam_.InitAllowUnknown(cfg);
} }
} }
void Load(dmlc::Stream* fi) override { void Load(dmlc::Stream* fi) override {
GBTree::Load(fi); GBTree::Load(fi);
weight_drop.resize(model_.param.num_trees); weight_drop_.resize(model_.param.num_trees);
if (model_.param.num_trees != 0) { if (model_.param.num_trees != 0) {
fi->Read(&weight_drop); fi->Read(&weight_drop_);
} }
} }
void Save(dmlc::Stream* fo) const override { void Save(dmlc::Stream* fo) const override {
GBTree::Save(fo); GBTree::Save(fo);
if (weight_drop.size() != 0) { if (weight_drop_.size() != 0) {
fo->Write(weight_drop); fo->Write(weight_drop_);
} }
} }
@ -357,7 +358,7 @@ class Dart : public GBTree {
HostDeviceVector<bst_float>* out_preds, HostDeviceVector<bst_float>* out_preds,
unsigned ntree_limit) override { unsigned ntree_limit) override {
DropTrees(ntree_limit); DropTrees(ntree_limit);
PredLoopInternal<Dart>(p_fmat, &out_preds->data_h(), 0, ntree_limit, true); PredLoopInternal<Dart>(p_fmat, &out_preds->HostVector(), 0, ntree_limit, true);
} }
void PredictInstance(const SparseBatch::Inst& inst, void PredictInstance(const SparseBatch::Inst& inst,
@ -365,9 +366,9 @@ class Dart : public GBTree {
unsigned ntree_limit, unsigned ntree_limit,
unsigned root_index) override { unsigned root_index) override {
DropTrees(1); DropTrees(1);
if (thread_temp.size() == 0) { if (thread_temp_.size() == 0) {
thread_temp.resize(1, RegTree::FVec()); thread_temp_.resize(1, RegTree::FVec());
thread_temp[0].Init(model_.param.num_feature); thread_temp_[0].Init(model_.param.num_feature);
} }
out_preds->resize(model_.param.num_output_group); out_preds->resize(model_.param.num_output_group);
ntree_limit *= model_.param.num_output_group; ntree_limit *= model_.param.num_output_group;
@ -378,7 +379,7 @@ class Dart : public GBTree {
for (int gid = 0; gid < model_.param.num_output_group; ++gid) { for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
(*out_preds)[gid] (*out_preds)[gid]
= PredValue(inst, gid, root_index, = PredValue(inst, gid, root_index,
&thread_temp[0], 0, ntree_limit) + model_.base_margin; &thread_temp_[0], 0, ntree_limit) + model_.base_margin;
} }
} }
@ -400,8 +401,8 @@ class Dart : public GBTree {
} }
if (init_out_preds) { if (init_out_preds) {
size_t n = num_group * p_fmat->info().num_row; size_t n = num_group * p_fmat->Info().num_row_;
const std::vector<bst_float>& base_margin = p_fmat->info().base_margin; const std::vector<bst_float>& base_margin = p_fmat->Info().base_margin_;
out_preds->resize(n); out_preds->resize(n);
if (base_margin.size() != 0) { if (base_margin.size() != 0) {
CHECK_EQ(out_preds->size(), n); CHECK_EQ(out_preds->size(), n);
@ -427,37 +428,37 @@ class Dart : public GBTree {
int num_group, int num_group,
unsigned tree_begin, unsigned tree_begin,
unsigned tree_end) { unsigned tree_end) {
const MetaInfo& info = p_fmat->info(); const MetaInfo& info = p_fmat->Info();
const int nthread = omp_get_max_threads(); const int nthread = omp_get_max_threads();
CHECK_EQ(num_group, model_.param.num_output_group); CHECK_EQ(num_group, model_.param.num_output_group);
InitThreadTemp(nthread); InitThreadTemp(nthread);
std::vector<bst_float>& preds = *out_preds; std::vector<bst_float>& preds = *out_preds;
CHECK_EQ(model_.param.size_leaf_vector, 0) CHECK_EQ(model_.param.size_leaf_vector, 0)
<< "size_leaf_vector is enforced to 0 so far"; << "size_leaf_vector is enforced to 0 so far";
CHECK_EQ(preds.size(), p_fmat->info().num_row * num_group); CHECK_EQ(preds.size(), p_fmat->Info().num_row_ * num_group);
// start collecting the prediction // start collecting the prediction
dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator(); dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator();
Derived* self = static_cast<Derived*>(this); auto* self = static_cast<Derived*>(this);
iter->BeforeFirst(); iter->BeforeFirst();
while (iter->Next()) { while (iter->Next()) {
const RowBatch &batch = iter->Value(); const RowBatch &batch = iter->Value();
// parallel over local batch // parallel over local batch
const int K = 8; constexpr int kUnroll = 8;
const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size); const auto nsize = static_cast<bst_omp_uint>(batch.size);
const bst_omp_uint rest = nsize % K; const bst_omp_uint rest = nsize % kUnroll;
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nsize - rest; i += K) { for (bst_omp_uint i = 0; i < nsize - rest; i += kUnroll) {
const int tid = omp_get_thread_num(); const int tid = omp_get_thread_num();
RegTree::FVec& feats = thread_temp[tid]; RegTree::FVec& feats = thread_temp_[tid];
int64_t ridx[K]; int64_t ridx[kUnroll];
RowBatch::Inst inst[K]; RowBatch::Inst inst[kUnroll];
for (int k = 0; k < K; ++k) { for (int k = 0; k < kUnroll; ++k) {
ridx[k] = static_cast<int64_t>(batch.base_rowid + i + k); ridx[k] = static_cast<int64_t>(batch.base_rowid + i + k);
} }
for (int k = 0; k < K; ++k) { for (int k = 0; k < kUnroll; ++k) {
inst[k] = batch[i + k]; inst[k] = batch[i + k];
} }
for (int k = 0; k < K; ++k) { for (int k = 0; k < kUnroll; ++k) {
for (int gid = 0; gid < num_group; ++gid) { for (int gid = 0; gid < num_group; ++gid) {
const size_t offset = ridx[k] * num_group + gid; const size_t offset = ridx[k] * num_group + gid;
preds[offset] += preds[offset] +=
@ -467,8 +468,8 @@ class Dart : public GBTree {
} }
} }
for (bst_omp_uint i = nsize - rest; i < nsize; ++i) { for (bst_omp_uint i = nsize - rest; i < nsize; ++i) {
RegTree::FVec& feats = thread_temp[0]; RegTree::FVec& feats = thread_temp_[0];
const int64_t ridx = static_cast<int64_t>(batch.base_rowid + i); const auto ridx = static_cast<int64_t>(batch.base_rowid + i);
const RowBatch::Inst inst = batch[i]; const RowBatch::Inst inst = batch[i];
for (int gid = 0; gid < num_group; ++gid) { for (int gid = 0; gid < num_group; ++gid) {
const size_t offset = ridx * num_group + gid; const size_t offset = ridx * num_group + gid;
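In the prediction loop above, const int K = 8 becomes constexpr int kUnroll = 8: a compile-time constant with a descriptive, Google-style k-name instead of a single letter that reads like a template parameter. The manual 8-way unrolling it controls is unchanged; roughly, assuming only the standard library:

#include <cstddef>
#include <vector>

float Sum(const std::vector<float>& v) {
  constexpr int kUnroll = 8;  // unroll factor, named and compile-time
  const auto nsize = v.size();
  const auto rest = nsize % kUnroll;
  float total = 0.0f;
  for (std::size_t i = 0; i < nsize - rest; i += kUnroll) {
    for (int k = 0; k < kUnroll; ++k) {
      total += v[i + k];  // unrolled body
    }
  }
  for (std::size_t i = nsize - rest; i < nsize; ++i) {
    total += v[i];  // remainder the unrolled loop cannot cover
  }
  return total;
}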
@ -489,9 +490,9 @@ class Dart : public GBTree {
model_.CommitModel(std::move(new_trees[gid]), gid); model_.CommitModel(std::move(new_trees[gid]), gid);
} }
size_t num_drop = NormalizeTrees(num_new_trees); size_t num_drop = NormalizeTrees(num_new_trees);
if (dparam.silent != 1) { if (dparam_.silent != 1) {
LOG(INFO) << "drop " << num_drop << " trees, " LOG(INFO) << "drop " << num_drop << " trees, "
<< "weight = " << weight_drop.back(); << "weight = " << weight_drop_.back();
} }
} }
@ -506,10 +507,10 @@ class Dart : public GBTree {
p_feats->Fill(inst); p_feats->Fill(inst);
for (size_t i = tree_begin; i < tree_end; ++i) { for (size_t i = tree_begin; i < tree_end; ++i) {
if (model_.tree_info[i] == bst_group) { if (model_.tree_info[i] == bst_group) {
bool drop = (std::binary_search(idx_drop.begin(), idx_drop.end(), i)); bool drop = (std::binary_search(idx_drop_.begin(), idx_drop_.end(), i));
if (!drop) { if (!drop) {
int tid = model_.trees[i]->GetLeafIndex(*p_feats, root_index); int tid = model_.trees[i]->GetLeafIndex(*p_feats, root_index);
psum += weight_drop[i] * (*model_.trees[i])[tid].leaf_value(); psum += weight_drop_[i] * (*model_.trees[i])[tid].LeafValue();
} }
} }
} }
@ -519,45 +520,45 @@ class Dart : public GBTree {
// select which trees to drop // select which trees to drop
inline void DropTrees(unsigned ntree_limit_drop) { inline void DropTrees(unsigned ntree_limit_drop) {
idx_drop.clear(); idx_drop_.clear();
if (ntree_limit_drop > 0) return; if (ntree_limit_drop > 0) return;
std::uniform_real_distribution<> runif(0.0, 1.0); std::uniform_real_distribution<> runif(0.0, 1.0);
auto& rnd = common::GlobalRandom(); auto& rnd = common::GlobalRandom();
bool skip = false; bool skip = false;
if (dparam.skip_drop > 0.0) skip = (runif(rnd) < dparam.skip_drop); if (dparam_.skip_drop > 0.0) skip = (runif(rnd) < dparam_.skip_drop);
// sample some trees to drop // sample some trees to drop
if (!skip) { if (!skip) {
if (dparam.sample_type == 1) { if (dparam_.sample_type == 1) {
bst_float sum_weight = 0.0; bst_float sum_weight = 0.0;
for (size_t i = 0; i < weight_drop.size(); ++i) { for (auto elem : weight_drop_) {
sum_weight += weight_drop[i]; sum_weight += elem;
} }
for (size_t i = 0; i < weight_drop.size(); ++i) { for (size_t i = 0; i < weight_drop_.size(); ++i) {
if (runif(rnd) < dparam.rate_drop * weight_drop.size() * weight_drop[i] / sum_weight) { if (runif(rnd) < dparam_.rate_drop * weight_drop_.size() * weight_drop_[i] / sum_weight) {
idx_drop.push_back(i); idx_drop_.push_back(i);
} }
} }
if (dparam.one_drop && idx_drop.empty() && !weight_drop.empty()) { if (dparam_.one_drop && idx_drop_.empty() && !weight_drop_.empty()) {
// the expression below is an ugly but MSVC2013-friendly equivalent of // the expression below is an ugly but MSVC2013-friendly equivalent of
// size_t i = std::discrete_distribution<size_t>(weight_drop.begin(), // size_t i = std::discrete_distribution<size_t>(weight_drop.begin(),
// weight_drop.end())(rnd); // weight_drop.end())(rnd);
size_t i = std::discrete_distribution<size_t>( size_t i = std::discrete_distribution<size_t>(
weight_drop.size(), 0., static_cast<double>(weight_drop.size()), weight_drop_.size(), 0., static_cast<double>(weight_drop_.size()),
[this](double x) -> double { [this](double x) -> double {
return weight_drop[static_cast<size_t>(x)]; return weight_drop_[static_cast<size_t>(x)];
})(rnd); })(rnd);
idx_drop.push_back(i); idx_drop_.push_back(i);
} }
} else { } else {
for (size_t i = 0; i < weight_drop.size(); ++i) { for (size_t i = 0; i < weight_drop_.size(); ++i) {
if (runif(rnd) < dparam.rate_drop) { if (runif(rnd) < dparam_.rate_drop) {
idx_drop.push_back(i); idx_drop_.push_back(i);
} }
} }
if (dparam.one_drop && idx_drop.empty() && !weight_drop.empty()) { if (dparam_.one_drop && idx_drop_.empty() && !weight_drop_.empty()) {
size_t i = std::uniform_int_distribution<size_t>(0, weight_drop.size() - 1)(rnd); size_t i = std::uniform_int_distribution<size_t>(0, weight_drop_.size() - 1)(rnd);
idx_drop.push_back(i); idx_drop_.push_back(i);
} }
} }
} }
@ -565,58 +566,58 @@ class Dart : public GBTree {
// set normalization factors // set normalization factors
inline size_t NormalizeTrees(size_t size_new_trees) { inline size_t NormalizeTrees(size_t size_new_trees) {
float lr = 1.0 * dparam.learning_rate / size_new_trees; float lr = 1.0 * dparam_.learning_rate / size_new_trees;
size_t num_drop = idx_drop.size(); size_t num_drop = idx_drop_.size();
if (num_drop == 0) { if (num_drop == 0) {
for (size_t i = 0; i < size_new_trees; ++i) { for (size_t i = 0; i < size_new_trees; ++i) {
weight_drop.push_back(1.0); weight_drop_.push_back(1.0);
} }
} else { } else {
if (dparam.normalize_type == 1) { if (dparam_.normalize_type == 1) {
// normalize_type 1 // normalize_type 1
float factor = 1.0 / (1.0 + lr); float factor = 1.0 / (1.0 + lr);
for (size_t i = 0; i < idx_drop.size(); ++i) { for (auto i : idx_drop_) {
weight_drop[idx_drop[i]] *= factor; weight_drop_[i] *= factor;
} }
for (size_t i = 0; i < size_new_trees; ++i) { for (size_t i = 0; i < size_new_trees; ++i) {
weight_drop.push_back(factor); weight_drop_.push_back(factor);
} }
} else { } else {
// normalize_type 0 // normalize_type 0
float factor = 1.0 * num_drop / (num_drop + lr); float factor = 1.0 * num_drop / (num_drop + lr);
for (size_t i = 0; i < idx_drop.size(); ++i) { for (auto i : idx_drop_) {
weight_drop[idx_drop[i]] *= factor; weight_drop_[i] *= factor;
} }
for (size_t i = 0; i < size_new_trees; ++i) { for (size_t i = 0; i < size_new_trees; ++i) {
weight_drop.push_back(1.0 / (num_drop + lr)); weight_drop_.push_back(1.0 / (num_drop + lr));
} }
} }
} }
// reset // reset
idx_drop.clear(); idx_drop_.clear();
return num_drop; return num_drop;
} }
// init thread buffers // init thread buffers
inline void InitThreadTemp(int nthread) { inline void InitThreadTemp(int nthread) {
int prev_thread_temp_size = thread_temp.size(); int prev_thread_temp_size = thread_temp_.size();
if (prev_thread_temp_size < nthread) { if (prev_thread_temp_size < nthread) {
thread_temp.resize(nthread, RegTree::FVec()); thread_temp_.resize(nthread, RegTree::FVec());
for (int i = prev_thread_temp_size; i < nthread; ++i) { for (int i = prev_thread_temp_size; i < nthread; ++i) {
thread_temp[i].Init(model_.param.num_feature); thread_temp_[i].Init(model_.param.num_feature);
} }
} }
} }
// --- data structure --- // --- data structure ---
// training parameter // training parameter
DartTrainParam dparam; DartTrainParam dparam_;
/*! \brief prediction buffer */ /*! \brief prediction buffer */
std::vector<bst_float> weight_drop; std::vector<bst_float> weight_drop_;
// indexes of dropped trees // indexes of dropped trees
std::vector<size_t> idx_drop; std::vector<size_t> idx_drop_;
// temporal storage for per thread // temporal storage for per thread
std::vector<RegTree::FVec> thread_temp; std::vector<RegTree::FVec> thread_temp_;
}; };
// register the gradient boosters // register the gradient boosters
@ -627,7 +628,7 @@ DMLC_REGISTER_PARAMETER(DartTrainParam);
XGBOOST_REGISTER_GBM(GBTree, "gbtree") XGBOOST_REGISTER_GBM(GBTree, "gbtree")
.describe("Tree booster, gradient boosted trees.") .describe("Tree booster, gradient boosted trees.")
.set_body([](const std::vector<std::shared_ptr<DMatrix> >& cached_mats, bst_float base_margin) { .set_body([](const std::vector<std::shared_ptr<DMatrix> >& cached_mats, bst_float base_margin) {
GBTree* p = new GBTree(base_margin); auto* p = new GBTree(base_margin);
p->InitCache(cached_mats); p->InitCache(cached_mats);
return p; return p;
}); });
View File
@ -70,8 +70,8 @@ struct GBTreeModel {
void InitTreesToUpdate() { void InitTreesToUpdate() {
if (trees_to_update.size() == 0u) { if (trees_to_update.size() == 0u) {
for (size_t i = 0; i < trees.size(); ++i) { for (auto & tree : trees) {
trees_to_update.push_back(std::move(trees[i])); trees_to_update.push_back(std::move(tree));
} }
trees.clear(); trees.clear();
param.num_trees = 0; param.num_trees = 0;
@ -100,8 +100,8 @@ struct GBTreeModel {
void Save(dmlc::Stream* fo) const { void Save(dmlc::Stream* fo) const {
CHECK_EQ(param.num_trees, static_cast<int>(trees.size())); CHECK_EQ(param.num_trees, static_cast<int>(trees.size()));
fo->Write(&param, sizeof(param)); fo->Write(&param, sizeof(param));
for (size_t i = 0; i < trees.size(); ++i) { for (const auto & tree : trees) {
trees[i]->Save(fo); tree->Save(fo);
} }
if (tree_info.size() != 0) { if (tree_info.size() != 0) {
fo->Write(dmlc::BeginPtr(tree_info), sizeof(int) * tree_info.size()); fo->Write(dmlc::BeginPtr(tree_info), sizeof(int) * tree_info.size());
@ -111,15 +111,15 @@ struct GBTreeModel {
std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats, std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,
std::string format) const { std::string format) const {
std::vector<std::string> dump; std::vector<std::string> dump;
for (size_t i = 0; i < trees.size(); i++) { for (const auto & tree : trees) {
dump.push_back(trees[i]->DumpModel(fmap, with_stats, format)); dump.push_back(tree->DumpModel(fmap, with_stats, format));
} }
return dump; return dump;
} }
void CommitModel(std::vector<std::unique_ptr<RegTree> >&& new_trees, void CommitModel(std::vector<std::unique_ptr<RegTree> >&& new_trees,
int bst_group) { int bst_group) {
for (size_t i = 0; i < new_trees.size(); ++i) { for (auto & new_tree : new_trees) {
trees.push_back(std::move(new_trees[i])); trees.push_back(std::move(new_tree));
tree_info.push_back(bst_group); tree_info.push_back(bst_group);
} }
param.num_trees += static_cast<int>(new_trees.size()); param.num_trees += static_cast<int>(new_trees.size());
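The GBTreeModel loops now use range-for, and where the body moves each tree out (InitTreesToUpdate, CommitModel) the loop variable must be a non-const reference: a by-value loop variable would require copying a std::unique_ptr, which does not compile. A condensed sketch:

#include <memory>
#include <utility>
#include <vector>

int main() {
  std::vector<std::unique_ptr<int>> trees;
  trees.push_back(std::unique_ptr<int>(new int(7)));
  std::vector<std::unique_ptr<int>> trees_to_update;
  for (auto& tree : trees) {                     // auto& so the element can be moved from
    trees_to_update.push_back(std::move(tree));  // leaves a null pointer behind
  }
  trees.clear();
  return trees_to_update.size() == 1 ? 0 : 1;
}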
View File
@ -141,8 +141,8 @@ DMLC_REGISTER_PARAMETER(LearnerTrainParam);
*/ */
class LearnerImpl : public Learner { class LearnerImpl : public Learner {
public: public:
explicit LearnerImpl(const std::vector<std::shared_ptr<DMatrix> >& cache) explicit LearnerImpl(std::vector<std::shared_ptr<DMatrix> > cache)
: cache_(cache) { : cache_(std::move(cache)) {
// boosted tree // boosted tree
name_obj_ = "reg:linear"; name_obj_ = "reg:linear";
name_gbm_ = "gbtree"; name_gbm_ = "gbtree";
@ -155,25 +155,25 @@ class LearnerImpl : public Learner {
} }
void ConfigureUpdaters() { void ConfigureUpdaters() {
if (tparam.tree_method == 0 || tparam.tree_method == 1 || if (tparam_.tree_method == 0 || tparam_.tree_method == 1 ||
tparam.tree_method == 2) { tparam_.tree_method == 2) {
if (cfg_.count("updater") == 0) { if (cfg_.count("updater") == 0) {
if (tparam.dsplit == 1) { if (tparam_.dsplit == 1) {
cfg_["updater"] = "distcol"; cfg_["updater"] = "distcol";
} else if (tparam.dsplit == 2) { } else if (tparam_.dsplit == 2) {
cfg_["updater"] = "grow_histmaker,prune"; cfg_["updater"] = "grow_histmaker,prune";
} }
if (tparam.prob_buffer_row != 1.0f) { if (tparam_.prob_buffer_row != 1.0f) {
cfg_["updater"] = "grow_histmaker,refresh,prune"; cfg_["updater"] = "grow_histmaker,refresh,prune";
} }
} }
} else if (tparam.tree_method == 3) { } else if (tparam_.tree_method == 3) {
/* histogram-based algorithm */ /* histogram-based algorithm */
LOG(CONSOLE) << "Tree method is selected to be \'hist\', which uses a " LOG(CONSOLE) << "Tree method is selected to be \'hist\', which uses a "
"single updater " "single updater "
<< "grow_fast_histmaker."; << "grow_fast_histmaker.";
cfg_["updater"] = "grow_fast_histmaker"; cfg_["updater"] = "grow_fast_histmaker";
} else if (tparam.tree_method == 4) { } else if (tparam_.tree_method == 4) {
this->AssertGPUSupport(); this->AssertGPUSupport();
if (cfg_.count("updater") == 0) { if (cfg_.count("updater") == 0) {
cfg_["updater"] = "grow_gpu,prune"; cfg_["updater"] = "grow_gpu,prune";
@ -181,7 +181,7 @@ class LearnerImpl : public Learner {
if (cfg_.count("predictor") == 0) { if (cfg_.count("predictor") == 0) {
cfg_["predictor"] = "gpu_predictor"; cfg_["predictor"] = "gpu_predictor";
} }
} else if (tparam.tree_method == 5) { } else if (tparam_.tree_method == 5) {
this->AssertGPUSupport(); this->AssertGPUSupport();
if (cfg_.count("updater") == 0) { if (cfg_.count("updater") == 0) {
cfg_["updater"] = "grow_gpu_hist"; cfg_["updater"] = "grow_gpu_hist";
@ -195,8 +195,8 @@ class LearnerImpl : public Learner {
void Configure( void Configure(
const std::vector<std::pair<std::string, std::string> >& args) override { const std::vector<std::pair<std::string, std::string> >& args) override {
// add to configurations // add to configurations
tparam.InitAllowUnknown(args); tparam_.InitAllowUnknown(args);
monitor.Init("Learner", tparam.debug_verbose); monitor_.Init("Learner", tparam_.debug_verbose);
cfg_.clear(); cfg_.clear();
for (const auto& kv : args) { for (const auto& kv : args) {
if (kv.first == "eval_metric") { if (kv.first == "eval_metric") {
@ -206,20 +206,20 @@ class LearnerImpl : public Learner {
}; };
if (std::all_of(metrics_.begin(), metrics_.end(), dup_check)) { if (std::all_of(metrics_.begin(), metrics_.end(), dup_check)) {
metrics_.emplace_back(Metric::Create(kv.second)); metrics_.emplace_back(Metric::Create(kv.second));
mparam.contain_eval_metrics = 1; mparam_.contain_eval_metrics = 1;
} }
} else { } else {
cfg_[kv.first] = kv.second; cfg_[kv.first] = kv.second;
} }
} }
if (tparam.nthread != 0) { if (tparam_.nthread != 0) {
omp_set_num_threads(tparam.nthread); omp_set_num_threads(tparam_.nthread);
} }
// add additional parameters // add additional parameters
// These are constraints that need to be satisfied. // These are constraints that need to be satisfied.
if (tparam.dsplit == 0 && rabit::IsDistributed()) { if (tparam_.dsplit == 0 && rabit::IsDistributed()) {
tparam.dsplit = 2; tparam_.dsplit = 2;
} }
if (cfg_.count("num_class") != 0) { if (cfg_.count("num_class") != 0) {
@ -244,21 +244,21 @@ class LearnerImpl : public Learner {
} }
if (!this->ModelInitialized()) { if (!this->ModelInitialized()) {
mparam.InitAllowUnknown(args); mparam_.InitAllowUnknown(args);
name_obj_ = cfg_["objective"]; name_obj_ = cfg_["objective"];
name_gbm_ = cfg_["booster"]; name_gbm_ = cfg_["booster"];
// set seed only before the model is initialized // set seed only before the model is initialized
common::GlobalRandom().seed(tparam.seed); common::GlobalRandom().seed(tparam_.seed);
} }
// set number of features correctly. // set number of features correctly.
cfg_["num_feature"] = common::ToString(mparam.num_feature); cfg_["num_feature"] = common::ToString(mparam_.num_feature);
cfg_["num_class"] = common::ToString(mparam.num_class); cfg_["num_class"] = common::ToString(mparam_.num_class);
if (gbm_.get() != nullptr) { if (gbm_ != nullptr) {
gbm_->Configure(cfg_.begin(), cfg_.end()); gbm_->Configure(cfg_.begin(), cfg_.end());
} }
if (obj_.get() != nullptr) { if (obj_ != nullptr) {
obj_->Configure(cfg_.begin(), cfg_.end()); obj_->Configure(cfg_.begin(), cfg_.end());
} }
} }
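Dropping .get() in the nullptr checks works because std::unique_ptr (and std::shared_ptr) provide comparison operators against nullptr_t directly. For instance:

    #include <memory>

    bool Configured(const std::unique_ptr<int>& p) {
      // operator!=(const unique_ptr&, nullptr_t) makes .get() redundant
      return p != nullptr;  // same result as p.get() != nullptr
    }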
@ -281,7 +281,7 @@ class LearnerImpl : public Learner {
// use the peekable reader. // use the peekable reader.
fi = &fp; fi = &fp;
// read parameter // read parameter
CHECK_EQ(fi->Read(&mparam, sizeof(mparam)), sizeof(mparam)) CHECK_EQ(fi->Read(&mparam_, sizeof(mparam_)), sizeof(mparam_))
<< "BoostLearner: wrong model format"; << "BoostLearner: wrong model format";
{ {
// backward compatibility code for compatibility with the old model type // backward compatibility code for compatibility with the old model type
@ -303,9 +303,9 @@ class LearnerImpl : public Learner {
CHECK(fi->Read(&name_gbm_)) << "BoostLearner: wrong model format"; CHECK(fi->Read(&name_gbm_)) << "BoostLearner: wrong model format";
// duplicated code with LazyInitModel // duplicated code with LazyInitModel
obj_.reset(ObjFunction::Create(name_obj_)); obj_.reset(ObjFunction::Create(name_obj_));
gbm_.reset(GradientBooster::Create(name_gbm_, cache_, mparam.base_score)); gbm_.reset(GradientBooster::Create(name_gbm_, cache_, mparam_.base_score));
gbm_->Load(fi); gbm_->Load(fi);
if (mparam.contain_extra_attrs != 0) { if (mparam_.contain_extra_attrs != 0) {
std::vector<std::pair<std::string, std::string> > attr; std::vector<std::pair<std::string, std::string> > attr;
fi->Read(&attr); fi->Read(&attr);
attributes_ = attributes_ =
@ -316,35 +316,35 @@ class LearnerImpl : public Learner {
fi->Read(&max_delta_step); fi->Read(&max_delta_step);
cfg_["max_delta_step"] = max_delta_step; cfg_["max_delta_step"] = max_delta_step;
} }
if (mparam.contain_eval_metrics != 0) { if (mparam_.contain_eval_metrics != 0) {
std::vector<std::string> metr; std::vector<std::string> metr;
fi->Read(&metr); fi->Read(&metr);
for (auto name : metr) { for (auto name : metr) {
metrics_.emplace_back(Metric::Create(name)); metrics_.emplace_back(Metric::Create(name));
} }
} }
cfg_["num_class"] = common::ToString(mparam.num_class); cfg_["num_class"] = common::ToString(mparam_.num_class);
cfg_["num_feature"] = common::ToString(mparam.num_feature); cfg_["num_feature"] = common::ToString(mparam_.num_feature);
obj_->Configure(cfg_.begin(), cfg_.end()); obj_->Configure(cfg_.begin(), cfg_.end());
} }
// save the model to a rabit checkpoint // save the model to a rabit checkpoint
void Save(dmlc::Stream* fo) const override { void Save(dmlc::Stream* fo) const override {
fo->Write(&mparam, sizeof(LearnerModelParam)); fo->Write(&mparam_, sizeof(LearnerModelParam));
fo->Write(name_obj_); fo->Write(name_obj_);
fo->Write(name_gbm_); fo->Write(name_gbm_);
gbm_->Save(fo); gbm_->Save(fo);
if (mparam.contain_extra_attrs != 0) { if (mparam_.contain_extra_attrs != 0) {
std::vector<std::pair<std::string, std::string> > attr( std::vector<std::pair<std::string, std::string> > attr(
attributes_.begin(), attributes_.end()); attributes_.begin(), attributes_.end());
fo->Write(attr); fo->Write(attr);
} }
if (name_obj_ == "count:poisson") { if (name_obj_ == "count:poisson") {
std::map<std::string, std::string>::const_iterator it = auto it =
cfg_.find("max_delta_step"); cfg_.find("max_delta_step");
if (it != cfg_.end()) fo->Write(it->second); if (it != cfg_.end()) fo->Write(it->second);
} }
if (mparam.contain_eval_metrics != 0) { if (mparam_.contain_eval_metrics != 0) {
std::vector<std::string> metr; std::vector<std::string> metr;
for (auto& ev : metrics_) { for (auto& ev : metrics_) {
metr.emplace_back(ev->Name()); metr.emplace_back(ev->Name());
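The `auto it = cfg_.find(...)` change in the hunk above is modernize-use-auto applied to iterators: the long std::map<...>::const_iterator spelling adds nothing the initializer does not already say. A self-contained sketch:

    #include <map>
    #include <string>

    std::string Lookup(const std::map<std::string, std::string>& cfg) {
      // before: std::map<std::string, std::string>::const_iterator it = cfg.find("key");
      auto it = cfg.find("key");  // deduced as const_iterator
      return it != cfg.end() ? it->second : std::string{};
    }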
@ -354,37 +354,37 @@ class LearnerImpl : public Learner {
} }
void UpdateOneIter(int iter, DMatrix* train) override { void UpdateOneIter(int iter, DMatrix* train) override {
monitor.Start("UpdateOneIter"); monitor_.Start("UpdateOneIter");
CHECK(ModelInitialized()) CHECK(ModelInitialized())
<< "Always call InitModel or LoadModel before update"; << "Always call InitModel or LoadModel before update";
if (tparam.seed_per_iteration || rabit::IsDistributed()) { if (tparam_.seed_per_iteration || rabit::IsDistributed()) {
common::GlobalRandom().seed(tparam.seed * kRandSeedMagic + iter); common::GlobalRandom().seed(tparam_.seed * kRandSeedMagic + iter);
} }
this->LazyInitDMatrix(train); this->LazyInitDMatrix(train);
monitor.Start("PredictRaw"); monitor_.Start("PredictRaw");
this->PredictRaw(train, &preds_); this->PredictRaw(train, &preds_);
monitor.Stop("PredictRaw"); monitor_.Stop("PredictRaw");
monitor.Start("GetGradient"); monitor_.Start("GetGradient");
obj_->GetGradient(&preds_, train->info(), iter, &gpair_); obj_->GetGradient(&preds_, train->Info(), iter, &gpair_);
monitor.Stop("GetGradient"); monitor_.Stop("GetGradient");
gbm_->DoBoost(train, &gpair_, obj_.get()); gbm_->DoBoost(train, &gpair_, obj_.get());
monitor.Stop("UpdateOneIter"); monitor_.Stop("UpdateOneIter");
} }
void BoostOneIter(int iter, DMatrix* train, void BoostOneIter(int iter, DMatrix* train,
HostDeviceVector<bst_gpair>* in_gpair) override { HostDeviceVector<GradientPair>* in_gpair) override {
monitor.Start("BoostOneIter"); monitor_.Start("BoostOneIter");
if (tparam.seed_per_iteration || rabit::IsDistributed()) { if (tparam_.seed_per_iteration || rabit::IsDistributed()) {
common::GlobalRandom().seed(tparam.seed * kRandSeedMagic + iter); common::GlobalRandom().seed(tparam_.seed * kRandSeedMagic + iter);
} }
this->LazyInitDMatrix(train); this->LazyInitDMatrix(train);
gbm_->DoBoost(train, in_gpair); gbm_->DoBoost(train, in_gpair);
monitor.Stop("BoostOneIter"); monitor_.Stop("BoostOneIter");
} }
std::string EvalOneIter(int iter, const std::vector<DMatrix*>& data_sets, std::string EvalOneIter(int iter, const std::vector<DMatrix*>& data_sets,
const std::vector<std::string>& data_names) override { const std::vector<std::string>& data_names) override {
monitor.Start("EvalOneIter"); monitor_.Start("EvalOneIter");
std::ostringstream os; std::ostringstream os;
os << '[' << iter << ']' << std::setiosflags(std::ios::fixed); os << '[' << iter << ']' << std::setiosflags(std::ios::fixed);
if (metrics_.size() == 0) { if (metrics_.size() == 0) {
@ -395,17 +395,17 @@ class LearnerImpl : public Learner {
obj_->EvalTransform(&preds_); obj_->EvalTransform(&preds_);
for (auto& ev : metrics_) { for (auto& ev : metrics_) {
os << '\t' << data_names[i] << '-' << ev->Name() << ':' os << '\t' << data_names[i] << '-' << ev->Name() << ':'
<< ev->Eval(preds_.data_h(), data_sets[i]->info(), tparam.dsplit == 2); << ev->Eval(preds_.HostVector(), data_sets[i]->Info(), tparam_.dsplit == 2);
} }
} }
monitor.Stop("EvalOneIter"); monitor_.Stop("EvalOneIter");
return os.str(); return os.str();
} }
void SetAttr(const std::string& key, const std::string& value) override { void SetAttr(const std::string& key, const std::string& value) override {
attributes_[key] = value; attributes_[key] = value;
mparam.contain_extra_attrs = 1; mparam_.contain_extra_attrs = 1;
} }
bool GetAttr(const std::string& key, std::string* out) const override { bool GetAttr(const std::string& key, std::string* out) const override {
@ -438,7 +438,7 @@ class LearnerImpl : public Learner {
this->PredictRaw(data, &preds_); this->PredictRaw(data, &preds_);
obj_->EvalTransform(&preds_); obj_->EvalTransform(&preds_);
return std::make_pair(metric, return std::make_pair(metric,
ev->Eval(preds_.data_h(), data->info(), tparam.dsplit == 2)); ev->Eval(preds_.HostVector(), data->Info(), tparam_.dsplit == 2));
} }
void Predict(DMatrix* data, bool output_margin, void Predict(DMatrix* data, bool output_margin,
@ -446,12 +446,12 @@ class LearnerImpl : public Learner {
bool pred_leaf, bool pred_contribs, bool approx_contribs, bool pred_leaf, bool pred_contribs, bool approx_contribs,
bool pred_interactions) const override { bool pred_interactions) const override {
if (pred_contribs) { if (pred_contribs) {
gbm_->PredictContribution(data, &out_preds->data_h(), ntree_limit, approx_contribs); gbm_->PredictContribution(data, &out_preds->HostVector(), ntree_limit, approx_contribs);
} else if (pred_interactions) { } else if (pred_interactions) {
gbm_->PredictInteractionContributions(data, &out_preds->data_h(), ntree_limit, gbm_->PredictInteractionContributions(data, &out_preds->HostVector(), ntree_limit,
approx_contribs); approx_contribs);
} else if (pred_leaf) { } else if (pred_leaf) {
gbm_->PredictLeaf(data, &out_preds->data_h(), ntree_limit); gbm_->PredictLeaf(data, &out_preds->HostVector(), ntree_limit);
} else { } else {
this->PredictRaw(data, out_preds, ntree_limit); this->PredictRaw(data, out_preds, ntree_limit);
if (!output_margin) { if (!output_margin) {
@ -464,21 +464,21 @@ class LearnerImpl : public Learner {
// check if p_train is ready to be used by training. // check if p_train is ready to be used by training.
// if not, initialize the column access. // if not, initialize the column access.
inline void LazyInitDMatrix(DMatrix* p_train) { inline void LazyInitDMatrix(DMatrix* p_train) {
if (tparam.tree_method == 3 || tparam.tree_method == 4 || if (tparam_.tree_method == 3 || tparam_.tree_method == 4 ||
tparam.tree_method == 5 || name_gbm_ == "gblinear") { tparam_.tree_method == 5 || name_gbm_ == "gblinear") {
return; return;
} }
monitor.Start("LazyInitDMatrix"); monitor_.Start("LazyInitDMatrix");
if (!p_train->HaveColAccess(true)) { if (!p_train->HaveColAccess(true)) {
int ncol = static_cast<int>(p_train->info().num_col); auto ncol = static_cast<int>(p_train->Info().num_col_);
std::vector<bool> enabled(ncol, true); std::vector<bool> enabled(ncol, true);
// set max row per batch to a limited value // set max row per batch to a limited value
// in distributed mode, use safe choice otherwise // in distributed mode, use safe choice otherwise
size_t max_row_perbatch = tparam.max_row_perbatch; size_t max_row_perbatch = tparam_.max_row_perbatch;
const size_t safe_max_row = static_cast<size_t>(32ul << 10ul); const auto safe_max_row = static_cast<size_t>(32ul << 10ul);
if (tparam.tree_method == 0 && p_train->info().num_row >= (4UL << 20UL)) { if (tparam_.tree_method == 0 && p_train->Info().num_row_ >= (4UL << 20UL)) {
LOG(CONSOLE) LOG(CONSOLE)
<< "Tree method is automatically selected to be \'approx\'" << "Tree method is automatically selected to be \'approx\'"
<< " for faster speed." << " for faster speed."
@ -487,57 +487,57 @@ class LearnerImpl : public Learner {
max_row_perbatch = std::min(max_row_perbatch, safe_max_row); max_row_perbatch = std::min(max_row_perbatch, safe_max_row);
} }
if (tparam.tree_method == 1) { if (tparam_.tree_method == 1) {
LOG(CONSOLE) << "Tree method is selected to be \'approx\'"; LOG(CONSOLE) << "Tree method is selected to be \'approx\'";
max_row_perbatch = std::min(max_row_perbatch, safe_max_row); max_row_perbatch = std::min(max_row_perbatch, safe_max_row);
} }
if (tparam.test_flag == "block" || tparam.dsplit == 2) { if (tparam_.test_flag == "block" || tparam_.dsplit == 2) {
max_row_perbatch = std::min(max_row_perbatch, safe_max_row); max_row_perbatch = std::min(max_row_perbatch, safe_max_row);
} }
// initialize column access // initialize column access
p_train->InitColAccess(enabled, tparam.prob_buffer_row, max_row_perbatch, true); p_train->InitColAccess(enabled, tparam_.prob_buffer_row, max_row_perbatch, true);
} }
if (!p_train->SingleColBlock() && cfg_.count("updater") == 0) { if (!p_train->SingleColBlock() && cfg_.count("updater") == 0) {
if (tparam.tree_method == 2) { if (tparam_.tree_method == 2) {
LOG(CONSOLE) << "tree method is set to be 'exact'," LOG(CONSOLE) << "tree method is set to be 'exact',"
<< " but currently we are only able to proceed with " << " but currently we are only able to proceed with "
"approximate algorithm"; "approximate algorithm";
} }
cfg_["updater"] = "grow_histmaker,prune"; cfg_["updater"] = "grow_histmaker,prune";
if (gbm_.get() != nullptr) { if (gbm_ != nullptr) {
gbm_->Configure(cfg_.begin(), cfg_.end()); gbm_->Configure(cfg_.begin(), cfg_.end());
} }
} }
monitor.Stop("LazyInitDMatrix"); monitor_.Stop("LazyInitDMatrix");
} }
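The same check rewrites variables initialized from a static_cast, as with ncol and safe_max_row above: the target type already appears in the cast, so auto removes the duplication without losing information. For example:

    #include <cstddef>

    std::size_t SafeMaxRows(double configured) {
      // before: const std::size_t safe_max_row = static_cast<std::size_t>(32ul << 10ul);
      const auto safe_max_row = static_cast<std::size_t>(32ul << 10ul);  // 32768
      const auto requested = static_cast<std::size_t>(configured);
      return requested < safe_max_row ? requested : safe_max_row;
    }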
// return whether model is already initialized. // return whether model is already initialized.
inline bool ModelInitialized() const { return gbm_.get() != nullptr; } inline bool ModelInitialized() const { return gbm_ != nullptr; }
// lazily initialize the model if it hasn't yet been initialized. // lazily initialize the model if it hasn't yet been initialized.
inline void LazyInitModel() { inline void LazyInitModel() {
if (this->ModelInitialized()) return; if (this->ModelInitialized()) return;
// estimate feature bound // estimate feature bound
unsigned num_feature = 0; unsigned num_feature = 0;
for (size_t i = 0; i < cache_.size(); ++i) { for (auto & matrix : cache_) {
CHECK(cache_[i] != nullptr); CHECK(matrix != nullptr);
num_feature = std::max(num_feature, num_feature = std::max(num_feature,
static_cast<unsigned>(cache_[i]->info().num_col)); static_cast<unsigned>(matrix->Info().num_col_));
} }
// run allreduce on num_feature to find the maximum value // run allreduce on num_feature to find the maximum value
rabit::Allreduce<rabit::op::Max>(&num_feature, 1); rabit::Allreduce<rabit::op::Max>(&num_feature, 1);
if (num_feature > mparam.num_feature) { if (num_feature > mparam_.num_feature) {
mparam.num_feature = num_feature; mparam_.num_feature = num_feature;
} }
// setup // setup
cfg_["num_feature"] = common::ToString(mparam.num_feature); cfg_["num_feature"] = common::ToString(mparam_.num_feature);
CHECK(obj_.get() == nullptr && gbm_.get() == nullptr); CHECK(obj_ == nullptr && gbm_ == nullptr);
obj_.reset(ObjFunction::Create(name_obj_)); obj_.reset(ObjFunction::Create(name_obj_));
obj_->Configure(cfg_.begin(), cfg_.end()); obj_->Configure(cfg_.begin(), cfg_.end());
// reset the base score // reset the base score
mparam.base_score = obj_->ProbToMargin(mparam.base_score); mparam_.base_score = obj_->ProbToMargin(mparam_.base_score);
gbm_.reset(GradientBooster::Create(name_gbm_, cache_, mparam.base_score)); gbm_.reset(GradientBooster::Create(name_gbm_, cache_, mparam_.base_score));
gbm_->Configure(cfg_.begin(), cfg_.end()); gbm_->Configure(cfg_.begin(), cfg_.end());
} }
/*! /*!
@ -549,15 +549,15 @@ class LearnerImpl : public Learner {
*/ */
inline void PredictRaw(DMatrix* data, HostDeviceVector<bst_float>* out_preds, inline void PredictRaw(DMatrix* data, HostDeviceVector<bst_float>* out_preds,
unsigned ntree_limit = 0) const { unsigned ntree_limit = 0) const {
CHECK(gbm_.get() != nullptr) CHECK(gbm_ != nullptr)
<< "Predict must happen after Load or InitModel"; << "Predict must happen after Load or InitModel";
gbm_->PredictBatch(data, out_preds, ntree_limit); gbm_->PredictBatch(data, out_preds, ntree_limit);
} }
// model parameter // model parameter
LearnerModelParam mparam; LearnerModelParam mparam_;
// training parameter // training parameter
LearnerTrainParam tparam; LearnerTrainParam tparam_;
// configurations // configurations
std::map<std::string, std::string> cfg_; std::map<std::string, std::string> cfg_;
// attributes // attributes
@ -569,7 +569,7 @@ class LearnerImpl : public Learner {
// temporal storages for prediction // temporal storages for prediction
HostDeviceVector<bst_float> preds_; HostDeviceVector<bst_float> preds_;
// gradient pairs // gradient pairs
HostDeviceVector<bst_gpair> gpair_; HostDeviceVector<GradientPair> gpair_;
private: private:
/*! \brief random number transformation seed. */ /*! \brief random number transformation seed. */
@ -577,7 +577,7 @@ class LearnerImpl : public Learner {
// internal cached dmatrix // internal cached dmatrix
std::vector<std::shared_ptr<DMatrix> > cache_; std::vector<std::shared_ptr<DMatrix> > cache_;
common::Monitor monitor; common::Monitor monitor_;
}; };
Learner* Learner::Create( Learner* Learner::Create(


@ -62,14 +62,14 @@ inline double CoordinateDeltaBias(double sum_grad, double sum_hess) {
* \return The gradient and diagonal Hessian entry for a given feature. * \return The gradient and diagonal Hessian entry for a given feature.
*/ */
inline std::pair<double, double> GetGradient(int group_idx, int num_group, int fidx, inline std::pair<double, double> GetGradient(int group_idx, int num_group, int fidx,
const std::vector<bst_gpair> &gpair, const std::vector<GradientPair> &gpair,
DMatrix *p_fmat) { DMatrix *p_fmat) {
double sum_grad = 0.0, sum_hess = 0.0; double sum_grad = 0.0, sum_hess = 0.0;
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator({static_cast<bst_uint>(fidx)}); dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator({static_cast<bst_uint>(fidx)});
while (iter->Next()) { while (iter->Next()) {
const ColBatch &batch = iter->Value(); const ColBatch &batch = iter->Value();
ColBatch::Inst col = batch[0]; ColBatch::Inst col = batch[0];
const bst_omp_uint ndata = static_cast<bst_omp_uint>(col.length); const auto ndata = static_cast<bst_omp_uint>(col.length);
for (bst_omp_uint j = 0; j < ndata; ++j) { for (bst_omp_uint j = 0; j < ndata; ++j) {
const bst_float v = col[j].fvalue; const bst_float v = col[j].fvalue;
auto &p = gpair[col[j].index * num_group + group_idx]; auto &p = gpair[col[j].index * num_group + group_idx];
@ -93,14 +93,14 @@ inline std::pair<double, double> GetGradient(int group_idx, int num_group, int f
* \return The gradient and diagonal Hessian entry for a given feature. * \return The gradient and diagonal Hessian entry for a given feature.
*/ */
inline std::pair<double, double> GetGradientParallel(int group_idx, int num_group, int fidx, inline std::pair<double, double> GetGradientParallel(int group_idx, int num_group, int fidx,
const std::vector<bst_gpair> &gpair, const std::vector<GradientPair> &gpair,
DMatrix *p_fmat) { DMatrix *p_fmat) {
double sum_grad = 0.0, sum_hess = 0.0; double sum_grad = 0.0, sum_hess = 0.0;
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator({static_cast<bst_uint>(fidx)}); dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator({static_cast<bst_uint>(fidx)});
while (iter->Next()) { while (iter->Next()) {
const ColBatch &batch = iter->Value(); const ColBatch &batch = iter->Value();
ColBatch::Inst col = batch[0]; ColBatch::Inst col = batch[0];
const bst_omp_uint ndata = static_cast<bst_omp_uint>(col.length); const auto ndata = static_cast<bst_omp_uint>(col.length);
#pragma omp parallel for schedule(static) reduction(+ : sum_grad, sum_hess) #pragma omp parallel for schedule(static) reduction(+ : sum_grad, sum_hess)
for (bst_omp_uint j = 0; j < ndata; ++j) { for (bst_omp_uint j = 0; j < ndata; ++j) {
const bst_float v = col[j].fvalue; const bst_float v = col[j].fvalue;
@ -124,11 +124,11 @@ inline std::pair<double, double> GetGradientParallel(int group_idx, int num_grou
* \return The gradient and diagonal Hessian entry for the bias. * \return The gradient and diagonal Hessian entry for the bias.
*/ */
inline std::pair<double, double> GetBiasGradientParallel(int group_idx, int num_group, inline std::pair<double, double> GetBiasGradientParallel(int group_idx, int num_group,
const std::vector<bst_gpair> &gpair, const std::vector<GradientPair> &gpair,
DMatrix *p_fmat) { DMatrix *p_fmat) {
const RowSet &rowset = p_fmat->buffered_rowset(); const RowSet &rowset = p_fmat->BufferedRowset();
double sum_grad = 0.0, sum_hess = 0.0; double sum_grad = 0.0, sum_hess = 0.0;
const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size()); const auto ndata = static_cast<bst_omp_uint>(rowset.Size());
#pragma omp parallel for schedule(static) reduction(+ : sum_grad, sum_hess) #pragma omp parallel for schedule(static) reduction(+ : sum_grad, sum_hess)
for (bst_omp_uint i = 0; i < ndata; ++i) { for (bst_omp_uint i = 0; i < ndata; ++i) {
auto &p = gpair[rowset[i] * num_group + group_idx]; auto &p = gpair[rowset[i] * num_group + group_idx];
@ -151,7 +151,7 @@ inline std::pair<double, double> GetBiasGradientParallel(int group_idx, int num_
* \param p_fmat The input feature matrix. * \param p_fmat The input feature matrix.
*/ */
inline void UpdateResidualParallel(int fidx, int group_idx, int num_group, inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
float dw, std::vector<bst_gpair> *in_gpair, float dw, std::vector<GradientPair> *in_gpair,
DMatrix *p_fmat) { DMatrix *p_fmat) {
if (dw == 0.0f) return; if (dw == 0.0f) return;
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator({static_cast<bst_uint>(fidx)}); dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator({static_cast<bst_uint>(fidx)});
@ -159,12 +159,12 @@ inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
const ColBatch &batch = iter->Value(); const ColBatch &batch = iter->Value();
ColBatch::Inst col = batch[0]; ColBatch::Inst col = batch[0];
// update grad value // update grad value
const bst_omp_uint num_row = static_cast<bst_omp_uint>(col.length); const auto num_row = static_cast<bst_omp_uint>(col.length);
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < num_row; ++j) { for (bst_omp_uint j = 0; j < num_row; ++j) {
bst_gpair &p = (*in_gpair)[col[j].index * num_group + group_idx]; GradientPair &p = (*in_gpair)[col[j].index * num_group + group_idx];
if (p.GetHess() < 0.0f) continue; if (p.GetHess() < 0.0f) continue;
p += bst_gpair(p.GetHess() * col[j].fvalue * dw, 0); p += GradientPair(p.GetHess() * col[j].fvalue * dw, 0);
} }
} }
} }
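The update `p += GradientPair(p.GetHess() * col[j].fvalue * dw, 0)` is the second-order gradient correction after a coordinate step: if row i has feature value x_i and the weight moves by Δw, the margin shifts by x_i Δw, and a first-order Taylor expansion of the per-row gradient gives

    g_i \leftarrow g_i + h_i \, x_i \, \Delta w, \qquad h_i \text{ unchanged,}

which is exactly what the loop applies to every row carrying the feature, while the Hessian component of the pair is left at zero.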
@ -179,16 +179,16 @@ inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
* \param p_fmat The input feature matrix. * \param p_fmat The input feature matrix.
*/ */
inline void UpdateBiasResidualParallel(int group_idx, int num_group, float dbias, inline void UpdateBiasResidualParallel(int group_idx, int num_group, float dbias,
std::vector<bst_gpair> *in_gpair, std::vector<GradientPair> *in_gpair,
DMatrix *p_fmat) { DMatrix *p_fmat) {
if (dbias == 0.0f) return; if (dbias == 0.0f) return;
const RowSet &rowset = p_fmat->buffered_rowset(); const RowSet &rowset = p_fmat->BufferedRowset();
const bst_omp_uint ndata = static_cast<bst_omp_uint>(p_fmat->info().num_row); const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ndata; ++i) { for (bst_omp_uint i = 0; i < ndata; ++i) {
bst_gpair &g = (*in_gpair)[rowset[i] * num_group + group_idx]; GradientPair &g = (*in_gpair)[rowset[i] * num_group + group_idx];
if (g.GetHess() < 0.0f) continue; if (g.GetHess() < 0.0f) continue;
g += bst_gpair(g.GetHess() * dbias, 0); g += GradientPair(g.GetHess() * dbias, 0);
} }
} }
@ -201,7 +201,7 @@ class FeatureSelector {
/*! \brief factory method */ /*! \brief factory method */
static FeatureSelector *Create(int choice); static FeatureSelector *Create(int choice);
/*! \brief virtual destructor */ /*! \brief virtual destructor */
virtual ~FeatureSelector() {} virtual ~FeatureSelector() = default;
/** /**
* \brief Setting up the selector state prior to looping through features. * \brief Setting up the selector state prior to looping through features.
* *
@ -213,7 +213,7 @@ class FeatureSelector {
* \param param A parameter with algorithm-dependent use. * \param param A parameter with algorithm-dependent use.
*/ */
virtual void Setup(const gbm::GBLinearModel &model, virtual void Setup(const gbm::GBLinearModel &model,
const std::vector<bst_gpair> &gpair, const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
float alpha, float lambda, int param) {} float alpha, float lambda, int param) {}
/** /**
@ -232,7 +232,7 @@ class FeatureSelector {
virtual int NextFeature(int iteration, virtual int NextFeature(int iteration,
const gbm::GBLinearModel &model, const gbm::GBLinearModel &model,
int group_idx, int group_idx,
const std::vector<bst_gpair> &gpair, const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda) = 0; DMatrix *p_fmat, float alpha, float lambda) = 0;
}; };
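Around FeatureSelector, two further modernize fixes appear: the empty virtual destructor body becomes `= default`, and (in the derived selectors below) reimplemented virtuals gain `override` instead of repeating `virtual`. A compressed illustration with toy names:

    class Selector {
     public:
      virtual ~Selector() = default;          // modernize-use-equals-default
      virtual int NextFeature(int iteration) = 0;
    };

    class Cyclic : public Selector {
     public:
      // modernize-use-override: `override` makes signature mismatches a compile error
      int NextFeature(int iteration) override { return iteration % 16; }
    };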
@ -242,7 +242,7 @@ class FeatureSelector {
class CyclicFeatureSelector : public FeatureSelector { class CyclicFeatureSelector : public FeatureSelector {
public: public:
int NextFeature(int iteration, const gbm::GBLinearModel &model, int NextFeature(int iteration, const gbm::GBLinearModel &model,
int group_idx, const std::vector<bst_gpair> &gpair, int group_idx, const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda) override { DMatrix *p_fmat, float alpha, float lambda) override {
return iteration % model.param.num_feature; return iteration % model.param.num_feature;
} }
@ -255,23 +255,23 @@ class CyclicFeatureSelector : public FeatureSelector {
class ShuffleFeatureSelector : public FeatureSelector { class ShuffleFeatureSelector : public FeatureSelector {
public: public:
void Setup(const gbm::GBLinearModel &model, void Setup(const gbm::GBLinearModel &model,
const std::vector<bst_gpair> &gpair, const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda, int param) override { DMatrix *p_fmat, float alpha, float lambda, int param) override {
if (feat_index.size() == 0) { if (feat_index_.size() == 0) {
feat_index.resize(model.param.num_feature); feat_index_.resize(model.param.num_feature);
std::iota(feat_index.begin(), feat_index.end(), 0); std::iota(feat_index_.begin(), feat_index_.end(), 0);
} }
std::shuffle(feat_index.begin(), feat_index.end(), common::GlobalRandom()); std::shuffle(feat_index_.begin(), feat_index_.end(), common::GlobalRandom());
} }
int NextFeature(int iteration, const gbm::GBLinearModel &model, int NextFeature(int iteration, const gbm::GBLinearModel &model,
int group_idx, const std::vector<bst_gpair> &gpair, int group_idx, const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda) override { DMatrix *p_fmat, float alpha, float lambda) override {
return feat_index[iteration % model.param.num_feature]; return feat_index_[iteration % model.param.num_feature];
} }
protected: protected:
std::vector<bst_uint> feat_index; std::vector<bst_uint> feat_index_;
}; };
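ShuffleFeatureSelector's Setup is a standard index shuffle: std::iota fills feat_index_ with 0..num_feature-1 once, and std::shuffle permutes it each round. A standalone version, with std::mt19937 standing in for xgboost's common::GlobalRandom():

    #include <algorithm>
    #include <cstdint>
    #include <numeric>
    #include <random>
    #include <vector>

    std::vector<uint32_t> ShuffledFeatureOrder(uint32_t num_feature, uint32_t seed) {
      std::vector<uint32_t> index(num_feature);
      std::iota(index.begin(), index.end(), 0u);  // 0, 1, ..., num_feature - 1
      std::mt19937 rng(seed);                     // GlobalRandom() in xgboost
      std::shuffle(index.begin(), index.end(), rng);
      return index;
    }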
/** /**
@ -281,7 +281,7 @@ class ShuffleFeatureSelector : public FeatureSelector {
class RandomFeatureSelector : public FeatureSelector { class RandomFeatureSelector : public FeatureSelector {
public: public:
int NextFeature(int iteration, const gbm::GBLinearModel &model, int NextFeature(int iteration, const gbm::GBLinearModel &model,
int group_idx, const std::vector<bst_gpair> &gpair, int group_idx, const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda) override { DMatrix *p_fmat, float alpha, float lambda) override {
return common::GlobalRandom()() % model.param.num_feature; return common::GlobalRandom()() % model.param.num_feature;
} }
@ -299,32 +299,32 @@ class RandomFeatureSelector : public FeatureSelector {
class GreedyFeatureSelector : public FeatureSelector { class GreedyFeatureSelector : public FeatureSelector {
public: public:
void Setup(const gbm::GBLinearModel &model, void Setup(const gbm::GBLinearModel &model,
const std::vector<bst_gpair> &gpair, const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda, int param) override { DMatrix *p_fmat, float alpha, float lambda, int param) override {
top_k = static_cast<bst_uint>(param); top_k_ = static_cast<bst_uint>(param);
const bst_uint ngroup = model.param.num_output_group; const bst_uint ngroup = model.param.num_output_group;
if (param <= 0) top_k = std::numeric_limits<bst_uint>::max(); if (param <= 0) top_k_ = std::numeric_limits<bst_uint>::max();
if (counter.size() == 0) { if (counter_.size() == 0) {
counter.resize(ngroup); counter_.resize(ngroup);
gpair_sums.resize(model.param.num_feature * ngroup); gpair_sums_.resize(model.param.num_feature * ngroup);
} }
for (bst_uint gid = 0u; gid < ngroup; ++gid) { for (bst_uint gid = 0u; gid < ngroup; ++gid) {
counter[gid] = 0u; counter_[gid] = 0u;
} }
} }
int NextFeature(int iteration, const gbm::GBLinearModel &model, int NextFeature(int iteration, const gbm::GBLinearModel &model,
int group_idx, const std::vector<bst_gpair> &gpair, int group_idx, const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda) override { DMatrix *p_fmat, float alpha, float lambda) override {
// k-th selected feature for a group // k-th selected feature for a group
auto k = counter[group_idx]++; auto k = counter_[group_idx]++;
// stop after either reaching top-K or going through all the features in a group // stop after either reaching top-K or going through all the features in a group
if (k >= top_k || counter[group_idx] == model.param.num_feature) return -1; if (k >= top_k_ || counter_[group_idx] == model.param.num_feature) return -1;
const int ngroup = model.param.num_output_group; const int ngroup = model.param.num_output_group;
const bst_omp_uint nfeat = model.param.num_feature; const bst_omp_uint nfeat = model.param.num_feature;
// Calculate univariate gradient sums // Calculate univariate gradient sums
std::fill(gpair_sums.begin(), gpair_sums.end(), std::make_pair(0., 0.)); std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(); dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator();
while (iter->Next()) { while (iter->Next()) {
const ColBatch &batch = iter->Value(); const ColBatch &batch = iter->Value();
@ -332,7 +332,7 @@ class GreedyFeatureSelector : public FeatureSelector {
for (bst_omp_uint i = 0; i < nfeat; ++i) { for (bst_omp_uint i = 0; i < nfeat; ++i) {
const ColBatch::Inst col = batch[i]; const ColBatch::Inst col = batch[i];
const bst_uint ndata = col.length; const bst_uint ndata = col.length;
auto &sums = gpair_sums[group_idx * nfeat + i]; auto &sums = gpair_sums_[group_idx * nfeat + i];
for (bst_uint j = 0u; j < ndata; ++j) { for (bst_uint j = 0u; j < ndata; ++j) {
const bst_float v = col[j].fvalue; const bst_float v = col[j].fvalue;
auto &p = gpair[col[j].index * ngroup + group_idx]; auto &p = gpair[col[j].index * ngroup + group_idx];
@ -346,7 +346,7 @@ class GreedyFeatureSelector : public FeatureSelector {
int best_fidx = 0; int best_fidx = 0;
double best_weight_update = 0.0f; double best_weight_update = 0.0f;
for (bst_omp_uint fidx = 0; fidx < nfeat; ++fidx) { for (bst_omp_uint fidx = 0; fidx < nfeat; ++fidx) {
auto &s = gpair_sums[group_idx * nfeat + fidx]; auto &s = gpair_sums_[group_idx * nfeat + fidx];
float dw = std::abs(static_cast<bst_float>( float dw = std::abs(static_cast<bst_float>(
CoordinateDelta(s.first, s.second, model[fidx][group_idx], alpha, lambda))); CoordinateDelta(s.first, s.second, model[fidx][group_idx], alpha, lambda)));
if (dw > best_weight_update) { if (dw > best_weight_update) {
@ -358,9 +358,9 @@ class GreedyFeatureSelector : public FeatureSelector {
} }
protected: protected:
bst_uint top_k; bst_uint top_k_;
std::vector<bst_uint> counter; std::vector<bst_uint> counter_;
std::vector<std::pair<double, double>> gpair_sums; std::vector<std::pair<double, double>> gpair_sums_;
}; };
/** /**
@ -377,21 +377,21 @@ class GreedyFeatureSelector : public FeatureSelector {
class ThriftyFeatureSelector : public FeatureSelector { class ThriftyFeatureSelector : public FeatureSelector {
public: public:
void Setup(const gbm::GBLinearModel &model, void Setup(const gbm::GBLinearModel &model,
const std::vector<bst_gpair> &gpair, const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda, int param) override { DMatrix *p_fmat, float alpha, float lambda, int param) override {
top_k = static_cast<bst_uint>(param); top_k_ = static_cast<bst_uint>(param);
if (param <= 0) top_k = std::numeric_limits<bst_uint>::max(); if (param <= 0) top_k_ = std::numeric_limits<bst_uint>::max();
const bst_uint ngroup = model.param.num_output_group; const bst_uint ngroup = model.param.num_output_group;
const bst_omp_uint nfeat = model.param.num_feature; const bst_omp_uint nfeat = model.param.num_feature;
if (deltaw.size() == 0) { if (deltaw_.size() == 0) {
deltaw.resize(nfeat * ngroup); deltaw_.resize(nfeat * ngroup);
sorted_idx.resize(nfeat * ngroup); sorted_idx_.resize(nfeat * ngroup);
counter.resize(ngroup); counter_.resize(ngroup);
gpair_sums.resize(nfeat * ngroup); gpair_sums_.resize(nfeat * ngroup);
} }
// Calculate univariate gradient sums // Calculate univariate gradient sums
std::fill(gpair_sums.begin(), gpair_sums.end(), std::make_pair(0., 0.)); std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(); dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator();
while (iter->Next()) { while (iter->Next()) {
const ColBatch &batch = iter->Value(); const ColBatch &batch = iter->Value();
@ -401,7 +401,7 @@ class ThriftyFeatureSelector : public FeatureSelector {
const ColBatch::Inst col = batch[i]; const ColBatch::Inst col = batch[i];
const bst_uint ndata = col.length; const bst_uint ndata = col.length;
for (bst_uint gid = 0u; gid < ngroup; ++gid) { for (bst_uint gid = 0u; gid < ngroup; ++gid) {
auto &sums = gpair_sums[gid * nfeat + i]; auto &sums = gpair_sums_[gid * nfeat + i];
for (bst_uint j = 0u; j < ndata; ++j) { for (bst_uint j = 0u; j < ndata; ++j) {
const bst_float v = col[j].fvalue; const bst_float v = col[j].fvalue;
auto &p = gpair[col[j].index * ngroup + gid]; auto &p = gpair[col[j].index * ngroup + gid];
@ -413,45 +413,45 @@ class ThriftyFeatureSelector : public FeatureSelector {
} }
} }
// rank by descending weight magnitude within the groups // rank by descending weight magnitude within the groups
std::fill(deltaw.begin(), deltaw.end(), 0.f); std::fill(deltaw_.begin(), deltaw_.end(), 0.f);
std::iota(sorted_idx.begin(), sorted_idx.end(), 0); std::iota(sorted_idx_.begin(), sorted_idx_.end(), 0);
bst_float *pdeltaw = &deltaw[0]; bst_float *pdeltaw = &deltaw_[0];
for (bst_uint gid = 0u; gid < ngroup; ++gid) { for (bst_uint gid = 0u; gid < ngroup; ++gid) {
// Calculate univariate weight changes // Calculate univariate weight changes
for (bst_omp_uint i = 0; i < nfeat; ++i) { for (bst_omp_uint i = 0; i < nfeat; ++i) {
auto ii = gid * nfeat + i; auto ii = gid * nfeat + i;
auto &s = gpair_sums[ii]; auto &s = gpair_sums_[ii];
deltaw[ii] = static_cast<bst_float>(CoordinateDelta( deltaw_[ii] = static_cast<bst_float>(CoordinateDelta(
s.first, s.second, model[i][gid], alpha, lambda)); s.first, s.second, model[i][gid], alpha, lambda));
} }
// sort in descending order of deltaw abs values // sort in descending order of deltaw abs values
auto start = sorted_idx.begin() + gid * nfeat; auto start = sorted_idx_.begin() + gid * nfeat;
std::sort(start, start + nfeat, std::sort(start, start + nfeat,
[pdeltaw](size_t i, size_t j) { [pdeltaw](size_t i, size_t j) {
return std::abs(*(pdeltaw + i)) > std::abs(*(pdeltaw + j)); return std::abs(*(pdeltaw + i)) > std::abs(*(pdeltaw + j));
}); });
counter[gid] = 0u; counter_[gid] = 0u;
} }
} }
int NextFeature(int iteration, const gbm::GBLinearModel &model, int NextFeature(int iteration, const gbm::GBLinearModel &model,
int group_idx, const std::vector<bst_gpair> &gpair, int group_idx, const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, float alpha, float lambda) override { DMatrix *p_fmat, float alpha, float lambda) override {
// k-th selected feature for a group // k-th selected feature for a group
auto k = counter[group_idx]++; auto k = counter_[group_idx]++;
// stop after either reaching top-N or going through all the features in a group // stop after either reaching top-N or going through all the features in a group
if (k >= top_k || counter[group_idx] == model.param.num_feature) return -1; if (k >= top_k_ || counter_[group_idx] == model.param.num_feature) return -1;
// note that sorted_idx stores the "long" indices // note that sorted_idx stores the "long" indices
const size_t grp_offset = group_idx * model.param.num_feature; const size_t grp_offset = group_idx * model.param.num_feature;
return static_cast<int>(sorted_idx[grp_offset + k] - grp_offset); return static_cast<int>(sorted_idx_[grp_offset + k] - grp_offset);
} }
protected: protected:
bst_uint top_k; bst_uint top_k_;
std::vector<bst_float> deltaw; std::vector<bst_float> deltaw_;
std::vector<size_t> sorted_idx; std::vector<size_t> sorted_idx_;
std::vector<bst_uint> counter; std::vector<bst_uint> counter_;
std::vector<std::pair<double, double>> gpair_sums; std::vector<std::pair<double, double>> gpair_sums_;
}; };
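ThriftyFeatureSelector sorts indices rather than weights: sorted_idx_ is permuted by a lambda that compares the |deltaw_| values the indices point at, i.e. a descending-magnitude argsort. The idiom in isolation:

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <numeric>
    #include <vector>

    std::vector<std::size_t> ArgsortByMagnitude(const std::vector<float>& deltaw) {
      std::vector<std::size_t> idx(deltaw.size());
      std::iota(idx.begin(), idx.end(), 0);
      std::sort(idx.begin(), idx.end(), [&deltaw](std::size_t i, std::size_t j) {
        return std::fabs(deltaw[i]) > std::fabs(deltaw[j]);  // descending |deltaw|
      });
      return idx;
    }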
/** /**


@ -85,7 +85,7 @@ class CoordinateUpdater : public LinearUpdater {
monitor.Init("CoordinateUpdater", param.debug_verbose); monitor.Init("CoordinateUpdater", param.debug_verbose);
} }
void Update(std::vector<bst_gpair> *in_gpair, DMatrix *p_fmat, void Update(std::vector<GradientPair> *in_gpair, DMatrix *p_fmat,
gbm::GBLinearModel *model, double sum_instance_weight) override { gbm::GBLinearModel *model, double sum_instance_weight) override {
param.DenormalizePenalties(sum_instance_weight); param.DenormalizePenalties(sum_instance_weight);
const int ngroup = model->param.num_output_group; const int ngroup = model->param.num_output_group;
@ -111,7 +111,7 @@ class CoordinateUpdater : public LinearUpdater {
} }
} }
inline void UpdateFeature(int fidx, int group_idx, std::vector<bst_gpair> *in_gpair, inline void UpdateFeature(int fidx, int group_idx, std::vector<GradientPair> *in_gpair,
DMatrix *p_fmat, gbm::GBLinearModel *model) { DMatrix *p_fmat, gbm::GBLinearModel *model) {
const int ngroup = model->param.num_output_group; const int ngroup = model->param.num_output_group;
bst_float &w = (*model)[fidx][group_idx]; bst_float &w = (*model)[fidx][group_idx];


@ -58,59 +58,60 @@ class ShotgunUpdater : public LinearUpdater {
public: public:
// set training parameter // set training parameter
void Init(const std::vector<std::pair<std::string, std::string> > &args) override { void Init(const std::vector<std::pair<std::string, std::string> > &args) override {
param.InitAllowUnknown(args); param_.InitAllowUnknown(args);
selector.reset(FeatureSelector::Create(param.feature_selector)); selector_.reset(FeatureSelector::Create(param_.feature_selector));
} }
void Update(std::vector<bst_gpair> *in_gpair, DMatrix *p_fmat, void Update(std::vector<GradientPair> *in_gpair, DMatrix *p_fmat,
gbm::GBLinearModel *model, double sum_instance_weight) override { gbm::GBLinearModel *model, double sum_instance_weight) override {
param.DenormalizePenalties(sum_instance_weight); param_.DenormalizePenalties(sum_instance_weight);
std::vector<bst_gpair> &gpair = *in_gpair; std::vector<GradientPair> &gpair = *in_gpair;
const int ngroup = model->param.num_output_group; const int ngroup = model->param.num_output_group;
// update bias // update bias
for (int gid = 0; gid < ngroup; ++gid) { for (int gid = 0; gid < ngroup; ++gid) {
auto grad = GetBiasGradientParallel(gid, ngroup, *in_gpair, p_fmat); auto grad = GetBiasGradientParallel(gid, ngroup, *in_gpair, p_fmat);
auto dbias = static_cast<bst_float>(param.learning_rate * auto dbias = static_cast<bst_float>(param_.learning_rate *
CoordinateDeltaBias(grad.first, grad.second)); CoordinateDeltaBias(grad.first, grad.second));
model->bias()[gid] += dbias; model->bias()[gid] += dbias;
UpdateBiasResidualParallel(gid, ngroup, dbias, in_gpair, p_fmat); UpdateBiasResidualParallel(gid, ngroup, dbias, in_gpair, p_fmat);
} }
// lock-free parallel updates of weights // lock-free parallel updates of weights
selector->Setup(*model, *in_gpair, p_fmat, param.reg_alpha_denorm, param.reg_lambda_denorm, 0); selector_->Setup(*model, *in_gpair, p_fmat, param_.reg_alpha_denorm,
param_.reg_lambda_denorm, 0);
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(); dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator();
while (iter->Next()) { while (iter->Next()) {
const ColBatch &batch = iter->Value(); const ColBatch &batch = iter->Value();
const bst_omp_uint nfeat = static_cast<bst_omp_uint>(batch.size); const auto nfeat = static_cast<bst_omp_uint>(batch.size);
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nfeat; ++i) { for (bst_omp_uint i = 0; i < nfeat; ++i) {
int ii = selector->NextFeature(i, *model, 0, *in_gpair, p_fmat, int ii = selector_->NextFeature(i, *model, 0, *in_gpair, p_fmat,
param.reg_alpha_denorm, param.reg_lambda_denorm); param_.reg_alpha_denorm, param_.reg_lambda_denorm);
if (ii < 0) continue; if (ii < 0) continue;
const bst_uint fid = batch.col_index[ii]; const bst_uint fid = batch.col_index[ii];
ColBatch::Inst col = batch[ii]; ColBatch::Inst col = batch[ii];
for (int gid = 0; gid < ngroup; ++gid) { for (int gid = 0; gid < ngroup; ++gid) {
double sum_grad = 0.0, sum_hess = 0.0; double sum_grad = 0.0, sum_hess = 0.0;
for (bst_uint j = 0; j < col.length; ++j) { for (bst_uint j = 0; j < col.length; ++j) {
bst_gpair &p = gpair[col[j].index * ngroup + gid]; GradientPair &p = gpair[col[j].index * ngroup + gid];
if (p.GetHess() < 0.0f) continue; if (p.GetHess() < 0.0f) continue;
const bst_float v = col[j].fvalue; const bst_float v = col[j].fvalue;
sum_grad += p.GetGrad() * v; sum_grad += p.GetGrad() * v;
sum_hess += p.GetHess() * v * v; sum_hess += p.GetHess() * v * v;
} }
bst_float &w = (*model)[fid][gid]; bst_float &w = (*model)[fid][gid];
bst_float dw = static_cast<bst_float>( auto dw = static_cast<bst_float>(
param.learning_rate * param_.learning_rate *
CoordinateDelta(sum_grad, sum_hess, w, param.reg_alpha_denorm, CoordinateDelta(sum_grad, sum_hess, w, param_.reg_alpha_denorm,
param.reg_lambda_denorm)); param_.reg_lambda_denorm));
if (dw == 0.f) continue; if (dw == 0.f) continue;
w += dw; w += dw;
// update grad values // update grad values
for (bst_uint j = 0; j < col.length; ++j) { for (bst_uint j = 0; j < col.length; ++j) {
bst_gpair &p = gpair[col[j].index * ngroup + gid]; GradientPair &p = gpair[col[j].index * ngroup + gid];
if (p.GetHess() < 0.0f) continue; if (p.GetHess() < 0.0f) continue;
p += bst_gpair(p.GetHess() * col[j].fvalue * dw, 0); p += GradientPair(p.GetHess() * col[j].fvalue * dw, 0);
} }
} }
} }
@ -119,9 +120,9 @@ class ShotgunUpdater : public LinearUpdater {
protected: protected:
// training parameters // training parameters
ShotgunTrainParam param; ShotgunTrainParam param_;
std::unique_ptr<FeatureSelector> selector; std::unique_ptr<FeatureSelector> selector_;
}; };
DMLC_REGISTER_PARAMETER(ShotgunTrainParam); DMLC_REGISTER_PARAMETER(ShotgunTrainParam);


@ -24,16 +24,16 @@ struct EvalEWiseBase : public Metric {
bst_float Eval(const std::vector<bst_float>& preds, bst_float Eval(const std::vector<bst_float>& preds,
const MetaInfo& info, const MetaInfo& info,
bool distributed) const override { bool distributed) const override {
CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty"; CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.size(), info.labels.size()) CHECK_EQ(preds.size(), info.labels_.size())
<< "label and prediction size not match, " << "label and prediction size not match, "
<< "hint: use merror or mlogloss for multi-class classification"; << "hint: use merror or mlogloss for multi-class classification";
const omp_ulong ndata = static_cast<omp_ulong>(info.labels.size()); const auto ndata = static_cast<omp_ulong>(info.labels_.size());
double sum = 0.0, wsum = 0.0; double sum = 0.0, wsum = 0.0;
#pragma omp parallel for reduction(+: sum, wsum) schedule(static) #pragma omp parallel for reduction(+: sum, wsum) schedule(static)
for (omp_ulong i = 0; i < ndata; ++i) { for (omp_ulong i = 0; i < ndata; ++i) {
const bst_float wt = info.GetWeight(i); const bst_float wt = info.GetWeight(i);
sum += static_cast<const Derived*>(this)->EvalRow(info.labels[i], preds[i]) * wt; sum += static_cast<const Derived*>(this)->EvalRow(info.labels_[i], preds[i]) * wt;
wsum += wt; wsum += wt;
} }
double dat[2]; dat[0] = sum, dat[1] = wsum; double dat[2]; dat[0] = sum, dat[1] = wsum;
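EvalEWiseBase accumulates the weighted error with an OpenMP reduction: each thread keeps private partial sums for sum and wsum, combined once at the end, so no atomics or locks are needed. A stripped-down equivalent (assumes wsum ends up nonzero):

    #include <vector>

    double WeightedMean(const std::vector<double>& err, const std::vector<double>& wt) {
      double sum = 0.0, wsum = 0.0;
      const auto n = static_cast<long>(err.size());
    #pragma omp parallel for reduction(+ : sum, wsum) schedule(static)
      for (long i = 0; i < n; ++i) {
        sum += err[i] * wt[i];  // per-thread partials, merged by the reduction
        wsum += wt[i];
      }
      return sum / wsum;
    }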


@ -23,23 +23,23 @@ struct EvalMClassBase : public Metric {
bst_float Eval(const std::vector<bst_float> &preds, bst_float Eval(const std::vector<bst_float> &preds,
const MetaInfo &info, const MetaInfo &info,
bool distributed) const override { bool distributed) const override {
CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty"; CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK(preds.size() % info.labels.size() == 0) CHECK(preds.size() % info.labels_.size() == 0)
<< "label and prediction size not match"; << "label and prediction size not match";
const size_t nclass = preds.size() / info.labels.size(); const size_t nclass = preds.size() / info.labels_.size();
CHECK_GE(nclass, 1U) CHECK_GE(nclass, 1U)
<< "mlogloss and merror are only used for multi-class classification," << "mlogloss and merror are only used for multi-class classification,"
<< " use logloss for binary classification"; << " use logloss for binary classification";
const bst_omp_uint ndata = static_cast<bst_omp_uint>(info.labels.size()); const auto ndata = static_cast<bst_omp_uint>(info.labels_.size());
double sum = 0.0, wsum = 0.0; double sum = 0.0, wsum = 0.0;
int label_error = 0; int label_error = 0;
#pragma omp parallel for reduction(+: sum, wsum) schedule(static) #pragma omp parallel for reduction(+: sum, wsum) schedule(static)
for (bst_omp_uint i = 0; i < ndata; ++i) { for (bst_omp_uint i = 0; i < ndata; ++i) {
const bst_float wt = info.GetWeight(i); const bst_float wt = info.GetWeight(i);
int label = static_cast<int>(info.labels[i]); auto label = static_cast<int>(info.labels_[i]);
if (label >= 0 && label < static_cast<int>(nclass)) { if (label >= 0 && label < static_cast<int>(nclass)) {
sum += Derived::EvalRow(label, sum += Derived::EvalRow(label,
dmlc::BeginPtr(preds) + i * nclass, preds.data() + i * nclass,
nclass) * wt; nclass) * wt;
wsum += wt; wsum += wt;
} else { } else {
@ -99,7 +99,7 @@ struct EvalMultiLogLoss : public EvalMClassBase<EvalMultiLogLoss> {
const bst_float *pred, const bst_float *pred,
size_t nclass) { size_t nclass) {
const bst_float eps = 1e-16f; const bst_float eps = 1e-16f;
size_t k = static_cast<size_t>(label); auto k = static_cast<size_t>(label);
if (pred[k] > eps) { if (pred[k] > eps) {
return -std::log(pred[k]); return -std::log(pred[k]);
} else { } else {


@ -19,7 +19,7 @@ DMLC_REGISTRY_FILE_TAG(rank_metric);
struct EvalAMS : public Metric { struct EvalAMS : public Metric {
public: public:
explicit EvalAMS(const char* param) { explicit EvalAMS(const char* param) {
CHECK(param != nullptr) CHECK(param != nullptr) // NOLINT
<< "AMS must be in format ams@k"; << "AMS must be in format ams@k";
ratio_ = atof(param); ratio_ = atof(param);
std::ostringstream os; std::ostringstream os;
@ -32,7 +32,7 @@ struct EvalAMS : public Metric {
CHECK(!distributed) << "metric AMS do not support distributed evaluation"; CHECK(!distributed) << "metric AMS do not support distributed evaluation";
using namespace std; // NOLINT(*) using namespace std; // NOLINT(*)
const bst_omp_uint ndata = static_cast<bst_omp_uint>(info.labels.size()); const auto ndata = static_cast<bst_omp_uint>(info.labels_.size());
std::vector<std::pair<bst_float, unsigned> > rec(ndata); std::vector<std::pair<bst_float, unsigned> > rec(ndata);
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
@ -40,7 +40,7 @@ struct EvalAMS : public Metric {
rec[i] = std::make_pair(preds[i], i); rec[i] = std::make_pair(preds[i], i);
} }
std::sort(rec.begin(), rec.end(), common::CmpFirst); std::sort(rec.begin(), rec.end(), common::CmpFirst);
unsigned ntop = static_cast<unsigned>(ratio_ * ndata); auto ntop = static_cast<unsigned>(ratio_ * ndata);
if (ntop == 0) ntop = ndata; if (ntop == 0) ntop = ndata;
const double br = 10.0; const double br = 10.0;
unsigned thresindex = 0; unsigned thresindex = 0;
@ -48,7 +48,7 @@ struct EvalAMS : public Metric {
for (unsigned i = 0; i < static_cast<unsigned>(ndata-1) && i < ntop; ++i) { for (unsigned i = 0; i < static_cast<unsigned>(ndata-1) && i < ntop; ++i) {
const unsigned ridx = rec[i].second; const unsigned ridx = rec[i].second;
const bst_float wt = info.GetWeight(ridx); const bst_float wt = info.GetWeight(ridx);
if (info.labels[ridx] > 0.5f) { if (info.labels_[ridx] > 0.5f) {
s_tp += wt; s_tp += wt;
} else { } else {
b_fp += wt; b_fp += wt;
@ -84,16 +84,16 @@ struct EvalAuc : public Metric {
bst_float Eval(const std::vector<bst_float> &preds, bst_float Eval(const std::vector<bst_float> &preds,
const MetaInfo &info, const MetaInfo &info,
bool distributed) const override { bool distributed) const override {
CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty"; CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
CHECK_EQ(preds.size(), info.labels.size()) CHECK_EQ(preds.size(), info.labels_.size())
<< "label size predict size not match"; << "label size predict size not match";
std::vector<unsigned> tgptr(2, 0); std::vector<unsigned> tgptr(2, 0);
tgptr[1] = static_cast<unsigned>(info.labels.size()); tgptr[1] = static_cast<unsigned>(info.labels_.size());
const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr; const std::vector<unsigned> &gptr = info.group_ptr_.size() == 0 ? tgptr : info.group_ptr_;
CHECK_EQ(gptr.back(), info.labels.size()) CHECK_EQ(gptr.back(), info.labels_.size())
<< "EvalAuc: group structure must match number of prediction"; << "EvalAuc: group structure must match number of prediction";
const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1); const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
// sum statistics // sum statistics
bst_float sum_auc = 0.0f; bst_float sum_auc = 0.0f;
int auc_error = 0; int auc_error = 0;
@ -102,7 +102,7 @@ struct EvalAuc : public Metric {
for (bst_omp_uint k = 0; k < ngroup; ++k) { for (bst_omp_uint k = 0; k < ngroup; ++k) {
rec.clear(); rec.clear();
for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) { for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
rec.push_back(std::make_pair(preds[j], j)); rec.emplace_back(preds[j], j);
} }
XGBOOST_PARALLEL_SORT(rec.begin(), rec.end(), common::CmpFirst); XGBOOST_PARALLEL_SORT(rec.begin(), rec.end(), common::CmpFirst);
// calculate AUC // calculate AUC
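The push_back(std::make_pair(...)) → emplace_back(...) rewrite above is modernize-use-emplace: the pair is constructed directly in the vector's storage instead of being materialized first and then copied or moved in. Minimal form:

    #include <utility>
    #include <vector>

    void Collect(std::vector<std::pair<float, unsigned>>* rec, float pred, unsigned idx) {
      // before: rec->push_back(std::make_pair(pred, idx));
      rec->emplace_back(pred, idx);  // constructs the pair in place
    }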
@ -110,7 +110,7 @@ struct EvalAuc : public Metric {
double sum_npos = 0.0, sum_nneg = 0.0, buf_pos = 0.0, buf_neg = 0.0; double sum_npos = 0.0, sum_nneg = 0.0, buf_pos = 0.0, buf_neg = 0.0;
for (size_t j = 0; j < rec.size(); ++j) { for (size_t j = 0; j < rec.size(); ++j) {
const bst_float wt = info.GetWeight(rec[j].second); const bst_float wt = info.GetWeight(rec[j].second);
const bst_float ctr = info.labels[rec[j].second]; const bst_float ctr = info.labels_[rec[j].second];
// keep bucketing predictions in the same bucket // keep bucketing predictions in the same bucket
if (j != 0 && rec[j].first != rec[j - 1].first) { if (j != 0 && rec[j].first != rec[j - 1].first) {
sum_pospair += buf_neg * (sum_npos + buf_pos *0.5); sum_pospair += buf_neg * (sum_npos + buf_pos *0.5);
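The bucketed accumulation above implements the pairwise definition of AUC; for 0/1 labels and unit weights it reduces to

    \mathrm{AUC} \;=\; \frac{\sum_{(p,\,n)} \mathbf{1}[\hat{y}_p > \hat{y}_n] \;+\; \tfrac{1}{2}\,\mathbf{1}[\hat{y}_p = \hat{y}_n]}{N_{\mathrm{pos}}\,N_{\mathrm{neg}}},

where the sum runs over positive/negative pairs and ties in the predicted score count half. With the records sorted by descending prediction, sum_npos holds the positive mass already flushed at strictly higher scores, while buf_pos and buf_neg hold the current tie bucket.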
@ -156,16 +156,16 @@ struct EvalRankList : public Metric {
bst_float Eval(const std::vector<bst_float> &preds, bst_float Eval(const std::vector<bst_float> &preds,
const MetaInfo &info, const MetaInfo &info,
bool distributed) const override { bool distributed) const override {
CHECK_EQ(preds.size(), info.labels.size()) CHECK_EQ(preds.size(), info.labels_.size())
<< "label size predict size not match"; << "label size predict size not match";
// quick consistency check when group is not available // quick consistency check when group is not available
std::vector<unsigned> tgptr(2, 0); std::vector<unsigned> tgptr(2, 0);
tgptr[1] = static_cast<unsigned>(preds.size()); tgptr[1] = static_cast<unsigned>(preds.size());
const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr; const std::vector<unsigned> &gptr = info.group_ptr_.size() == 0 ? tgptr : info.group_ptr_;
CHECK_NE(gptr.size(), 0U) << "must specify group when constructing rank file"; CHECK_NE(gptr.size(), 0U) << "must specify group when constructing rank file";
CHECK_EQ(gptr.back(), preds.size()) CHECK_EQ(gptr.back(), preds.size())
<< "EvalRanklist: group structure must match number of prediction"; << "EvalRanklist: group structure must match number of prediction";
const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1); const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
// sum statistics // sum statistics
double sum_metric = 0.0f; double sum_metric = 0.0f;
#pragma omp parallel reduction(+:sum_metric) #pragma omp parallel reduction(+:sum_metric)
@ -176,7 +176,7 @@ struct EvalRankList : public Metric {
for (bst_omp_uint k = 0; k < ngroup; ++k) { for (bst_omp_uint k = 0; k < ngroup; ++k) {
rec.clear(); rec.clear();
for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) { for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
rec.push_back(std::make_pair(preds[j], static_cast<int>(info.labels[j]))); rec.emplace_back(preds[j], static_cast<int>(info.labels_[j]));
} }
sum_metric += this->EvalMetric(rec); sum_metric += this->EvalMetric(rec);
} }
@ -230,7 +230,7 @@ struct EvalPrecision : public EvalRankList{
explicit EvalPrecision(const char *name) : EvalRankList("pre", name) {} explicit EvalPrecision(const char *name) : EvalRankList("pre", name) {}
protected: protected:
virtual bst_float EvalMetric(std::vector< std::pair<bst_float, unsigned> > &rec) const { bst_float EvalMetric(std::vector< std::pair<bst_float, unsigned> > &rec) const override {
// calculate Precision // calculate Precision
std::sort(rec.begin(), rec.end(), common::CmpFirst); std::sort(rec.begin(), rec.end(), common::CmpFirst);
unsigned nhit = 0; unsigned nhit = 0;
@@ -279,7 +279,7 @@ struct EvalMAP : public EvalRankList {
 explicit EvalMAP(const char *name) : EvalRankList("map", name) {}
 protected:
-virtual bst_float EvalMetric(std::vector< std::pair<bst_float, unsigned> > &rec) const {
+bst_float EvalMetric(std::vector< std::pair<bst_float, unsigned> > &rec) const override {
 std::sort(rec.begin(), rec.end(), common::CmpFirst);
 unsigned nhits = 0;
 double sumap = 0.0;
@@ -307,14 +307,14 @@ struct EvalMAP : public EvalRankList {
 /*! \brief Cox: Partial likelihood of the Cox proportional hazards model */
 struct EvalCox : public Metric {
 public:
-EvalCox() {}
+EvalCox() = default;
 bst_float Eval(const std::vector<bst_float> &preds,
 const MetaInfo &info,
 bool distributed) const override {
 CHECK(!distributed) << "Cox metric does not support distributed evaluation";
 using namespace std; // NOLINT(*)
-const bst_omp_uint ndata = static_cast<bst_omp_uint>(info.labels.size());
+const auto ndata = static_cast<bst_omp_uint>(info.labels_.size());
 const std::vector<size_t> &label_order = info.LabelAbsSort();
 // pre-compute a sum for the denominator
@@ -328,7 +328,7 @@ struct EvalCox : public Metric {
 bst_omp_uint num_events = 0;
 for (bst_omp_uint i = 0; i < ndata; ++i) {
 const size_t ind = label_order[i];
-const auto label = info.labels[ind];
+const auto label = info.labels_[ind];
 if (label > 0) {
 out -= log(preds[ind]) - log(exp_p_sum);
 ++num_events;
@@ -336,7 +336,7 @@ struct EvalCox : public Metric {
 // only update the denominator after we move forward in time (labels are sorted)
 accumulated_sum += preds[ind];
-if (i == ndata - 1 || std::abs(label) < std::abs(info.labels[label_order[i + 1]])) {
+if (i == ndata - 1 || std::abs(label) < std::abs(info.labels_[label_order[i + 1]])) {
 exp_p_sum -= accumulated_sum;
 accumulated_sum = 0;
 }
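The loop above accumulates the negative Cox partial log-likelihood. Assuming preds already holds hazard ratios (exponentiated margins) and exp_p_sum is the risk-set sum maintained through accumulated_sum, the quantity being built is

\[ -\ell \;=\; -\sum_{i:\,y_i>0}\Bigl(\log \hat y_i \;-\; \log\!\!\sum_{j\in R(t_i)}\hat y_j\Bigr), \]

where R(t_i) is the set of rows still at risk at event time t_i; only events (label > 0) contribute terms, which is why num_events is tracked separately.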
@@ -358,16 +358,16 @@ struct EvalAucPR : public Metric {
 bst_float Eval(const std::vector<bst_float> &preds, const MetaInfo &info,
 bool distributed) const override {
-CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
-CHECK_EQ(preds.size(), info.labels.size())
+CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
+CHECK_EQ(preds.size(), info.labels_.size())
 << "label size predict size not match";
 std::vector<unsigned> tgptr(2, 0);
-tgptr[1] = static_cast<unsigned>(info.labels.size());
+tgptr[1] = static_cast<unsigned>(info.labels_.size());
 const std::vector<unsigned> &gptr =
-info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
-CHECK_EQ(gptr.back(), info.labels.size())
+info.group_ptr_.size() == 0 ? tgptr : info.group_ptr_;
+CHECK_EQ(gptr.back(), info.labels_.size())
 << "EvalAucPR: group structure must match number of prediction";
-const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
+const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
 // sum statistics
 double auc = 0.0;
 int auc_error = 0, auc_gt_one = 0;
@@ -378,9 +378,9 @@ struct EvalAucPR : public Metric {
 double total_neg = 0.0;
 rec.clear();
 for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
-total_pos += info.GetWeight(j) * info.labels[j];
-total_neg += info.GetWeight(j) * (1.0f - info.labels[j]);
-rec.push_back(std::make_pair(preds[j], j));
+total_pos += info.GetWeight(j) * info.labels_[j];
+total_neg += info.GetWeight(j) * (1.0f - info.labels_[j]);
+rec.emplace_back(preds[j], j);
 }
 XGBOOST_PARALLEL_SORT(rec.begin(), rec.end(), common::CmpFirst);
 // we need pos > 0 && neg > 0
@@ -390,11 +390,10 @@ struct EvalAucPR : public Metric {
 // calculate AUC
 double tp = 0.0, prevtp = 0.0, fp = 0.0, prevfp = 0.0, h = 0.0, a = 0.0, b = 0.0;
 for (size_t j = 0; j < rec.size(); ++j) {
-tp += info.GetWeight(rec[j].second) * info.labels[rec[j].second];
-fp += info.GetWeight(rec[j].second) * (1.0f - info.labels[rec[j].second]);
+tp += info.GetWeight(rec[j].second) * info.labels_[rec[j].second];
+fp += info.GetWeight(rec[j].second) * (1.0f - info.labels_[rec[j].second]);
 if ((j < rec.size() - 1 && rec[j].first != rec[j + 1].first) || j == rec.size() - 1) {
 if (tp == prevtp) {
-h = 1.0;
 a = 1.0;
 b = 0.0;
 } else {
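Dropping `h = 1.0;` is a dead-store cleanup: in the tp == prevtp branch only a and b feed the subsequent area update. For reference, the operating points being integrated are the usual weighted precision/recall pairs per distinct threshold,

\[ \mathrm{prec}_j = \frac{tp_j}{tp_j + fp_j}, \qquad \mathrm{rec}_j = \frac{tp_j}{\mathrm{total\_pos}}, \]

and each group's AUC-PR is the area under the interpolated curve through those points.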
@@ -38,15 +38,15 @@ class SoftmaxMultiClassObj : public ObjFunction {
 void GetGradient(HostDeviceVector<bst_float>* preds,
 const MetaInfo& info,
 int iter,
-HostDeviceVector<bst_gpair>* out_gpair) override {
-CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
-CHECK(preds->size() == (static_cast<size_t>(param_.num_class) * info.labels.size()))
+HostDeviceVector<GradientPair>* out_gpair) override {
+CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
+CHECK(preds->Size() == (static_cast<size_t>(param_.num_class) * info.labels_.size()))
 << "SoftmaxMultiClassObj: label size and pred size does not match";
-std::vector<bst_float>& preds_h = preds->data_h();
-out_gpair->resize(preds_h.size());
-std::vector<bst_gpair>& gpair = out_gpair->data_h();
+std::vector<bst_float>& preds_h = preds->HostVector();
+out_gpair->Resize(preds_h.size());
+std::vector<GradientPair>& gpair = out_gpair->HostVector();
 const int nclass = param_.num_class;
-const omp_ulong ndata = static_cast<omp_ulong>(preds_h.size() / nclass);
+const auto ndata = static_cast<omp_ulong>(preds_h.size() / nclass);
 int label_error = 0;
 #pragma omp parallel
@@ -58,7 +58,7 @@ class SoftmaxMultiClassObj : public ObjFunction {
 rec[k] = preds_h[i * nclass + k];
 }
 common::Softmax(&rec);
-int label = static_cast<int>(info.labels[i]);
+auto label = static_cast<int>(info.labels_[i]);
 if (label < 0 || label >= nclass) {
 label_error = label; label = 0;
 }
@@ -67,9 +67,9 @@ class SoftmaxMultiClassObj : public ObjFunction {
 bst_float p = rec[k];
 const bst_float h = 2.0f * p * (1.0f - p) * wt;
 if (label == k) {
-gpair[i * nclass + k] = bst_gpair((p - 1.0f) * wt, h);
+gpair[i * nclass + k] = GradientPair((p - 1.0f) * wt, h);
 } else {
-gpair[i * nclass + k] = bst_gpair(p* wt, h);
+gpair[i * nclass + k] = GradientPair(p* wt, h);
 }
 }
 }
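The gradient pair written above is the softmax cross-entropy gradient with an inflated diagonal Hessian. With p_k the softmax probability of class k, y the true label, and w the row weight:

\[ g_k = \begin{cases} (p_k - 1)\,w, & k = y\\ p_k\,w, & k \neq y \end{cases} \qquad h_k = 2\,p_k(1-p_k)\,w. \]

The exact diagonal Hessian would be p_k(1 − p_k) w; the factor 2 appears to be a deliberate conservative scaling rather than part of the textbook derivation.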
@@ -91,10 +91,10 @@ class SoftmaxMultiClassObj : public ObjFunction {
 private:
 inline void Transform(HostDeviceVector<bst_float> *io_preds, bool prob) {
-std::vector<bst_float> &preds = io_preds->data_h();
+std::vector<bst_float> &preds = io_preds->HostVector();
 std::vector<bst_float> tmp;
 const int nclass = param_.num_class;
-const omp_ulong ndata = static_cast<omp_ulong>(preds.size() / nclass);
+const auto ndata = static_cast<omp_ulong>(preds.size() / nclass);
 if (!prob) tmp.resize(ndata);
 #pragma omp parallel
@@ -40,17 +40,17 @@ class LambdaRankObj : public ObjFunction {
 void GetGradient(HostDeviceVector<bst_float>* preds,
 const MetaInfo& info,
 int iter,
-HostDeviceVector<bst_gpair>* out_gpair) override {
-CHECK_EQ(preds->size(), info.labels.size()) << "label size predict size not match";
-auto& preds_h = preds->data_h();
-out_gpair->resize(preds_h.size());
-std::vector<bst_gpair>& gpair = out_gpair->data_h();
+HostDeviceVector<GradientPair>* out_gpair) override {
+CHECK_EQ(preds->Size(), info.labels_.size()) << "label size predict size not match";
+auto& preds_h = preds->HostVector();
+out_gpair->Resize(preds_h.size());
+std::vector<GradientPair>& gpair = out_gpair->HostVector();
 // quick consistency when group is not available
-std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(info.labels.size());
-const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
-CHECK(gptr.size() != 0 && gptr.back() == info.labels.size())
+std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(info.labels_.size());
+const std::vector<unsigned> &gptr = info.group_ptr_.size() == 0 ? tgptr : info.group_ptr_;
+CHECK(gptr.size() != 0 && gptr.back() == info.labels_.size())
 << "group structure not consistent with #rows";
-const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
+const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
 #pragma omp parallel
 {
 // parall construct, declare random number generator here, so that each
@@ -64,8 +64,8 @@ class LambdaRankObj : public ObjFunction {
 for (bst_omp_uint k = 0; k < ngroup; ++k) {
 lst.clear(); pairs.clear();
 for (unsigned j = gptr[k]; j < gptr[k+1]; ++j) {
-lst.push_back(ListEntry(preds_h[j], info.labels[j], j));
-gpair[j] = bst_gpair(0.0f, 0.0f);
+lst.emplace_back(preds_h[j], info.labels_[j], j);
+gpair[j] = GradientPair(0.0f, 0.0f);
 }
 std::sort(lst.begin(), lst.end(), ListEntry::CmpPred);
 rec.resize(lst.size());
@@ -85,9 +85,9 @@ class LambdaRankObj : public ObjFunction {
 for (unsigned pid = i; pid < j; ++pid) {
 unsigned ridx = std::uniform_int_distribution<unsigned>(0, nleft + nright - 1)(rnd);
 if (ridx < nleft) {
-pairs.push_back(LambdaPair(rec[ridx].second, rec[pid].second));
+pairs.emplace_back(rec[ridx].second, rec[pid].second);
 } else {
-pairs.push_back(LambdaPair(rec[pid].second, rec[ridx+j-i].second));
+pairs.emplace_back(rec[pid].second, rec[ridx+j-i].second);
 }
 }
 }
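The push_back-to-emplace_back rewrites in this hunk rely on the element type exposing a matching constructor. A minimal self-contained sketch of the difference (the two-field LambdaPair below mirrors what the surrounding diff implies, nothing more):

#include <vector>

struct LambdaPair {
  unsigned pos_index, neg_index;
  LambdaPair(unsigned pos, unsigned neg) : pos_index(pos), neg_index(neg) {}
};

int main() {
  std::vector<LambdaPair> pairs;
  pairs.push_back(LambdaPair(1, 2));  // builds a temporary, then moves it in
  pairs.emplace_back(1, 2);           // constructs the element in place
}

Both calls leave the vector in the same state; emplace_back merely skips the temporary, which is what the modernize-use-emplace check is after.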
@@ -101,22 +101,22 @@ class LambdaRankObj : public ObjFunction {
 if (param_.fix_list_weight != 0.0f) {
 scale *= param_.fix_list_weight / (gptr[k + 1] - gptr[k]);
 }
-for (size_t i = 0; i < pairs.size(); ++i) {
-const ListEntry &pos = lst[pairs[i].pos_index];
-const ListEntry &neg = lst[pairs[i].neg_index];
-const bst_float w = pairs[i].weight * scale;
+for (auto & pair : pairs) {
+const ListEntry &pos = lst[pair.pos_index];
+const ListEntry &neg = lst[pair.neg_index];
+const bst_float w = pair.weight * scale;
 const float eps = 1e-16f;
 bst_float p = common::Sigmoid(pos.pred - neg.pred);
 bst_float g = p - 1.0f;
 bst_float h = std::max(p * (1.0f - p), eps);
 // accumulate gradient and hessian in both pid, and nid
-gpair[pos.rindex] += bst_gpair(g * w, 2.0f*w*h);
-gpair[neg.rindex] += bst_gpair(-g * w, 2.0f*w*h);
+gpair[pos.rindex] += GradientPair(g * w, 2.0f*w*h);
+gpair[neg.rindex] += GradientPair(-g * w, 2.0f*w*h);
 }
 }
 }
 }
-const char* DefaultEvalMetric(void) const override {
+const char* DefaultEvalMetric() const override {
 return "map";
 }
@@ -177,7 +177,7 @@ class LambdaRankObjNDCG : public LambdaRankObj {
 void GetLambdaWeight(const std::vector<ListEntry> &sorted_list,
 std::vector<LambdaPair> *io_pairs) override {
 std::vector<LambdaPair> &pairs = *io_pairs;
-float IDCG;
+float IDCG; // NOLINT
 {
 std::vector<bst_float> labels(sorted_list.size());
 for (size_t i = 0; i < sorted_list.size(); ++i) {
@@ -187,32 +187,32 @@ class LambdaRankObjNDCG : public LambdaRankObj {
 IDCG = CalcDCG(labels);
 }
 if (IDCG == 0.0) {
-for (size_t i = 0; i < pairs.size(); ++i) {
-pairs[i].weight = 0.0f;
+for (auto & pair : pairs) {
+pair.weight = 0.0f;
 }
 } else {
 IDCG = 1.0f / IDCG;
-for (size_t i = 0; i < pairs.size(); ++i) {
-unsigned pos_idx = pairs[i].pos_index;
-unsigned neg_idx = pairs[i].neg_index;
+for (auto & pair : pairs) {
+unsigned pos_idx = pair.pos_index;
+unsigned neg_idx = pair.neg_index;
 float pos_loginv = 1.0f / std::log2(pos_idx + 2.0f);
 float neg_loginv = 1.0f / std::log2(neg_idx + 2.0f);
-int pos_label = static_cast<int>(sorted_list[pos_idx].label);
-int neg_label = static_cast<int>(sorted_list[neg_idx].label);
+auto pos_label = static_cast<int>(sorted_list[pos_idx].label);
+auto neg_label = static_cast<int>(sorted_list[neg_idx].label);
 bst_float original =
 ((1 << pos_label) - 1) * pos_loginv + ((1 << neg_label) - 1) * neg_loginv;
 float changed =
 ((1 << neg_label) - 1) * pos_loginv + ((1 << pos_label) - 1) * neg_loginv;
 bst_float delta = (original - changed) * IDCG;
 if (delta < 0.0f) delta = - delta;
-pairs[i].weight = delta;
+pair.weight = delta;
 }
 }
 }
 inline static bst_float CalcDCG(const std::vector<bst_float> &labels) {
 double sumdcg = 0.0;
 for (size_t i = 0; i < labels.size(); ++i) {
-const unsigned rel = static_cast<unsigned>(labels[i]);
+const auto rel = static_cast<unsigned>(labels[i]);
 if (rel != 0) {
 sumdcg += ((1 << rel) - 1) / std::log2(static_cast<bst_float>(i + 2));
 }
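CalcDCG implements the exponential-gain DCG, and the weight assigned to each sampled pair above is its normalised NDCG change. With rel_i the integer relevance at rank i:

\[ \mathrm{DCG} = \sum_i \frac{2^{\mathrm{rel}_i} - 1}{\log_2(i+2)}, \qquad \text{pair weight} = \frac{\lvert \mathrm{DCG}_{\text{orig}} - \mathrm{DCG}_{\text{swapped}} \rvert}{\mathrm{IDCG}}, \]

where only the two swapped ranks change between the original and changed expressions, so the difference reduces to exactly those four terms.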
@@ -238,7 +238,7 @@ class LambdaRankObjMAP : public LambdaRankObj {
 float ap_acc_add;
 /* \brief the accumulated positive instance count */
 float hits;
-MAPStats(void) {}
+MAPStats() = default;
 MAPStats(float ap_acc, float ap_acc_miss, float ap_acc_add, float hits)
 : ap_acc(ap_acc), ap_acc_miss(ap_acc_miss), ap_acc_add(ap_acc_add), hits(hits) {}
 };
@@ -300,10 +300,10 @@ class LambdaRankObjMAP : public LambdaRankObj {
 std::vector<LambdaPair> &pairs = *io_pairs;
 std::vector<MAPStats> map_stats;
 GetMAPStats(sorted_list, &map_stats);
-for (size_t i = 0; i < pairs.size(); ++i) {
-pairs[i].weight =
-GetLambdaMAP(sorted_list, pairs[i].pos_index,
-pairs[i].neg_index, &map_stats);
+for (auto & pair : pairs) {
+pair.weight =
+GetLambdaMAP(sorted_list, pair.pos_index,
+pair.neg_index, &map_stats);
 }
 }
 };
@@ -32,26 +32,26 @@ struct RegLossParam : public dmlc::Parameter<RegLossParam> {
 template <typename Loss>
 class RegLossObj : public ObjFunction {
 public:
-RegLossObj() : labels_checked(false) {}
+RegLossObj() = default;
 void Configure(
 const std::vector<std::pair<std::string, std::string> > &args) override {
 param_.InitAllowUnknown(args);
 }
 void GetGradient(HostDeviceVector<bst_float> *preds, const MetaInfo &info,
-int iter, HostDeviceVector<bst_gpair> *out_gpair) override {
-CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
-CHECK_EQ(preds->size(), info.labels.size())
+int iter, HostDeviceVector<GradientPair> *out_gpair) override {
+CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
+CHECK_EQ(preds->Size(), info.labels_.size())
 << "labels are not correctly provided"
-<< "preds.size=" << preds->size()
-<< ", label.size=" << info.labels.size();
-auto& preds_h = preds->data_h();
-this->LazyCheckLabels(info.labels);
-out_gpair->resize(preds_h.size());
-auto& gpair = out_gpair->data_h();
-const omp_ulong n = static_cast<omp_ulong>(preds_h.size());
-auto gpair_ptr = out_gpair->ptr_h();
+<< "preds.size=" << preds->Size()
+<< ", label.size=" << info.labels_.size();
+auto& preds_h = preds->HostVector();
+this->LazyCheckLabels(info.labels_);
+out_gpair->Resize(preds_h.size());
+auto& gpair = out_gpair->HostVector();
+const auto n = static_cast<omp_ulong>(preds_h.size());
+auto gpair_ptr = out_gpair->HostPointer();
 avx::Float8 scale(param_.scale_pos_weight);
 const omp_ulong remainder = n % 8;
@@ -59,10 +59,10 @@ class RegLossObj : public ObjFunction {
 // Use a maximum of 8 threads
 #pragma omp parallel for schedule(static) num_threads(std::min(8, nthread))
 for (omp_ulong i = 0; i < n - remainder; i += 8) {
-avx::Float8 y(&info.labels[i]);
+avx::Float8 y(&info.labels_[i]);
 avx::Float8 p = Loss::PredTransform(avx::Float8(&preds_h[i]));
-avx::Float8 w = info.weights.empty() ? avx::Float8(1.0f)
-: avx::Float8(&info.weights[i]);
+avx::Float8 w = info.weights_.empty() ? avx::Float8(1.0f)
+: avx::Float8(&info.weights_[i]);
 // Adjust weight
 w += y * (scale * w - w);
 avx::Float8 grad = Loss::FirstOrderGradient(p, y);
@@ -70,11 +70,11 @@ class RegLossObj : public ObjFunction {
 avx::StoreGpair(gpair_ptr + i, grad * w, hess * w);
 }
 for (omp_ulong i = n - remainder; i < n; ++i) {
-auto y = info.labels[i];
+auto y = info.labels_[i];
 bst_float p = Loss::PredTransform(preds_h[i]);
 bst_float w = info.GetWeight(i);
 w += y * ((param_.scale_pos_weight * w) - w);
-gpair[i] = bst_gpair(Loss::FirstOrderGradient(p, y) * w,
+gpair[i] = GradientPair(Loss::FirstOrderGradient(p, y) * w,
 Loss::SecondOrderGradient(p, y) * w);
 }
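The weight adjustment shared by the AVX and scalar paths folds scale_pos_weight into the sample weight; rearranging w += y * (scale * w - w) gives

\[ w' = w\bigl(1 + y\,(s-1)\bigr) = \begin{cases} s\,w, & y = 1\\ w, & y = 0, \end{cases} \]

so positives are up-weighted by s = scale_pos_weight and negatives pass through unchanged.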
@@ -85,8 +85,8 @@ class RegLossObj : public ObjFunction {
 return Loss::DefaultEvalMetric();
 }
 void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
-std::vector<bst_float> &preds = io_preds->data_h();
-const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
+std::vector<bst_float> &preds = io_preds->HostVector();
+const auto ndata = static_cast<bst_omp_uint>(preds.size());
 #pragma omp parallel for schedule(static)
 for (bst_omp_uint j = 0; j < ndata; ++j) {
 preds[j] = Loss::PredTransform(preds[j]);
@@ -98,14 +98,14 @@ class RegLossObj : public ObjFunction {
 protected:
 void LazyCheckLabels(const std::vector<float> &labels) {
-if (labels_checked) return;
+if (labels_checked_) return;
 for (auto &y : labels) {
 CHECK(Loss::CheckLabel(y)) << Loss::LabelErrorMsg();
 }
-labels_checked = true;
+labels_checked_ = true;
 }
 RegLossParam param_;
-bool labels_checked;
+bool labels_checked_{false};
 };
 // register the objective functions
@@ -148,12 +148,12 @@ class PoissonRegression : public ObjFunction {
 void GetGradient(HostDeviceVector<bst_float> *preds,
 const MetaInfo &info,
 int iter,
-HostDeviceVector<bst_gpair> *out_gpair) override {
-CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
-CHECK_EQ(preds->size(), info.labels.size()) << "labels are not correctly provided";
-auto& preds_h = preds->data_h();
-out_gpair->resize(preds->size());
-auto& gpair = out_gpair->data_h();
+HostDeviceVector<GradientPair> *out_gpair) override {
+CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
+CHECK_EQ(preds->Size(), info.labels_.size()) << "labels are not correctly provided";
+auto& preds_h = preds->HostVector();
+out_gpair->Resize(preds->Size());
+auto& gpair = out_gpair->HostVector();
 // check if label in range
 bool label_correct = true;
 // start calculating gradient
@@ -162,9 +162,9 @@ class PoissonRegression : public ObjFunction {
 for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*)
 bst_float p = preds_h[i];
 bst_float w = info.GetWeight(i);
-bst_float y = info.labels[i];
+bst_float y = info.labels_[i];
 if (y >= 0.0f) {
-gpair[i] = bst_gpair((std::exp(p) - y) * w,
+gpair[i] = GradientPair((std::exp(p) - y) * w,
 std::exp(p + param_.max_delta_step) * w);
 } else {
 label_correct = false;
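For the Poisson objective the margin p is the log-mean, so the pair above follows from the negative log-likelihood, with the Hessian inflated as a step-size guard:

\[ g = (e^{p} - y)\,w, \qquad h = e^{\,p + \delta}\,w, \quad \delta = \texttt{max\_delta\_step}. \]

(The exact second derivative is e^p w; the extra e^{\delta} factor bounds the leaf updates.)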
@@ -173,7 +173,7 @@ class PoissonRegression : public ObjFunction {
 CHECK(label_correct) << "PoissonRegression: label must be nonnegative";
 }
 void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
-std::vector<bst_float> &preds = io_preds->data_h();
+std::vector<bst_float> &preds = io_preds->HostVector();
 const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
 #pragma omp parallel for schedule(static)
 for (long j = 0; j < ndata; ++j) { // NOLINT(*)
@@ -186,7 +186,7 @@ class PoissonRegression : public ObjFunction {
 bst_float ProbToMargin(bst_float base_score) const override {
 return std::log(base_score);
 }
-const char* DefaultEvalMetric(void) const override {
+const char* DefaultEvalMetric() const override {
 return "poisson-nloglik";
 }
@@ -209,12 +209,12 @@ class CoxRegression : public ObjFunction {
 void GetGradient(HostDeviceVector<bst_float> *preds,
 const MetaInfo &info,
 int iter,
-HostDeviceVector<bst_gpair> *out_gpair) override {
-CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
-CHECK_EQ(preds->size(), info.labels.size()) << "labels are not correctly provided";
-auto& preds_h = preds->data_h();
-out_gpair->resize(preds_h.size());
-auto& gpair = out_gpair->data_h();
+HostDeviceVector<GradientPair> *out_gpair) override {
+CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
+CHECK_EQ(preds->Size(), info.labels_.size()) << "labels are not correctly provided";
+auto& preds_h = preds->HostVector();
+out_gpair->Resize(preds_h.size());
+auto& gpair = out_gpair->HostVector();
 const std::vector<size_t> &label_order = info.LabelAbsSort();
 const omp_ulong ndata = static_cast<omp_ulong>(preds_h.size()); // NOLINT(*)
@@ -236,7 +236,7 @@ class CoxRegression : public ObjFunction {
 const double p = preds_h[ind];
 const double exp_p = std::exp(p);
 const double w = info.GetWeight(ind);
-const double y = info.labels[ind];
+const double y = info.labels_[ind];
 const double abs_y = std::abs(y);
 // only update the denominator after we move forward in time (labels are sorted)
@@ -257,14 +257,14 @@ class CoxRegression : public ObjFunction {
 const double grad = exp_p*r_k - static_cast<bst_float>(y > 0);
 const double hess = exp_p*r_k - exp_p*exp_p * s_k;
-gpair.at(ind) = bst_gpair(grad * w, hess * w);
+gpair.at(ind) = GradientPair(grad * w, hess * w);
 last_abs_y = abs_y;
 last_exp_p = exp_p;
 }
 }
 void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
-std::vector<bst_float> &preds = io_preds->data_h();
+std::vector<bst_float> &preds = io_preds->HostVector();
 const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
 #pragma omp parallel for schedule(static)
 for (long j = 0; j < ndata; ++j) { // NOLINT(*)
@@ -277,7 +277,7 @@ class CoxRegression : public ObjFunction {
 bst_float ProbToMargin(bst_float base_score) const override {
 return std::log(base_score);
 }
-const char* DefaultEvalMetric(void) const override {
+const char* DefaultEvalMetric() const override {
 return "cox-nloglik";
 }
 };
@@ -297,12 +297,12 @@ class GammaRegression : public ObjFunction {
 void GetGradient(HostDeviceVector<bst_float> *preds,
 const MetaInfo &info,
 int iter,
-HostDeviceVector<bst_gpair> *out_gpair) override {
-CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
-CHECK_EQ(preds->size(), info.labels.size()) << "labels are not correctly provided";
-auto& preds_h = preds->data_h();
-out_gpair->resize(preds_h.size());
-auto& gpair = out_gpair->data_h();
+HostDeviceVector<GradientPair> *out_gpair) override {
+CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
+CHECK_EQ(preds->Size(), info.labels_.size()) << "labels are not correctly provided";
+auto& preds_h = preds->HostVector();
+out_gpair->Resize(preds_h.size());
+auto& gpair = out_gpair->HostVector();
 // check if label in range
 bool label_correct = true;
 // start calculating gradient
@@ -311,9 +311,9 @@ class GammaRegression : public ObjFunction {
 for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*)
 bst_float p = preds_h[i];
 bst_float w = info.GetWeight(i);
-bst_float y = info.labels[i];
+bst_float y = info.labels_[i];
 if (y >= 0.0f) {
-gpair[i] = bst_gpair((1 - y / std::exp(p)) * w, y / std::exp(p) * w);
+gpair[i] = GradientPair((1 - y / std::exp(p)) * w, y / std::exp(p) * w);
 } else {
 label_correct = false;
 }
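The gamma objective also uses a log link; the pair written above is the gradient of the gamma deviance in the margin p (up to constants):

\[ g = \bigl(1 - y\,e^{-p}\bigr)\,w, \qquad h = y\,e^{-p}\,w. \]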
@@ -321,7 +321,7 @@ class GammaRegression : public ObjFunction {
 CHECK(label_correct) << "GammaRegression: label must be positive";
 }
 void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
-std::vector<bst_float> &preds = io_preds->data_h();
+std::vector<bst_float> &preds = io_preds->HostVector();
 const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
 #pragma omp parallel for schedule(static)
 for (long j = 0; j < ndata; ++j) { // NOLINT(*)
@@ -334,7 +334,7 @@ class GammaRegression : public ObjFunction {
 bst_float ProbToMargin(bst_float base_score) const override {
 return std::log(base_score);
 }
-const char* DefaultEvalMetric(void) const override {
+const char* DefaultEvalMetric() const override {
 return "gamma-nloglik";
 }
 };
@@ -364,27 +364,27 @@ class TweedieRegression : public ObjFunction {
 void GetGradient(HostDeviceVector<bst_float> *preds,
 const MetaInfo &info,
 int iter,
-HostDeviceVector<bst_gpair> *out_gpair) override {
-CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
-CHECK_EQ(preds->size(), info.labels.size()) << "labels are not correctly provided";
-auto& preds_h = preds->data_h();
-out_gpair->resize(preds->size());
-auto& gpair = out_gpair->data_h();
+HostDeviceVector<GradientPair> *out_gpair) override {
+CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
+CHECK_EQ(preds->Size(), info.labels_.size()) << "labels are not correctly provided";
+auto& preds_h = preds->HostVector();
+out_gpair->Resize(preds->Size());
+auto& gpair = out_gpair->HostVector();
 // check if label in range
 bool label_correct = true;
 // start calculating gradient
-const omp_ulong ndata = static_cast<omp_ulong>(preds->size()); // NOLINT(*)
+const omp_ulong ndata = static_cast<omp_ulong>(preds->Size()); // NOLINT(*)
 #pragma omp parallel for schedule(static)
 for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*)
 bst_float p = preds_h[i];
 bst_float w = info.GetWeight(i);
-bst_float y = info.labels[i];
+bst_float y = info.labels_[i];
 float rho = param_.tweedie_variance_power;
 if (y >= 0.0f) {
 bst_float grad = -y * std::exp((1 - rho) * p) + std::exp((2 - rho) * p);
 bst_float hess = -y * (1 - rho) * \
 std::exp((1 - rho) * p) + (2 - rho) * std::exp((2 - rho) * p);
-gpair[i] = bst_gpair(grad * w, hess * w);
+gpair[i] = GradientPair(grad * w, hess * w);
 } else {
 label_correct = false;
 }
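With variance power ρ and a log-link margin p, the grad/hess expressions above are the first two derivatives of the Tweedie negative log-likelihood (dropping terms that depend only on y):

\[ g = \bigl(-y\,e^{(1-\rho)p} + e^{(2-\rho)p}\bigr)\,w, \qquad h = \bigl(-y(1-\rho)\,e^{(1-\rho)p} + (2-\rho)\,e^{(2-\rho)p}\bigr)\,w. \]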
@@ -392,14 +392,14 @@ class TweedieRegression : public ObjFunction {
 CHECK(label_correct) << "TweedieRegression: label must be nonnegative";
 }
 void PredTransform(HostDeviceVector<bst_float> *io_preds) override {
-std::vector<bst_float> &preds = io_preds->data_h();
+std::vector<bst_float> &preds = io_preds->HostVector();
 const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
 #pragma omp parallel for schedule(static)
 for (long j = 0; j < ndata; ++j) { // NOLINT(*)
 preds[j] = std::exp(preds[j]);
 }
 }
-const char* DefaultEvalMetric(void) const override {
+const char* DefaultEvalMetric() const override {
 std::ostringstream os;
 os << "tweedie-nloglik@" << param_.tweedie_variance_power;
 std::string metric = os.str();
@@ -16,11 +16,12 @@
 #include "../common/host_device_vector.h"
 #include "./regression_loss.h"
-using namespace dh;
 namespace xgboost {
 namespace obj {
+using dh::DVec;
 DMLC_REGISTRY_FILE_TAG(regression_obj_gpu);
 struct GPURegLossParam : public dmlc::Parameter<GPURegLossParam> {
@@ -43,7 +44,7 @@ struct GPURegLossParam : public dmlc::Parameter<GPURegLossParam> {
 // GPU kernel for gradient computation
 template<typename Loss>
 __global__ void get_gradient_k
-(bst_gpair *__restrict__ out_gpair, unsigned int *__restrict__ label_correct,
+(GradientPair *__restrict__ out_gpair, unsigned int *__restrict__ label_correct,
 const float * __restrict__ preds, const float * __restrict__ labels,
 const float * __restrict__ weights, int n, float scale_pos_weight) {
 int i = threadIdx.x + blockIdx.x * blockDim.x;
@@ -56,7 +57,7 @@ __global__ void get_gradient_k
 w *= scale_pos_weight;
 if (!Loss::CheckLabel(label))
 atomicAnd(label_correct, 0);
-out_gpair[i] = bst_gpair
+out_gpair[i] = GradientPair
 (Loss::FirstOrderGradient(p, label) * w, Loss::SecondOrderGradient(p, label) * w);
 }
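dh::DivRoundUp at the launch sites below is the usual ceiling division for a one-thread-per-element grid; a minimal sketch of the pattern (helper written out here for illustration, and the kernel is expected to guard i against n):

#include <cstddef>

// ceil(n / block): enough blocks that grid * block >= n
inline int DivRoundUp(std::size_t n, int block) {
  return static_cast<int>((n + block - 1) / block);
}
// launch sketch:
//   get_gradient_k<Loss><<<DivRoundUp(n, block), block>>>(...);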
@@ -75,40 +76,40 @@ class GPURegLossObj : public ObjFunction {
 protected:
 // manages device data
 struct DeviceData {
-dvec<float> labels, weights;
-dvec<unsigned int> label_correct;
+DVec<float> labels, weights;
+DVec<unsigned int> label_correct;
 // allocate everything on device
-DeviceData(bulk_allocator<memory_type::DEVICE>* ba, int device_idx, size_t n) {
-ba->allocate(device_idx, false,
+DeviceData(dh::BulkAllocator<dh::MemoryType::kDevice>* ba, int device_idx, size_t n) {
+ba->Allocate(device_idx, false,
 &labels, n,
 &weights, n,
 &label_correct, 1);
 }
-size_t size() const { return labels.size(); }
+size_t Size() const { return labels.Size(); }
 };
 bool copied_;
-std::unique_ptr<bulk_allocator<memory_type::DEVICE>> ba_;
+std::unique_ptr<dh::BulkAllocator<dh::MemoryType::kDevice>> ba_;
 std::unique_ptr<DeviceData> data_;
 HostDeviceVector<bst_float> preds_d_;
-HostDeviceVector<bst_gpair> out_gpair_d_;
+HostDeviceVector<GradientPair> out_gpair_d_;
 // allocate device data for n elements, do nothing if enough memory is allocated already
 void LazyResize(int n) {
-if (data_.get() != nullptr && data_->size() >= n)
+if (data_.get() != nullptr && data_->Size() >= n)
 return;
 copied_ = false;
 // free the old data and allocate the new data
-ba_.reset(new bulk_allocator<memory_type::DEVICE>());
+ba_.reset(new dh::BulkAllocator<dh::MemoryType::kDevice>());
 data_.reset(new DeviceData(ba_.get(), 0, n));
-preds_d_.resize(n, 0.0f, param_.gpu_id);
-out_gpair_d_.resize(n, bst_gpair(), param_.gpu_id);
+preds_d_.Resize(n, 0.0f, param_.gpu_id);
+out_gpair_d_.Resize(n, GradientPair(), param_.gpu_id);
 }
 public:
-GPURegLossObj() : copied_(false), preds_d_(0, -1), out_gpair_d_(0, -1) {}
+GPURegLossObj() : copied_(false), preds_d_(0, -1), out_gpair_d_({}, -1) {}
 void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
 param_.InitAllowUnknown(args);
@@ -118,32 +119,32 @@ class GPURegLossObj : public ObjFunction {
 void GetGradient(HostDeviceVector<float>* preds,
 const MetaInfo &info,
 int iter,
-HostDeviceVector<bst_gpair>* out_gpair) override {
-CHECK_NE(info.labels.size(), 0U) << "label set cannot be empty";
-CHECK_EQ(preds->size(), info.labels.size())
+HostDeviceVector<GradientPair>* out_gpair) override {
+CHECK_NE(info.labels_.size(), 0U) << "label set cannot be empty";
+CHECK_EQ(preds->Size(), info.labels_.size())
 << "labels are not correctly provided"
-<< "preds.size=" << preds->size() << ", label.size=" << info.labels.size();
-size_t ndata = preds->size();
-out_gpair->resize(ndata, bst_gpair(), param_.gpu_id);
+<< "preds.size=" << preds->Size() << ", label.size=" << info.labels_.size();
+size_t ndata = preds->Size();
+out_gpair->Resize(ndata, GradientPair(), param_.gpu_id);
 LazyResize(ndata);
-GetGradientDevice(preds->ptr_d(param_.gpu_id), info, iter,
-out_gpair->ptr_d(param_.gpu_id), ndata);
+GetGradientDevice(preds->DevicePointer(param_.gpu_id), info, iter,
+out_gpair->DevicePointer(param_.gpu_id), ndata);
 }
 private:
 void GetGradientDevice(float* preds,
 const MetaInfo &info,
 int iter,
-bst_gpair* out_gpair, size_t n) {
-safe_cuda(cudaSetDevice(param_.gpu_id));
+GradientPair* out_gpair, size_t n) {
+dh::safe_cuda(cudaSetDevice(param_.gpu_id));
 DeviceData& d = *data_;
-d.label_correct.fill(1);
+d.label_correct.Fill(1);
 // only copy the labels and weights once, similar to how the data is copied
 if (!copied_) {
-thrust::copy(info.labels.begin(), info.labels.begin() + n,
+thrust::copy(info.labels_.begin(), info.labels_.begin() + n,
 d.labels.tbegin());
-if (info.weights.size() > 0) {
-thrust::copy(info.weights.begin(), info.weights.begin() + n,
+if (info.weights_.size() > 0) {
+thrust::copy(info.weights_.begin(), info.weights_.begin() + n,
 d.weights.tbegin());
 }
 copied_ = true;
@@ -151,11 +152,11 @@ class GPURegLossObj : public ObjFunction {
 // run the kernel
 const int block = 256;
-get_gradient_k<Loss><<<div_round_up(n, block), block>>>
-(out_gpair, d.label_correct.data(), preds,
-d.labels.data(), info.weights.size() > 0 ? d.weights.data() : nullptr,
+get_gradient_k<Loss><<<dh::DivRoundUp(n, block), block>>>
+(out_gpair, d.label_correct.Data(), preds,
+d.labels.Data(), info.weights_.size() > 0 ? d.weights.Data() : nullptr,
 n, param_.scale_pos_weight);
-safe_cuda(cudaGetLastError());
+dh::safe_cuda(cudaGetLastError());
 // copy output data from the GPU
 unsigned int label_correct_h;
@@ -173,15 +174,15 @@ class GPURegLossObj : public ObjFunction {
 }
 void PredTransform(HostDeviceVector<float> *io_preds) override {
-PredTransformDevice(io_preds->ptr_d(param_.gpu_id), io_preds->size());
+PredTransformDevice(io_preds->DevicePointer(param_.gpu_id), io_preds->Size());
 }
 void PredTransformDevice(float* preds, size_t n) {
-safe_cuda(cudaSetDevice(param_.gpu_id));
+dh::safe_cuda(cudaSetDevice(param_.gpu_id));
 const int block = 256;
-pred_transform_k<Loss><<<div_round_up(n, block), block>>>(preds, n);
-safe_cuda(cudaGetLastError());
-safe_cuda(cudaDeviceSynchronize());
+pred_transform_k<Loss><<<dh::DivRoundUp(n, block), block>>>(preds, n);
+dh::safe_cuda(cudaGetLastError());
+dh::safe_cuda(cudaDeviceSynchronize());
 }
@@ -24,7 +24,7 @@ class CPUPredictor : public Predictor {
 for (size_t i = tree_begin; i < tree_end; ++i) {
 if (tree_info[i] == bst_group) {
 int tid = trees[i]->GetLeafIndex(*p_feats, root_index);
-psum += (*trees[i])[tid].leaf_value();
+psum += (*trees[i])[tid].LeafValue();
 }
 }
 p_feats->Drop(inst);
@@ -45,35 +45,35 @@ class CPUPredictor : public Predictor {
 std::vector<bst_float>* out_preds,
 const gbm::GBTreeModel& model, int num_group,
 unsigned tree_begin, unsigned tree_end) {
-const MetaInfo& info = p_fmat->info();
+const MetaInfo& info = p_fmat->Info();
 const int nthread = omp_get_max_threads();
 InitThreadTemp(nthread, model.param.num_feature);
 std::vector<bst_float>& preds = *out_preds;
 CHECK_EQ(model.param.size_leaf_vector, 0)
 << "size_leaf_vector is enforced to 0 so far";
-CHECK_EQ(preds.size(), p_fmat->info().num_row * num_group);
+CHECK_EQ(preds.size(), p_fmat->Info().num_row_ * num_group);
 // start collecting the prediction
 dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator();
 iter->BeforeFirst();
 while (iter->Next()) {
 const RowBatch& batch = iter->Value();
 // parallel over local batch
-const int K = 8;
-const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
-const bst_omp_uint rest = nsize % K;
+constexpr int kUnroll = 8;
+const auto nsize = static_cast<bst_omp_uint>(batch.size);
+const bst_omp_uint rest = nsize % kUnroll;
 #pragma omp parallel for schedule(static)
-for (bst_omp_uint i = 0; i < nsize - rest; i += K) {
+for (bst_omp_uint i = 0; i < nsize - rest; i += kUnroll) {
 const int tid = omp_get_thread_num();
 RegTree::FVec& feats = thread_temp[tid];
-int64_t ridx[K];
-RowBatch::Inst inst[K];
-for (int k = 0; k < K; ++k) {
+int64_t ridx[kUnroll];
+RowBatch::Inst inst[kUnroll];
+for (int k = 0; k < kUnroll; ++k) {
 ridx[k] = static_cast<int64_t>(batch.base_rowid + i + k);
 }
-for (int k = 0; k < K; ++k) {
+for (int k = 0; k < kUnroll; ++k) {
 inst[k] = batch[i + k];
 }
-for (int k = 0; k < K; ++k) {
+for (int k = 0; k < kUnroll; ++k) {
 for (int gid = 0; gid < num_group; ++gid) {
 const size_t offset = ridx[k] * num_group + gid;
 preds[offset] += this->PredValue(
@@ -84,7 +84,7 @@ class CPUPredictor : public Predictor {
 }
 for (bst_omp_uint i = nsize - rest; i < nsize; ++i) {
 RegTree::FVec& feats = thread_temp[0];
-const int64_t ridx = static_cast<int64_t>(batch.base_rowid + i);
+const auto ridx = static_cast<int64_t>(batch.base_rowid + i);
 const RowBatch::Inst inst = batch[i];
 for (int gid = 0; gid < num_group; ++gid) {
 const size_t offset = ridx * num_group + gid;
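The K → kUnroll rename doubles as a modernisation: constexpr guarantees a compile-time constant, which the on-stack arrays in the unrolled loop require. A minimal sketch of the main-loop/remainder split used above:

#include <cstddef>
#include <cstdint>

constexpr int kUnroll = 8;  // compile-time unroll factor

void Sketch(std::size_t nsize) {
  const std::size_t rest = nsize % kUnroll;  // rows left over after unrolling
  std::int64_t ridx[kUnroll];                // legal: bound is a constant expression
  // the main loop covers i = 0 .. nsize - rest - 1 in strides of kUnroll;
  // the trailing `rest` rows are handled by the scalar loop that follows
  (void)rest;
  (void)ridx;
}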
@@ -113,10 +113,10 @@ class CPUPredictor : public Predictor {
 auto it = cache_.find(dmat);
 if (it != cache_.end()) {
 HostDeviceVector<bst_float>& y = it->second.predictions;
-if (y.size() != 0) {
-out_preds->resize(y.size());
-std::copy(y.data_h().begin(), y.data_h().end(),
-out_preds->data_h().begin());
+if (y.Size() != 0) {
+out_preds->Resize(y.Size());
+std::copy(y.HostVector().begin(), y.HostVector().end(),
+out_preds->HostVector().begin());
 return true;
 }
 }
@@ -127,12 +127,12 @@ class CPUPredictor : public Predictor {
 void InitOutPredictions(const MetaInfo& info,
 HostDeviceVector<bst_float>* out_preds,
 const gbm::GBTreeModel& model) const {
-size_t n = model.param.num_output_group * info.num_row;
-const std::vector<bst_float>& base_margin = info.base_margin;
-out_preds->resize(n);
-std::vector<bst_float>& out_preds_h = out_preds->data_h();
+size_t n = model.param.num_output_group * info.num_row_;
+const std::vector<bst_float>& base_margin = info.base_margin_;
+out_preds->Resize(n);
+std::vector<bst_float>& out_preds_h = out_preds->HostVector();
 if (base_margin.size() != 0) {
-CHECK_EQ(out_preds->size(), n);
+CHECK_EQ(out_preds->Size(), n);
 std::copy(base_margin.begin(), base_margin.end(), out_preds_h.begin());
 } else {
 std::fill(out_preds_h.begin(), out_preds_h.end(), model.base_margin);
@@ -147,14 +147,14 @@ class CPUPredictor : public Predictor {
 return;
 }
-this->InitOutPredictions(dmat->info(), out_preds, model);
+this->InitOutPredictions(dmat->Info(), out_preds, model);
 ntree_limit *= model.param.num_output_group;
 if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
 ntree_limit = static_cast<unsigned>(model.trees.size());
 }
-this->PredLoopInternal(dmat, &out_preds->data_h(), model,
+this->PredLoopInternal(dmat, &out_preds->HostVector(), model,
 tree_begin, ntree_limit);
 }
@@ -167,9 +167,9 @@ class CPUPredictor : public Predictor {
 for (auto& kv : cache_) {
 PredictionCacheEntry& e = kv.second;
-if (e.predictions.size() == 0) {
-InitOutPredictions(e.data->info(), &(e.predictions), model);
-PredLoopInternal(e.data.get(), &(e.predictions.data_h()), model, 0,
+if (e.predictions.Size() == 0) {
+InitOutPredictions(e.data->Info(), &(e.predictions), model);
+PredLoopInternal(e.data.get(), &(e.predictions.HostVector()), model, 0,
 model.trees.size());
 } else if (model.param.num_output_group == 1 && updaters->size() > 0 &&
 num_new_trees == 1 &&
@@ -177,7 +177,7 @@ class CPUPredictor : public Predictor {
 &(e.predictions))) {
 {}  // do nothing
 } else {
-PredLoopInternal(e.data.get(), &(e.predictions.data_h()), model, old_ntree,
+PredLoopInternal(e.data.get(), &(e.predictions.HostVector()), model, old_ntree,
 model.trees.size());
 }
 }
@@ -209,25 +209,25 @@ class CPUPredictor : public Predictor {
 const gbm::GBTreeModel& model, unsigned ntree_limit) override {
 const int nthread = omp_get_max_threads();
 InitThreadTemp(nthread, model.param.num_feature);
-const MetaInfo& info = p_fmat->info();
+const MetaInfo& info = p_fmat->Info();
 // number of valid trees
 ntree_limit *= model.param.num_output_group;
 if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
 ntree_limit = static_cast<unsigned>(model.trees.size());
 }
 std::vector<bst_float>& preds = *out_preds;
-preds.resize(info.num_row * ntree_limit);
+preds.resize(info.num_row_ * ntree_limit);
 // start collecting the prediction
 dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator();
 iter->BeforeFirst();
 while (iter->Next()) {
 const RowBatch& batch = iter->Value();
 // parallel over local batch
-const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
+const auto nsize = static_cast<bst_omp_uint>(batch.size);
 #pragma omp parallel for schedule(static)
 for (bst_omp_uint i = 0; i < nsize; ++i) {
 const int tid = omp_get_thread_num();
-size_t ridx = static_cast<size_t>(batch.base_rowid + i);
+auto ridx = static_cast<size_t>(batch.base_rowid + i);
 RegTree::FVec& feats = thread_temp[tid];
 feats.Fill(batch[i]);
 for (unsigned j = 0; j < ntree_limit; ++j) {
@@ -246,7 +246,7 @@ class CPUPredictor : public Predictor {
 unsigned condition_feature) override {
 const int nthread = omp_get_max_threads();
 InitThreadTemp(nthread, model.param.num_feature);
-const MetaInfo& info = p_fmat->info();
+const MetaInfo& info = p_fmat->Info();
 // number of valid trees
 ntree_limit *= model.param.num_output_group;
 if (ntree_limit == 0 || ntree_limit > model.trees.size()) {
@@ -256,7 +256,7 @@ class CPUPredictor : public Predictor {
 size_t ncolumns = model.param.num_feature + 1;
 // allocate space for (number of features + bias) times the number of rows
 std::vector<bst_float>& contribs = *out_contribs;
-contribs.resize(info.num_row * ncolumns * model.param.num_output_group);
+contribs.resize(info.num_row_ * ncolumns * model.param.num_output_group);
 // make sure contributions is zeroed, we could be reusing a previously
 // allocated one
 std::fill(contribs.begin(), contribs.end(), 0);
@@ -267,15 +267,15 @@ class CPUPredictor : public Predictor {
 }
 // start collecting the contributions
 dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator();
-const std::vector<bst_float>& base_margin = info.base_margin;
+const std::vector<bst_float>& base_margin = info.base_margin_;
 iter->BeforeFirst();
 while (iter->Next()) {
 const RowBatch& batch = iter->Value();
 // parallel over local batch
-const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
+const auto nsize = static_cast<bst_omp_uint>(batch.size);
 #pragma omp parallel for schedule(static)
 for (bst_omp_uint i = 0; i < nsize; ++i) {
-size_t row_idx = static_cast<size_t>(batch.base_rowid + i);
+auto row_idx = static_cast<size_t>(batch.base_rowid + i);
 unsigned root_id = info.GetRoot(row_idx);
 RegTree::FVec& feats = thread_temp[omp_get_thread_num()];
 // loop over all classes
@@ -310,7 +310,7 @@ class CPUPredictor : public Predictor {
 void PredictInteractionContributions(DMatrix* p_fmat, std::vector<bst_float>* out_contribs,
 const gbm::GBTreeModel& model, unsigned ntree_limit,
 bool approximate) override {
-const MetaInfo& info = p_fmat->info();
+const MetaInfo& info = p_fmat->Info();
 const int ngroup = model.param.num_output_group;
 size_t ncolumns = model.param.num_feature;
 const unsigned row_chunk = ngroup * (ncolumns + 1) * (ncolumns + 1);
@@ -319,10 +319,10 @@ class CPUPredictor : public Predictor {
 // allocate space for (number of features^2) times the number of rows and tmp off/on contribs
 std::vector<bst_float>& contribs = *out_contribs;
-contribs.resize(info.num_row * ngroup * (ncolumns + 1) * (ncolumns + 1));
-std::vector<bst_float> contribs_off(info.num_row * ngroup * (ncolumns + 1));
-std::vector<bst_float> contribs_on(info.num_row * ngroup * (ncolumns + 1));
-std::vector<bst_float> contribs_diag(info.num_row * ngroup * (ncolumns + 1));
+contribs.resize(info.num_row_ * ngroup * (ncolumns + 1) * (ncolumns + 1));
+std::vector<bst_float> contribs_off(info.num_row_ * ngroup * (ncolumns + 1));
+std::vector<bst_float> contribs_on(info.num_row_ * ngroup * (ncolumns + 1));
+std::vector<bst_float> contribs_diag(info.num_row_ * ngroup * (ncolumns + 1));
 // Compute the difference in effects when conditioning on each of the features on and off
 // see: Axiomatic characterizations of probabilistic and
@@ -332,7 +332,7 @@ class CPUPredictor : public Predictor {
 PredictContribution(p_fmat, &contribs_off, model, ntree_limit, approximate, -1, i);
 PredictContribution(p_fmat, &contribs_on, model, ntree_limit, approximate, 1, i);
-for (size_t j = 0; j < info.num_row; ++j) {
+for (size_t j = 0; j < info.num_row_; ++j) {
 for (int l = 0; l < ngroup; ++l) {
 const unsigned o_offset = j * row_chunk + l * mrow_chunk + i * (ncolumns + 1);
 const unsigned c_offset = j * crow_chunk + l * (ncolumns + 1);
@ -36,8 +36,8 @@ struct GPUPredictionParam : public dmlc::Parameter<GPUPredictionParam> {
}; };
DMLC_REGISTER_PARAMETER(GPUPredictionParam); DMLC_REGISTER_PARAMETER(GPUPredictionParam);
template <typename iter_t> template <typename IterT>
void increment_offset(iter_t begin_itr, iter_t end_itr, size_t amount) { void IncrementOffset(IterT begin_itr, IterT end_itr, size_t amount) {
thrust::transform(begin_itr, end_itr, begin_itr, thrust::transform(begin_itr, end_itr, begin_itr,
[=] __device__(size_t elem) { return elem + amount; }); [=] __device__(size_t elem) { return elem + amount; });
} }
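IncrementOffset is the device-side version of a plain offset shift: when a batch with a nonzero base_rowid is appended, its row pointers are all shifted by a fixed amount (here batch.base_rowid) so they index into the concatenated arrays. A host-side sketch of the same idea, assuming an ordinary std::vector rather than device memory:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Shift every CSR row offset by `amount` in place.
    void ShiftOffsets(std::vector<std::size_t>* row_ptr, std::size_t amount) {
      std::transform(row_ptr->begin(), row_ptr->end(), row_ptr->begin(),
                     [amount](std::size_t elem) { return elem + amount; });
    }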
@ -50,16 +50,16 @@ void increment_offset(iter_t begin_itr, iter_t end_itr, size_t amount) {
struct DeviceMatrix { struct DeviceMatrix {
DMatrix* p_mat; // Pointer to the original matrix on the host DMatrix* p_mat; // Pointer to the original matrix on the host
dh::bulk_allocator<dh::memory_type::DEVICE> ba; dh::BulkAllocator<dh::MemoryType::kDevice> ba;
dh::dvec<size_t> row_ptr; dh::DVec<size_t> row_ptr;
dh::dvec<SparseBatch::Entry> data; dh::DVec<SparseBatch::Entry> data;
thrust::device_vector<float> predictions; thrust::device_vector<float> predictions;
DeviceMatrix(DMatrix* dmat, int device_idx, bool silent) : p_mat(dmat) { DeviceMatrix(DMatrix* dmat, int device_idx, bool silent) : p_mat(dmat) {
dh::safe_cuda(cudaSetDevice(device_idx)); dh::safe_cuda(cudaSetDevice(device_idx));
auto info = dmat->info(); auto info = dmat->Info();
ba.allocate(device_idx, silent, &row_ptr, info.num_row + 1, &data, ba.Allocate(device_idx, silent, &row_ptr, info.num_row_ + 1, &data,
info.num_nonzero); info.num_nonzero_);
auto iter = dmat->RowIterator(); auto iter = dmat->RowIterator();
iter->BeforeFirst(); iter->BeforeFirst();
size_t data_offset = 0; size_t data_offset = 0;
@ -71,7 +71,7 @@ struct DeviceMatrix {
if (batch.base_rowid > 0) { if (batch.base_rowid > 0) {
auto begin_itr = row_ptr.tbegin() + batch.base_rowid; auto begin_itr = row_ptr.tbegin() + batch.base_rowid;
auto end_itr = begin_itr + batch.size + 1; auto end_itr = begin_itr + batch.size + 1;
increment_offset(begin_itr, end_itr, batch.base_rowid); IncrementOffset(begin_itr, end_itr, batch.base_rowid);
} }
// Copy data // Copy data
thrust::copy(batch.data_ptr, batch.data_ptr + batch.ind_ptr[batch.size], thrust::copy(batch.data_ptr, batch.data_ptr + batch.ind_ptr[batch.size],
@ -103,17 +103,17 @@ struct DevicePredictionNode {
NodeValue val; NodeValue val;
DevicePredictionNode(const RegTree::Node& n) { // NOLINT DevicePredictionNode(const RegTree::Node& n) { // NOLINT
this->left_child_idx = n.cleft(); this->left_child_idx = n.LeftChild();
this->right_child_idx = n.cright(); this->right_child_idx = n.RightChild();
this->fidx = n.split_index(); this->fidx = n.SplitIndex();
if (n.default_left()) { if (n.DefaultLeft()) {
fidx |= (1U << 31); fidx |= (1U << 31);
} }
if (n.is_leaf()) { if (n.IsLeaf()) {
this->val.leaf_weight = n.leaf_value(); this->val.leaf_weight = n.LeafValue();
} else { } else {
this->val.fvalue = n.split_cond(); this->val.fvalue = n.SplitCond();
} }
} }
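DevicePredictionNode packs the default direction into the top bit of fidx. A sketch of the matching decode step; the helper names and mask constant are mine:

    #include <cstdint>

    constexpr uint32_t kDefaultLeftMask = 1U << 31;

    // True when the missing-value branch goes left.
    inline bool DefaultLeft(uint32_t fidx) {
      return (fidx & kDefaultLeftMask) != 0;
    }
    // Strip the flag bit to recover the 31-bit feature index.
    inline uint32_t FeatureIndex(uint32_t fidx) {
      return fidx & (kDefaultLeftMask - 1);
    }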
@ -155,7 +155,7 @@ struct ElementLoader {
if (use_shared) { if (use_shared) {
bst_uint global_idx = blockDim.x * blockIdx.x + threadIdx.x; bst_uint global_idx = blockDim.x * blockIdx.x + threadIdx.x;
int shared_elements = blockDim.x * num_features; int shared_elements = blockDim.x * num_features;
dh::block_fill(smem, shared_elements, nanf("")); dh::BlockFill(smem, shared_elements, nanf(""));
__syncthreads(); __syncthreads();
if (global_idx < num_rows) { if (global_idx < num_rows) {
bst_uint elem_begin = d_row_ptr[global_idx]; bst_uint elem_begin = d_row_ptr[global_idx];
@ -309,16 +309,16 @@ class GPUPredictor : public xgboost::Predictor {
thrust::copy(model.tree_info.begin(), model.tree_info.end(), thrust::copy(model.tree_info.begin(), model.tree_info.end(),
tree_group.begin()); tree_group.begin());
device_matrix->predictions.resize(out_preds->size()); device_matrix->predictions.resize(out_preds->Size());
thrust::copy(out_preds->tbegin(param.gpu_id), out_preds->tend(param.gpu_id), thrust::copy(out_preds->tbegin(param.gpu_id), out_preds->tend(param.gpu_id),
device_matrix->predictions.begin()); device_matrix->predictions.begin());
const int BLOCK_THREADS = 128; const int BLOCK_THREADS = 128;
const int GRID_SIZE = static_cast<int>( const int GRID_SIZE = static_cast<int>(
dh::div_round_up(device_matrix->row_ptr.size() - 1, BLOCK_THREADS)); dh::DivRoundUp(device_matrix->row_ptr.Size() - 1, BLOCK_THREADS));
int shared_memory_bytes = static_cast<int>( int shared_memory_bytes = static_cast<int>(
sizeof(float) * device_matrix->p_mat->info().num_col * BLOCK_THREADS); sizeof(float) * device_matrix->p_mat->Info().num_col_ * BLOCK_THREADS);
bool use_shared = true; bool use_shared = true;
if (shared_memory_bytes > max_shared_memory_bytes) { if (shared_memory_bytes > max_shared_memory_bytes) {
shared_memory_bytes = 0; shared_memory_bytes = 0;
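To make the sizing concrete: with BLOCK_THREADS = 128 and a hypothetical 100-feature matrix, the kernel requests sizeof(float) * 100 * 128 = 51,200 bytes of shared memory per block. On a device whose limit is 48 KB (49,152 bytes) this exceeds max_shared_memory_bytes, so shared_memory_bytes is reset to 0 and the element loader falls back to reading feature values from global memory.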
@ -327,11 +327,11 @@ class GPUPredictor : public xgboost::Predictor {
PredictKernel<BLOCK_THREADS> PredictKernel<BLOCK_THREADS>
<<<GRID_SIZE, BLOCK_THREADS, shared_memory_bytes>>>( <<<GRID_SIZE, BLOCK_THREADS, shared_memory_bytes>>>(
dh::raw(nodes), dh::raw(device_matrix->predictions), dh::Raw(nodes), dh::Raw(device_matrix->predictions),
dh::raw(tree_segments), dh::raw(tree_group), dh::Raw(tree_segments), dh::Raw(tree_group),
device_matrix->row_ptr.data(), device_matrix->data.data(), device_matrix->row_ptr.Data(), device_matrix->data.Data(),
tree_begin, tree_end, device_matrix->p_mat->info().num_col, tree_begin, tree_end, device_matrix->p_mat->Info().num_col_,
device_matrix->p_mat->info().num_row, use_shared, device_matrix->p_mat->Info().num_row_, use_shared,
model.param.num_output_group); model.param.num_output_group);
dh::safe_cuda(cudaDeviceSynchronize()); dh::safe_cuda(cudaDeviceSynchronize());
@ -349,7 +349,7 @@ class GPUPredictor : public xgboost::Predictor {
if (this->PredictFromCache(dmat, out_preds, model, ntree_limit)) { if (this->PredictFromCache(dmat, out_preds, model, ntree_limit)) {
return; return;
} }
this->InitOutPredictions(dmat->info(), out_preds, model); this->InitOutPredictions(dmat->Info(), out_preds, model);
int tree_end = ntree_limit * model.param.num_output_group; int tree_end = ntree_limit * model.param.num_output_group;
@ -364,11 +364,11 @@ class GPUPredictor : public xgboost::Predictor {
void InitOutPredictions(const MetaInfo& info, void InitOutPredictions(const MetaInfo& info,
HostDeviceVector<bst_float>* out_preds, HostDeviceVector<bst_float>* out_preds,
const gbm::GBTreeModel& model) const { const gbm::GBTreeModel& model) const {
size_t n = model.param.num_output_group * info.num_row; size_t n = model.param.num_output_group * info.num_row_;
const std::vector<bst_float>& base_margin = info.base_margin; const std::vector<bst_float>& base_margin = info.base_margin_;
out_preds->resize(n, 0.0f, param.gpu_id); out_preds->Resize(n, 0.0f, param.gpu_id);
if (base_margin.size() != 0) { if (base_margin.size() != 0) {
CHECK_EQ(out_preds->size(), n); CHECK_EQ(out_preds->Size(), n);
thrust::copy(base_margin.begin(), base_margin.end(), thrust::copy(base_margin.begin(), base_margin.end(),
out_preds->tbegin(param.gpu_id)); out_preds->tbegin(param.gpu_id));
} else { } else {
@ -384,12 +384,12 @@ class GPUPredictor : public xgboost::Predictor {
auto it = cache_.find(dmat); auto it = cache_.find(dmat);
if (it != cache_.end()) { if (it != cache_.end()) {
HostDeviceVector<bst_float>& y = it->second.predictions; HostDeviceVector<bst_float>& y = it->second.predictions;
if (y.size() != 0) { if (y.Size() != 0) {
dh::safe_cuda(cudaSetDevice(param.gpu_id)); dh::safe_cuda(cudaSetDevice(param.gpu_id));
out_preds->resize(y.size(), 0.0f, param.gpu_id); out_preds->Resize(y.Size(), 0.0f, param.gpu_id);
dh::safe_cuda(cudaMemcpy( dh::safe_cuda(cudaMemcpy(
out_preds->ptr_d(param.gpu_id), y.ptr_d(param.gpu_id), out_preds->DevicePointer(param.gpu_id), y.DevicePointer(param.gpu_id),
out_preds->size() * sizeof(bst_float), cudaMemcpyDefault)); out_preds->Size() * sizeof(bst_float), cudaMemcpyDefault));
return true; return true;
} }
} }
@ -409,9 +409,9 @@ class GPUPredictor : public xgboost::Predictor {
DMatrix* dmat = kv.first; DMatrix* dmat = kv.first;
HostDeviceVector<bst_float>& predictions = e.predictions; HostDeviceVector<bst_float>& predictions = e.predictions;
if (predictions.size() == 0) { if (predictions.Size() == 0) {
// ensure that the device in predictions is correct // ensure that the device in predictions is correct
predictions.resize(0, 0.0f, param.gpu_id); predictions.Resize(0, 0.0f, param.gpu_id);
cpu_predictor->PredictBatch(dmat, &predictions, model, 0, cpu_predictor->PredictBatch(dmat, &predictions, model, 0,
static_cast<bst_uint>(model.trees.size())); static_cast<bst_uint>(model.trees.size()));
} else if (model.param.num_output_group == 1 && updaters->size() > 0 && } else if (model.param.num_output_group == 1 && updaters->size() > 0 &&
@ -462,7 +462,7 @@ class GPUPredictor : public xgboost::Predictor {
Predictor::Init(cfg, cache); Predictor::Init(cfg, cache);
cpu_predictor->Init(cfg, cache); cpu_predictor->Init(cfg, cache);
param.InitAllowUnknown(cfg); param.InitAllowUnknown(cfg);
max_shared_memory_bytes = dh::max_shared_memory(param.gpu_id); max_shared_memory_bytes = dh::MaxSharedMemory(param.gpu_id);
} }
private: private:
View File
@ -11,8 +11,9 @@ namespace xgboost {
void Predictor::Init( void Predictor::Init(
const std::vector<std::pair<std::string, std::string>>& cfg, const std::vector<std::pair<std::string, std::string>>& cfg,
const std::vector<std::shared_ptr<DMatrix>>& cache) { const std::vector<std::shared_ptr<DMatrix>>& cache) {
for (const std::shared_ptr<DMatrix>& d : cache) for (const std::shared_ptr<DMatrix>& d : cache) {
cache_[d.get()].data = d; cache_[d.get()].data = d;
}
} }
Predictor* Predictor::Create(std::string name) { Predictor* Predictor::Create(std::string name) {
auto* e = ::dmlc::Registry<PredictorReg>::Get()->Find(name); auto* e = ::dmlc::Registry<PredictorReg>::Get()->Find(name);
View File
@ -13,7 +13,7 @@ namespace tree {
/*! \brief training parameters for histogram-based training */ /*! \brief training parameters for histogram-based training */
struct FastHistParam : public dmlc::Parameter<FastHistParam> { struct FastHistParam : public dmlc::Parameter<FastHistParam> {
// integral data type to be used with columnar data storage // integral data type to be used with columnar data storage
enum class DataType { uint8 = 1, uint16 = 2, uint32 = 4 }; enum class DataType { uint8 = 1, uint16 = 2, uint32 = 4 }; // NOLINT
int colmat_dtype; int colmat_dtype;
// percentage threshold for treating a feature as sparse // percentage threshold for treating a feature as sparse
// e.g. 0.2 indicates a feature with fewer than 20% nonzeros is considered sparse // e.g. 0.2 indicates a feature with fewer than 20% nonzeros is considered sparse
View File
@ -190,26 +190,26 @@ struct TrainParam : public dmlc::Parameter<TrainParam> {
DMLC_DECLARE_ALIAS(learning_rate, eta); DMLC_DECLARE_ALIAS(learning_rate, eta);
} }
/*! \brief whether need forward small to big search: default right */ /*! \brief whether need forward small to big search: default right */
inline bool need_forward_search(float col_density, bool indicator) const { inline bool NeedForwardSearch(float col_density, bool indicator) const {
return this->default_direction == 2 || return this->default_direction == 2 ||
(default_direction == 0 && (col_density < opt_dense_col) && (default_direction == 0 && (col_density < opt_dense_col) &&
!indicator); !indicator);
} }
/*! \brief whether need backward big to small search: default left */ /*! \brief whether need backward big to small search: default left */
inline bool need_backward_search(float col_density, bool indicator) const { inline bool NeedBackwardSearch(float col_density, bool indicator) const {
return this->default_direction != 2; return this->default_direction != 2;
} }
/*! \brief given the loss change, whether we need to invoke pruning */ /*! \brief given the loss change, whether we need to invoke pruning */
inline bool need_prune(double loss_chg, int depth) const { inline bool NeedPrune(double loss_chg, int depth) const {
return loss_chg < this->min_split_loss; return loss_chg < this->min_split_loss;
} }
/*! \brief whether we can split with current hessian */ /*! \brief whether we can split with current hessian */
inline bool cannot_split(double sum_hess, int depth) const { inline bool CannotSplit(double sum_hess, int depth) const {
return sum_hess < this->min_child_weight * 2.0; return sum_hess < this->min_child_weight * 2.0;
} }
/*! \brief maximum sketch size */ /*! \brief maximum sketch size */
inline unsigned max_sketch_size() const { inline unsigned MaxSketchSize() const {
unsigned ret = static_cast<unsigned>(sketch_ratio / sketch_eps); auto ret = static_cast<unsigned>(sketch_ratio / sketch_eps);
CHECK_GT(ret, 0U); CHECK_GT(ret, 0U);
return ret; return ret;
} }
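As a worked example with hypothetical settings, sketch_ratio = 2 and sketch_eps = 0.03 give MaxSketchSize() = static_cast<unsigned>(2 / 0.03) = 66; the CHECK_GT guards against parameter combinations that would truncate to zero.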
@ -220,10 +220,12 @@ struct TrainParam : public dmlc::Parameter<TrainParam> {
// functions for L1 cost // functions for L1 cost
template <typename T1, typename T2> template <typename T1, typename T2>
XGBOOST_DEVICE inline static T1 ThresholdL1(T1 w, T2 lambda) { XGBOOST_DEVICE inline static T1 ThresholdL1(T1 w, T2 lambda) {
if (w > +lambda) if (w > +lambda) {
return w - lambda; return w - lambda;
if (w < -lambda) }
if (w < -lambda) {
return w + lambda; return w + lambda;
}
return 0.0; return 0.0;
} }
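ThresholdL1 is the standard soft-thresholding operator, T(w, lambda) = sign(w) * max(|w| - lambda, 0): for example T(0.5, 0.2) = 0.3, T(-0.5, 0.2) = -0.3, and any |w| <= 0.2 maps to 0.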
@ -240,8 +242,9 @@ XGBOOST_DEVICE inline T CalcGainGivenWeight(const TrainingParams &p, T sum_grad,
// calculate the cost of loss function // calculate the cost of loss function
template <typename TrainingParams, typename T> template <typename TrainingParams, typename T>
XGBOOST_DEVICE inline T CalcGain(const TrainingParams &p, T sum_grad, T sum_hess) { XGBOOST_DEVICE inline T CalcGain(const TrainingParams &p, T sum_grad, T sum_hess) {
if (sum_hess < p.min_child_weight) if (sum_hess < p.min_child_weight) {
return T(0.0); return T(0.0);
}
if (p.max_delta_step == 0.0f) { if (p.max_delta_step == 0.0f) {
if (p.reg_alpha == 0.0f) { if (p.reg_alpha == 0.0f) {
return Sqr(sum_grad) / (sum_hess + p.reg_lambda); return Sqr(sum_grad) / (sum_hess + p.reg_lambda);
@ -276,8 +279,9 @@ XGBOOST_DEVICE inline T CalcGain(const TrainingParams &p, T sum_grad, T sum_hess
template <typename TrainingParams, typename T> template <typename TrainingParams, typename T>
XGBOOST_DEVICE inline T CalcWeight(const TrainingParams &p, T sum_grad, XGBOOST_DEVICE inline T CalcWeight(const TrainingParams &p, T sum_grad,
T sum_hess) { T sum_hess) {
if (sum_hess < p.min_child_weight) if (sum_hess < p.min_child_weight) {
return 0.0; return 0.0;
}
T dw; T dw;
if (p.reg_alpha == 0.0f) { if (p.reg_alpha == 0.0f) {
dw = -sum_grad / (sum_hess + p.reg_lambda); dw = -sum_grad / (sum_hess + p.reg_lambda);
@ -285,16 +289,18 @@ XGBOOST_DEVICE inline T CalcWeight(const TrainingParams &p, T sum_grad,
dw = -ThresholdL1(sum_grad, p.reg_alpha) / (sum_hess + p.reg_lambda); dw = -ThresholdL1(sum_grad, p.reg_alpha) / (sum_hess + p.reg_lambda);
} }
if (p.max_delta_step != 0.0f) { if (p.max_delta_step != 0.0f) {
if (dw > p.max_delta_step) if (dw > p.max_delta_step) {
dw = p.max_delta_step; dw = p.max_delta_step;
if (dw < -p.max_delta_step) }
if (dw < -p.max_delta_step) {
dw = -p.max_delta_step; dw = -p.max_delta_step;
}
} }
return dw; return dw;
} }
template <typename TrainingParams, typename gpair_t> template <typename TrainingParams, typename GpairT>
XGBOOST_DEVICE inline float CalcWeight(const TrainingParams &p, gpair_t sum_grad) { XGBOOST_DEVICE inline float CalcWeight(const TrainingParams &p, GpairT sum_grad) {
return CalcWeight(p, sum_grad.GetGrad(), sum_grad.GetHess()); return CalcWeight(p, sum_grad.GetGrad(), sum_grad.GetHess());
} }
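With reg_alpha = 0 and max_delta_step = 0, CalcWeight reduces to the closed form w = -sum_grad / (sum_hess + reg_lambda); for hypothetical values sum_grad = -4, sum_hess = 9 and reg_lambda = 1, the leaf weight is 0.4. Nonzero reg_alpha first soft-thresholds the gradient, and nonzero max_delta_step clips the result, as the branches above show.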
@ -312,8 +318,8 @@ struct XGBOOST_ALIGNAS(16) GradStats {
/*! \brief constructor, the object must be cleared during construction */ /*! \brief constructor, the object must be cleared during construction */
explicit GradStats(const TrainParam& param) { this->Clear(); } explicit GradStats(const TrainParam& param) { this->Clear(); }
template <typename gpair_t> template <typename GpairT>
XGBOOST_DEVICE explicit GradStats(const gpair_t &sum) XGBOOST_DEVICE explicit GradStats(const GpairT &sum)
: sum_grad(sum.GetGrad()), sum_hess(sum.GetHess()) {} : sum_grad(sum.GetGrad()), sum_hess(sum.GetHess()) {}
/*! \brief clear the statistics */ /*! \brief clear the statistics */
inline void Clear() { sum_grad = sum_hess = 0.0f; } inline void Clear() { sum_grad = sum_hess = 0.0f; }
@ -323,26 +329,26 @@ struct XGBOOST_ALIGNAS(16) GradStats {
* \brief accumulate statistics * \brief accumulate statistics
* \param p the gradient pair * \param p the gradient pair
*/ */
inline void Add(bst_gpair p) { this->Add(p.GetGrad(), p.GetHess()); } inline void Add(GradientPair p) { this->Add(p.GetGrad(), p.GetHess()); }
/*! /*!
* \brief accumulate statistics, more complicated version * \brief accumulate statistics, more complicated version
* \param gpair the vector storing the gradient statistics * \param gpair the vector storing the gradient statistics
* \param info the additional information * \param info the additional information
* \param ridx instance index of this instance * \param ridx instance index of this instance
*/ */
inline void Add(const std::vector<bst_gpair>& gpair, const MetaInfo& info, inline void Add(const std::vector<GradientPair>& gpair, const MetaInfo& info,
bst_uint ridx) { bst_uint ridx) {
const bst_gpair& b = gpair[ridx]; const GradientPair& b = gpair[ridx];
this->Add(b.GetGrad(), b.GetHess()); this->Add(b.GetGrad(), b.GetHess());
} }
/*! \brief calculate leaf weight */ /*! \brief calculate leaf weight */
template <typename param_t> template <typename ParamT>
XGBOOST_DEVICE inline double CalcWeight(const param_t &param) const { XGBOOST_DEVICE inline double CalcWeight(const ParamT &param) const {
return xgboost::tree::CalcWeight(param, sum_grad, sum_hess); return xgboost::tree::CalcWeight(param, sum_grad, sum_hess);
} }
/*! \brief calculate gain of the solution */ /*! \brief calculate gain of the solution */
template <typename param_t> template <typename ParamT>
inline double CalcGain(const param_t& param) const { inline double CalcGain(const ParamT& param) const {
return xgboost::tree::CalcGain(param, sum_grad, sum_hess); return xgboost::tree::CalcGain(param, sum_grad, sum_hess);
} }
/*! \brief add statistics to the data */ /*! \brief add statistics to the data */
@ -364,7 +370,7 @@ template <typename param_t>
/*! \brief set leaf vector value based on statistics */ /*! \brief set leaf vector value based on statistics */
inline void SetLeafVec(const TrainParam& param, bst_float* vec) const {} inline void SetLeafVec(const TrainParam& param, bst_float* vec) const {}
// constructor to allow inheritance // constructor to allow inheritance
GradStats() {} GradStats() = default;
/*! \brief add statistics to the data */ /*! \brief add statistics to the data */
inline void Add(double grad, double hess) { inline void Add(double grad, double hess) {
sum_grad += grad; sum_grad += grad;
@ -400,8 +406,8 @@ struct ValueConstraint {
inline static void Init(TrainParam *param, unsigned num_feature) { inline static void Init(TrainParam *param, unsigned num_feature) {
param->monotone_constraints.resize(num_feature, 0); param->monotone_constraints.resize(num_feature, 0);
} }
template <typename param_t> template <typename ParamT>
XGBOOST_DEVICE inline double CalcWeight(const param_t &param, GradStats stats) const { XGBOOST_DEVICE inline double CalcWeight(const ParamT &param, GradStats stats) const {
double w = stats.CalcWeight(param); double w = stats.CalcWeight(param);
if (w < lower_bound) { if (w < lower_bound) {
return lower_bound; return lower_bound;
@ -412,14 +418,14 @@ template <typename param_t>
return w; return w;
} }
template <typename param_t> template <typename ParamT>
XGBOOST_DEVICE inline double CalcGain(const param_t &param, GradStats stats) const { XGBOOST_DEVICE inline double CalcGain(const ParamT &param, GradStats stats) const {
return CalcGainGivenWeight(param, stats.sum_grad, stats.sum_hess, return CalcGainGivenWeight(param, stats.sum_grad, stats.sum_hess,
CalcWeight(param, stats)); CalcWeight(param, stats));
} }
template <typename param_t> template <typename ParamT>
XGBOOST_DEVICE inline double CalcSplitGain(const param_t &param, int constraint, XGBOOST_DEVICE inline double CalcSplitGain(const ParamT &param, int constraint,
GradStats left, GradStats right) const { GradStats left, GradStats right) const {
const double negative_infinity = -std::numeric_limits<double>::infinity(); const double negative_infinity = -std::numeric_limits<double>::infinity();
double wleft = CalcWeight(param, left); double wleft = CalcWeight(param, left);
@ -442,8 +448,9 @@ template <typename param_t>
int c = param.monotone_constraints.at(split_index); int c = param.monotone_constraints.at(split_index);
*cleft = *this; *cleft = *this;
*cright = *this; *cright = *this;
if (c == 0) if (c == 0) {
return; return;
}
double wleft = CalcWeight(param, left); double wleft = CalcWeight(param, left);
double wright = CalcWeight(param, right); double wright = CalcWeight(param, right);
double mid = (wleft + wright) / 2; double mid = (wleft + wright) / 2;
@ -464,13 +471,13 @@ template <typename param_t>
*/ */
struct SplitEntry { struct SplitEntry {
/*! \brief loss change after split this node */ /*! \brief loss change after split this node */
bst_float loss_chg; bst_float loss_chg{0.0f};
/*! \brief split index */ /*! \brief split index */
unsigned sindex; unsigned sindex{0};
/*! \brief split value */ /*! \brief split value */
bst_float split_value; bst_float split_value{0.0f};
/*! \brief constructor */ /*! \brief constructor */
SplitEntry() : loss_chg(0.0f), sindex(0), split_value(0.0f) {} SplitEntry() = default;
/*! /*!
* \brief decides whether we can replace current entry with the given * \brief decides whether we can replace current entry with the given
* statistics * statistics
@ -482,7 +489,7 @@ struct SplitEntry {
* \param split_index the feature index where the split is on * \param split_index the feature index where the split is on
*/ */
inline bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const { inline bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const {
if (this->split_index() <= split_index) { if (this->SplitIndex() <= split_index) {
return new_loss_chg > this->loss_chg; return new_loss_chg > this->loss_chg;
} else { } else {
return !(this->loss_chg > new_loss_chg); return !(this->loss_chg > new_loss_chg);
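Note the asymmetric comparison in NeedReplace: when the incumbent's SplitIndex() is at most the candidate's, the candidate must be strictly better, otherwise an equal loss_chg is enough. The apparent intent is a deterministic tie-break: among splits with equal gain, the one on the smaller feature index wins regardless of evaluation order.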
@ -494,7 +501,7 @@ struct SplitEntry {
* \return whether the proposed split is better and can replace current split * \return whether the proposed split is better and can replace current split
*/ */
inline bool Update(const SplitEntry &e) { inline bool Update(const SplitEntry &e) {
if (this->NeedReplace(e.loss_chg, e.split_index())) { if (this->NeedReplace(e.loss_chg, e.SplitIndex())) {
this->loss_chg = e.loss_chg; this->loss_chg = e.loss_chg;
this->sindex = e.sindex; this->sindex = e.sindex;
this->split_value = e.split_value; this->split_value = e.split_value;
@ -515,8 +522,9 @@ struct SplitEntry {
bst_float new_split_value, bool default_left) { bst_float new_split_value, bool default_left) {
if (this->NeedReplace(new_loss_chg, split_index)) { if (this->NeedReplace(new_loss_chg, split_index)) {
this->loss_chg = new_loss_chg; this->loss_chg = new_loss_chg;
if (default_left) if (default_left) {
split_index |= (1U << 31); split_index |= (1U << 31);
}
this->sindex = split_index; this->sindex = split_index;
this->split_value = new_split_value; this->split_value = new_split_value;
return true; return true;
@ -530,9 +538,9 @@ struct SplitEntry {
dst.Update(src); dst.Update(src);
} }
/*!\return feature index to split on */ /*!\return feature index to split on */
inline unsigned split_index() const { return sindex & ((1U << 31) - 1U); } inline unsigned SplitIndex() const { return sindex & ((1U << 31) - 1U); }
/*!\return whether missing value goes to left branch */ /*!\return whether missing value goes to left branch */
inline bool default_left() const { return (sindex >> 31) != 0; } inline bool DefaultLeft() const { return (sindex >> 31) != 0; }
}; };
} // namespace tree } // namespace tree
@ -542,14 +550,16 @@ struct SplitEntry {
namespace std { namespace std {
inline std::ostream &operator<<(std::ostream &os, const std::vector<int> &t) { inline std::ostream &operator<<(std::ostream &os, const std::vector<int> &t) {
os << '('; os << '(';
for (std::vector<int>::const_iterator it = t.begin(); it != t.end(); ++it) { for (auto it = t.begin(); it != t.end(); ++it) {
if (it != t.begin()) if (it != t.begin()) {
os << ','; os << ',';
}
os << *it; os << *it;
} }
// python style tuple // python style tuple
if (t.size() == 1) if (t.size() == 1) {
os << ','; os << ',';
}
os << ')'; os << ')';
return os; return os;
} }
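A quick usage sketch, assuming the operator<< above is in scope:

    #include <iostream>
    #include <vector>

    int main() {
      std::vector<int> a{1, 2, 3};
      std::vector<int> b{7};
      std::cout << a << " " << b << std::endl;  // prints "(1,2,3) (7,)"
      return 0;
    }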
@ -566,8 +576,9 @@ inline std::istream &operator>>(std::istream &is, std::vector<int> &t) {
return is; return is;
} }
is.get(); is.get();
if (ch == '(') if (ch == '(') {
break; break;
}
if (!isspace(ch)) { if (!isspace(ch)) {
is.setstate(std::ios::failbit); is.setstate(std::ios::failbit);
return is; return is;
@ -597,8 +608,9 @@ inline std::istream &operator>>(std::istream &is, std::vector<int> &t) {
} }
break; break;
} }
if (ch == ')') if (ch == ')') {
break; break;
}
} else if (ch == ')') { } else if (ch == ')') {
break; break;
} else { } else {
View File

@ -21,45 +21,53 @@ void DumpRegTree(std::stringstream& fo, // NOLINT(*)
int nid, int depth, int add_comma, int nid, int depth, int add_comma,
bool with_stats, std::string format) { bool with_stats, std::string format) {
if (format == "json") { if (format == "json") {
if (add_comma) fo << ","; if (add_comma) {
if (depth != 0) fo << std::endl; fo << ",";
for (int i = 0; i < depth+1; ++i) fo << " ";
} else {
for (int i = 0; i < depth; ++i) fo << '\t';
} }
if (tree[nid].is_leaf()) { if (depth != 0) {
fo << std::endl;
}
for (int i = 0; i < depth + 1; ++i) {
fo << " ";
}
} else {
for (int i = 0; i < depth; ++i) {
fo << '\t';
}
}
if (tree[nid].IsLeaf()) {
if (format == "json") { if (format == "json") {
fo << "{ \"nodeid\": " << nid fo << "{ \"nodeid\": " << nid
<< ", \"leaf\": " << tree[nid].leaf_value(); << ", \"leaf\": " << tree[nid].LeafValue();
if (with_stats) { if (with_stats) {
fo << ", \"cover\": " << tree.stat(nid).sum_hess; fo << ", \"cover\": " << tree.Stat(nid).sum_hess;
} }
fo << " }"; fo << " }";
} else { } else {
fo << nid << ":leaf=" << tree[nid].leaf_value(); fo << nid << ":leaf=" << tree[nid].LeafValue();
if (with_stats) { if (with_stats) {
fo << ",cover=" << tree.stat(nid).sum_hess; fo << ",cover=" << tree.Stat(nid).sum_hess;
} }
fo << '\n'; fo << '\n';
} }
} else { } else {
// right then left, // right then left,
bst_float cond = tree[nid].split_cond(); bst_float cond = tree[nid].SplitCond();
const unsigned split_index = tree[nid].split_index(); const unsigned split_index = tree[nid].SplitIndex();
if (split_index < fmap.size()) { if (split_index < fmap.Size()) {
switch (fmap.type(split_index)) { switch (fmap.type(split_index)) {
case FeatureMap::kIndicator: { case FeatureMap::kIndicator: {
int nyes = tree[nid].default_left() ? int nyes = tree[nid].DefaultLeft() ?
tree[nid].cright() : tree[nid].cleft(); tree[nid].RightChild() : tree[nid].LeftChild();
if (format == "json") { if (format == "json") {
fo << "{ \"nodeid\": " << nid fo << "{ \"nodeid\": " << nid
<< ", \"depth\": " << depth << ", \"depth\": " << depth
<< ", \"split\": \"" << fmap.name(split_index) << "\"" << ", \"split\": \"" << fmap.Name(split_index) << "\""
<< ", \"yes\": " << nyes << ", \"yes\": " << nyes
<< ", \"no\": " << tree[nid].cdefault(); << ", \"no\": " << tree[nid].DefaultChild();
} else { } else {
fo << nid << ":[" << fmap.name(split_index) << "] yes=" << nyes fo << nid << ":[" << fmap.Name(split_index) << "] yes=" << nyes
<< ",no=" << tree[nid].cdefault(); << ",no=" << tree[nid].DefaultChild();
} }
break; break;
} }
@ -67,17 +75,17 @@ void DumpRegTree(std::stringstream& fo, // NOLINT(*)
if (format == "json") { if (format == "json") {
fo << "{ \"nodeid\": " << nid fo << "{ \"nodeid\": " << nid
<< ", \"depth\": " << depth << ", \"depth\": " << depth
<< ", \"split\": \"" << fmap.name(split_index) << "\"" << ", \"split\": \"" << fmap.Name(split_index) << "\""
<< ", \"split_condition\": " << int(cond + 1.0) << ", \"split_condition\": " << int(cond + 1.0)
<< ", \"yes\": " << tree[nid].cleft() << ", \"yes\": " << tree[nid].LeftChild()
<< ", \"no\": " << tree[nid].cright() << ", \"no\": " << tree[nid].RightChild()
<< ", \"missing\": " << tree[nid].cdefault(); << ", \"missing\": " << tree[nid].DefaultChild();
} else { } else {
fo << nid << ":[" << fmap.name(split_index) << "<" fo << nid << ":[" << fmap.Name(split_index) << "<"
<< int(cond + 1.0) << int(cond + 1.0)
<< "] yes=" << tree[nid].cleft() << "] yes=" << tree[nid].LeftChild()
<< ",no=" << tree[nid].cright() << ",no=" << tree[nid].RightChild()
<< ",missing=" << tree[nid].cdefault(); << ",missing=" << tree[nid].DefaultChild();
} }
break; break;
} }
@ -86,16 +94,16 @@ void DumpRegTree(std::stringstream& fo, // NOLINT(*)
if (format == "json") { if (format == "json") {
fo << "{ \"nodeid\": " << nid fo << "{ \"nodeid\": " << nid
<< ", \"depth\": " << depth << ", \"depth\": " << depth
<< ", \"split\": \"" << fmap.name(split_index) << "\"" << ", \"split\": \"" << fmap.Name(split_index) << "\""
<< ", \"split_condition\": " << cond << ", \"split_condition\": " << cond
<< ", \"yes\": " << tree[nid].cleft() << ", \"yes\": " << tree[nid].LeftChild()
<< ", \"no\": " << tree[nid].cright() << ", \"no\": " << tree[nid].RightChild()
<< ", \"missing\": " << tree[nid].cdefault(); << ", \"missing\": " << tree[nid].DefaultChild();
} else { } else {
fo << nid << ":[" << fmap.name(split_index) << "<" << cond fo << nid << ":[" << fmap.Name(split_index) << "<" << cond
<< "] yes=" << tree[nid].cleft() << "] yes=" << tree[nid].LeftChild()
<< ",no=" << tree[nid].cright() << ",no=" << tree[nid].RightChild()
<< ",missing=" << tree[nid].cdefault(); << ",missing=" << tree[nid].DefaultChild();
} }
break; break;
} }
@ -107,22 +115,22 @@ void DumpRegTree(std::stringstream& fo, // NOLINT(*)
<< ", \"depth\": " << depth << ", \"depth\": " << depth
<< ", \"split\": " << split_index << ", \"split\": " << split_index
<< ", \"split_condition\": " << cond << ", \"split_condition\": " << cond
<< ", \"yes\": " << tree[nid].cleft() << ", \"yes\": " << tree[nid].LeftChild()
<< ", \"no\": " << tree[nid].cright() << ", \"no\": " << tree[nid].RightChild()
<< ", \"missing\": " << tree[nid].cdefault(); << ", \"missing\": " << tree[nid].DefaultChild();
} else { } else {
fo << nid << ":[f" << split_index << "<"<< cond fo << nid << ":[f" << split_index << "<"<< cond
<< "] yes=" << tree[nid].cleft() << "] yes=" << tree[nid].LeftChild()
<< ",no=" << tree[nid].cright() << ",no=" << tree[nid].RightChild()
<< ",missing=" << tree[nid].cdefault(); << ",missing=" << tree[nid].DefaultChild();
} }
} }
if (with_stats) { if (with_stats) {
if (format == "json") { if (format == "json") {
fo << ", \"gain\": " << tree.stat(nid).loss_chg fo << ", \"gain\": " << tree.Stat(nid).loss_chg
<< ", \"cover\": " << tree.stat(nid).sum_hess; << ", \"cover\": " << tree.Stat(nid).sum_hess;
} else { } else {
fo << ",gain=" << tree.stat(nid).loss_chg << ",cover=" << tree.stat(nid).sum_hess; fo << ",gain=" << tree.Stat(nid).loss_chg << ",cover=" << tree.Stat(nid).sum_hess;
} }
} }
if (format == "json") { if (format == "json") {
@ -130,11 +138,13 @@ void DumpRegTree(std::stringstream& fo, // NOLINT(*)
} else { } else {
fo << '\n'; fo << '\n';
} }
DumpRegTree(fo, tree, fmap, tree[nid].cleft(), depth + 1, false, with_stats, format); DumpRegTree(fo, tree, fmap, tree[nid].LeftChild(), depth + 1, false, with_stats, format);
DumpRegTree(fo, tree, fmap, tree[nid].cright(), depth + 1, true, with_stats, format); DumpRegTree(fo, tree, fmap, tree[nid].RightChild(), depth + 1, true, with_stats, format);
if (format == "json") { if (format == "json") {
fo << std::endl; fo << std::endl;
for (int i = 0; i < depth+1; ++i) fo << " "; for (int i = 0; i < depth + 1; ++i) {
fo << " ";
}
fo << "]}"; fo << "]}";
} }
} }
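For a hypothetical depth-1 tree that splits on feature f0 at 0.5 with the default direction left, the text format produced above would look roughly like

    0:[f0<0.5] yes=1,no=2,missing=1
            1:leaf=0.1
            2:leaf=-0.2

(the real output indents with tabs and appends gain/cover when with_stats is set), while format == "json" emits the same structure as nested objects carrying nodeid, split, split_condition, yes/no/missing and leaf fields.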
View File
@ -29,7 +29,7 @@ namespace tree {
class BaseMaker: public TreeUpdater { class BaseMaker: public TreeUpdater {
public: public:
void Init(const std::vector<std::pair<std::string, std::string> >& args) override { void Init(const std::vector<std::pair<std::string, std::string> >& args) override {
param.InitAllowUnknown(args); param_.InitAllowUnknown(args);
} }
protected: protected:
@ -39,8 +39,8 @@ class BaseMaker: public TreeUpdater {
/*! \brief find type of each feature, use column format */ /*! \brief find type of each feature, use column format */
inline void InitByCol(DMatrix* p_fmat, inline void InitByCol(DMatrix* p_fmat,
const RegTree& tree) { const RegTree& tree) {
fminmax.resize(tree.param.num_feature * 2); fminmax_.resize(tree.param.num_feature * 2);
std::fill(fminmax.begin(), fminmax.end(), std::fill(fminmax_.begin(), fminmax_.end(),
-std::numeric_limits<bst_float>::max()); -std::numeric_limits<bst_float>::max());
// start accumulating statistics // start accumulating statistics
dmlc::DataIter<ColBatch>* iter = p_fmat->ColIterator(); dmlc::DataIter<ColBatch>* iter = p_fmat->ColIterator();
@ -51,22 +51,22 @@ class BaseMaker: public TreeUpdater {
const bst_uint fid = batch.col_index[i]; const bst_uint fid = batch.col_index[i];
const ColBatch::Inst& c = batch[i]; const ColBatch::Inst& c = batch[i];
if (c.length != 0) { if (c.length != 0) {
fminmax[fid * 2 + 0] = std::max(-c[0].fvalue, fminmax[fid * 2 + 0]); fminmax_[fid * 2 + 0] = std::max(-c[0].fvalue, fminmax_[fid * 2 + 0]);
fminmax[fid * 2 + 1] = std::max(c[c.length - 1].fvalue, fminmax[fid * 2 + 1]); fminmax_[fid * 2 + 1] = std::max(c[c.length - 1].fvalue, fminmax_[fid * 2 + 1]);
} }
} }
} }
} }
/*! \brief synchronize the information */ /*! \brief synchronize the information */
inline void SyncInfo() { inline void SyncInfo() {
rabit::Allreduce<rabit::op::Max>(dmlc::BeginPtr(fminmax), fminmax.size()); rabit::Allreduce<rabit::op::Max>(dmlc::BeginPtr(fminmax_), fminmax_.size());
} }
// get feature type, 0:empty 1:binary 2:real // get feature type, 0:empty 1:binary 2:real
inline int Type(bst_uint fid) const { inline int Type(bst_uint fid) const {
CHECK_LT(fid * 2 + 1, fminmax.size()) CHECK_LT(fid * 2 + 1, fminmax_.size())
<< "FeatHelper fid exceed query bound "; << "FeatHelper fid exceed query bound ";
bst_float a = fminmax[fid * 2]; bst_float a = fminmax_[fid * 2];
bst_float b = fminmax[fid * 2 + 1]; bst_float b = fminmax_[fid * 2 + 1];
if (a == -std::numeric_limits<bst_float>::max()) return 0; if (a == -std::numeric_limits<bst_float>::max()) return 0;
if (-a == b) { if (-a == b) {
return 1; return 1;
@ -75,16 +75,16 @@ class BaseMaker: public TreeUpdater {
} }
} }
inline bst_float MaxValue(bst_uint fid) const { inline bst_float MaxValue(bst_uint fid) const {
return fminmax[fid *2 + 1]; return fminmax_[fid *2 + 1];
} }
inline void SampleCol(float p, std::vector<bst_uint> *p_findex) const { inline void SampleCol(float p, std::vector<bst_uint> *p_findex) const {
std::vector<bst_uint> &findex = *p_findex; std::vector<bst_uint> &findex = *p_findex;
findex.clear(); findex.clear();
for (size_t i = 0; i < fminmax.size(); i += 2) { for (size_t i = 0; i < fminmax_.size(); i += 2) {
const bst_uint fid = static_cast<bst_uint>(i / 2); const auto fid = static_cast<bst_uint>(i / 2);
if (this->Type(fid) != 0) findex.push_back(fid); if (this->Type(fid) != 0) findex.push_back(fid);
} }
unsigned n = static_cast<unsigned>(p * findex.size()); auto n = static_cast<unsigned>(p * findex.size());
std::shuffle(findex.begin(), findex.end(), common::GlobalRandom()); std::shuffle(findex.begin(), findex.end(), common::GlobalRandom());
findex.resize(n); findex.resize(n);
// sync the findex if it is subsample // sync the findex if it is subsample
@ -99,64 +99,64 @@ class BaseMaker: public TreeUpdater {
} }
private: private:
std::vector<bst_float> fminmax; std::vector<bst_float> fminmax_;
}; };
// ------static helper functions ------ // ------static helper functions ------
// helper function to get to next level of the tree // helper function to get to next level of the tree
/*! \brief this is helper function for row based data*/ /*! \brief this is helper function for row based data*/
inline static int NextLevel(const RowBatch::Inst &inst, const RegTree &tree, int nid) { inline static int NextLevel(const RowBatch::Inst &inst, const RegTree &tree, int nid) {
const RegTree::Node &n = tree[nid]; const RegTree::Node &n = tree[nid];
bst_uint findex = n.split_index(); bst_uint findex = n.SplitIndex();
for (unsigned i = 0; i < inst.length; ++i) { for (unsigned i = 0; i < inst.length; ++i) {
if (findex == inst[i].index) { if (findex == inst[i].index) {
if (inst[i].fvalue < n.split_cond()) { if (inst[i].fvalue < n.SplitCond()) {
return n.cleft(); return n.LeftChild();
} else { } else {
return n.cright(); return n.RightChild();
} }
} }
} }
return n.cdefault(); return n.DefaultChild();
} }
// ------class member helpers--------- // ------class member helpers---------
/*! \brief initialize temp data structure */ /*! \brief initialize temp data structure */
inline void InitData(const std::vector<bst_gpair> &gpair, inline void InitData(const std::vector<GradientPair> &gpair,
const DMatrix &fmat, const DMatrix &fmat,
const RegTree &tree) { const RegTree &tree) {
CHECK_EQ(tree.param.num_nodes, tree.param.num_roots) CHECK_EQ(tree.param.num_nodes, tree.param.num_roots)
<< "TreeMaker: can only grow new tree"; << "TreeMaker: can only grow new tree";
const std::vector<unsigned> &root_index = fmat.info().root_index; const std::vector<unsigned> &root_index = fmat.Info().root_index_;
{ {
// setup position // setup position
position.resize(gpair.size()); position_.resize(gpair.size());
if (root_index.size() == 0) { if (root_index.size() == 0) {
std::fill(position.begin(), position.end(), 0); std::fill(position_.begin(), position_.end(), 0);
} else { } else {
for (size_t i = 0; i < position.size(); ++i) { for (size_t i = 0; i < position_.size(); ++i) {
position[i] = root_index[i]; position_[i] = root_index[i];
CHECK_LT(root_index[i], (unsigned)tree.param.num_roots) CHECK_LT(root_index[i], (unsigned)tree.param.num_roots)
<< "root index exceed setting"; << "root index exceed setting";
} }
} }
// mark delete for the deleted data // mark delete for the deleted data
for (size_t i = 0; i < position.size(); ++i) { for (size_t i = 0; i < position_.size(); ++i) {
if (gpair[i].GetHess() < 0.0f) position[i] = ~position[i]; if (gpair[i].GetHess() < 0.0f) position_[i] = ~position_[i];
} }
// mark subsample // mark subsample
if (param.subsample < 1.0f) { if (param_.subsample < 1.0f) {
std::bernoulli_distribution coin_flip(param.subsample); std::bernoulli_distribution coin_flip(param_.subsample);
auto& rnd = common::GlobalRandom(); auto& rnd = common::GlobalRandom();
for (size_t i = 0; i < position.size(); ++i) { for (size_t i = 0; i < position_.size(); ++i) {
if (gpair[i].GetHess() < 0.0f) continue; if (gpair[i].GetHess() < 0.0f) continue;
if (!coin_flip(rnd)) position[i] = ~position[i]; if (!coin_flip(rnd)) position_[i] = ~position_[i];
} }
} }
} }
{ {
// expand query // expand query
qexpand.reserve(256); qexpand.clear(); qexpand_.reserve(256); qexpand_.clear();
for (int i = 0; i < tree.param.num_roots; ++i) { for (int i = 0; i < tree.param.num_roots; ++i) {
qexpand.push_back(i); qexpand_.push_back(i);
} }
this->UpdateNode2WorkIndex(tree); this->UpdateNode2WorkIndex(tree);
} }
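The position encoding above leans on bitwise complement: a row parked at node nid but excluded (deleted or subsampled out) stores ~nid, which is always negative, so the node id stays recoverable. A one-line sketch of the decode used later:

    inline int Decode(int pos) { return pos < 0 ? ~pos : pos; }  // ~3 == -4, Decode(-4) == 3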
@ -164,28 +164,27 @@ class BaseMaker: public TreeUpdater {
/*! \brief update queue expand add in new leaves */ /*! \brief update queue expand add in new leaves */
inline void UpdateQueueExpand(const RegTree &tree) { inline void UpdateQueueExpand(const RegTree &tree) {
std::vector<int> newnodes; std::vector<int> newnodes;
for (size_t i = 0; i < qexpand.size(); ++i) { for (int nid : qexpand_) {
const int nid = qexpand[i]; if (!tree[nid].IsLeaf()) {
if (!tree[nid].is_leaf()) { newnodes.push_back(tree[nid].LeftChild());
newnodes.push_back(tree[nid].cleft()); newnodes.push_back(tree[nid].RightChild());
newnodes.push_back(tree[nid].cright());
} }
} }
// use new nodes for qexpand // use new nodes for qexpand
qexpand = newnodes; qexpand_ = newnodes;
this->UpdateNode2WorkIndex(tree); this->UpdateNode2WorkIndex(tree);
} }
// return decoded position // return decoded position
inline int DecodePosition(bst_uint ridx) const { inline int DecodePosition(bst_uint ridx) const {
const int pid = position[ridx]; const int pid = position_[ridx];
return pid < 0 ? ~pid : pid; return pid < 0 ? ~pid : pid;
} }
// encode the encoded position value for ridx // encode the encoded position value for ridx
inline void SetEncodePosition(bst_uint ridx, int nid) { inline void SetEncodePosition(bst_uint ridx, int nid) {
if (position[ridx] < 0) { if (position_[ridx] < 0) {
position[ridx] = ~nid; position_[ridx] = ~nid;
} else { } else {
position[ridx] = nid; position_[ridx] = nid;
} }
} }
/*! /*!
@ -211,27 +210,27 @@ class BaseMaker: public TreeUpdater {
inline void SetDefaultPostion(DMatrix *p_fmat, inline void SetDefaultPostion(DMatrix *p_fmat,
const RegTree &tree) { const RegTree &tree) {
// set rest of instances to default position // set rest of instances to default position
const RowSet &rowset = p_fmat->buffered_rowset(); const RowSet &rowset = p_fmat->BufferedRowset();
// set default direct nodes to default // set default direct nodes to default
// for leaf nodes that are not fresh, mark them as ~nid, // for leaf nodes that are not fresh, mark them as ~nid,
// so that they are ignored in future statistics collection // so that they are ignored in future statistics collection
const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size()); const auto ndata = static_cast<bst_omp_uint>(rowset.Size());
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ndata; ++i) { for (bst_omp_uint i = 0; i < ndata; ++i) {
const bst_uint ridx = rowset[i]; const bst_uint ridx = rowset[i];
const int nid = this->DecodePosition(ridx); const int nid = this->DecodePosition(ridx);
if (tree[nid].is_leaf()) { if (tree[nid].IsLeaf()) {
// mark finish when it is not a fresh leaf // mark finish when it is not a fresh leaf
if (tree[nid].cright() == -1) { if (tree[nid].RightChild() == -1) {
position[ridx] = ~nid; position_[ridx] = ~nid;
} }
} else { } else {
// push to default branch // push to default branch
if (tree[nid].default_left()) { if (tree[nid].DefaultLeft()) {
this->SetEncodePosition(ridx, tree[nid].cleft()); this->SetEncodePosition(ridx, tree[nid].LeftChild());
} else { } else {
this->SetEncodePosition(ridx, tree[nid].cright()); this->SetEncodePosition(ridx, tree[nid].RightChild());
} }
} }
} }
@ -254,21 +253,21 @@ class BaseMaker: public TreeUpdater {
auto it = std::lower_bound(sorted_split_set.begin(), sorted_split_set.end(), fid); auto it = std::lower_bound(sorted_split_set.begin(), sorted_split_set.end(), fid);
if (it != sorted_split_set.end() && *it == fid) { if (it != sorted_split_set.end() && *it == fid) {
const bst_omp_uint ndata = static_cast<bst_omp_uint>(col.length); const auto ndata = static_cast<bst_omp_uint>(col.length);
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) { for (bst_omp_uint j = 0; j < ndata; ++j) {
const bst_uint ridx = col[j].index; const bst_uint ridx = col[j].index;
const bst_float fvalue = col[j].fvalue; const bst_float fvalue = col[j].fvalue;
const int nid = this->DecodePosition(ridx); const int nid = this->DecodePosition(ridx);
CHECK(tree[nid].is_leaf()); CHECK(tree[nid].IsLeaf());
int pid = tree[nid].parent(); int pid = tree[nid].Parent();
// go back to parent, correct those who are not default // go back to parent, correct those who are not default
if (!tree[nid].is_root() && tree[pid].split_index() == fid) { if (!tree[nid].IsRoot() && tree[pid].SplitIndex() == fid) {
if (fvalue < tree[pid].split_cond()) { if (fvalue < tree[pid].SplitCond()) {
this->SetEncodePosition(ridx, tree[pid].cleft()); this->SetEncodePosition(ridx, tree[pid].LeftChild());
} else { } else {
this->SetEncodePosition(ridx, tree[pid].cright()); this->SetEncodePosition(ridx, tree[pid].RightChild());
} }
} }
} }
@ -287,10 +286,9 @@ class BaseMaker: public TreeUpdater {
std::vector<unsigned>& fsplits = *out_split_set; std::vector<unsigned>& fsplits = *out_split_set;
fsplits.clear(); fsplits.clear();
// step 1, classify the non-default data into right places // step 1, classify the non-default data into right places
for (size_t i = 0; i < nodes.size(); ++i) { for (int nid : nodes) {
const int nid = nodes[i]; if (!tree[nid].IsLeaf()) {
if (!tree[nid].is_leaf()) { fsplits.push_back(tree[nid].SplitIndex());
fsplits.push_back(tree[nid].split_index());
} }
} }
std::sort(fsplits.begin(), fsplits.end()); std::sort(fsplits.begin(), fsplits.end());
@ -314,18 +312,18 @@ class BaseMaker: public TreeUpdater {
for (size_t i = 0; i < batch.size; ++i) { for (size_t i = 0; i < batch.size; ++i) {
ColBatch::Inst col = batch[i]; ColBatch::Inst col = batch[i];
const bst_uint fid = batch.col_index[i]; const bst_uint fid = batch.col_index[i];
const bst_omp_uint ndata = static_cast<bst_omp_uint>(col.length); const auto ndata = static_cast<bst_omp_uint>(col.length);
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) { for (bst_omp_uint j = 0; j < ndata; ++j) {
const bst_uint ridx = col[j].index; const bst_uint ridx = col[j].index;
const bst_float fvalue = col[j].fvalue; const bst_float fvalue = col[j].fvalue;
const int nid = this->DecodePosition(ridx); const int nid = this->DecodePosition(ridx);
// go back to parent, correct those who are not default // go back to parent, correct those who are not default
if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) { if (!tree[nid].IsLeaf() && tree[nid].SplitIndex() == fid) {
if (fvalue < tree[nid].split_cond()) { if (fvalue < tree[nid].SplitCond()) {
this->SetEncodePosition(ridx, tree[nid].cleft()); this->SetEncodePosition(ridx, tree[nid].LeftChild());
} else { } else {
this->SetEncodePosition(ridx, tree[nid].cright()); this->SetEncodePosition(ridx, tree[nid].RightChild());
} }
} }
} }
@ -334,39 +332,37 @@ class BaseMaker: public TreeUpdater {
} }
/*! \brief helper function to get statistics from a tree */ /*! \brief helper function to get statistics from a tree */
template<typename TStats> template<typename TStats>
inline void GetNodeStats(const std::vector<bst_gpair> &gpair, inline void GetNodeStats(const std::vector<GradientPair> &gpair,
const DMatrix &fmat, const DMatrix &fmat,
const RegTree &tree, const RegTree &tree,
std::vector< std::vector<TStats> > *p_thread_temp, std::vector< std::vector<TStats> > *p_thread_temp,
std::vector<TStats> *p_node_stats) { std::vector<TStats> *p_node_stats) {
std::vector< std::vector<TStats> > &thread_temp = *p_thread_temp; std::vector< std::vector<TStats> > &thread_temp = *p_thread_temp;
const MetaInfo &info = fmat.info(); const MetaInfo &info = fmat.Info();
thread_temp.resize(omp_get_max_threads()); thread_temp.resize(omp_get_max_threads());
p_node_stats->resize(tree.param.num_nodes); p_node_stats->resize(tree.param.num_nodes);
#pragma omp parallel #pragma omp parallel
{ {
const int tid = omp_get_thread_num(); const int tid = omp_get_thread_num();
thread_temp[tid].resize(tree.param.num_nodes, TStats(param)); thread_temp[tid].resize(tree.param.num_nodes, TStats(param_));
for (size_t i = 0; i < qexpand.size(); ++i) { for (unsigned int nid : qexpand_) {
const unsigned nid = qexpand[i];
thread_temp[tid][nid].Clear(); thread_temp[tid][nid].Clear();
} }
} }
const RowSet &rowset = fmat.buffered_rowset(); const RowSet &rowset = fmat.BufferedRowset();
// setup position // setup position
const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size()); const auto ndata = static_cast<bst_omp_uint>(rowset.Size());
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ndata; ++i) { for (bst_omp_uint i = 0; i < ndata; ++i) {
const bst_uint ridx = rowset[i]; const bst_uint ridx = rowset[i];
const int nid = position[ridx]; const int nid = position_[ridx];
const int tid = omp_get_thread_num(); const int tid = omp_get_thread_num();
if (nid >= 0) { if (nid >= 0) {
thread_temp[tid][nid].Add(gpair, info, ridx); thread_temp[tid][nid].Add(gpair, info, ridx);
} }
} }
// sum the per thread statistics together // sum the per thread statistics together
for (size_t j = 0; j < qexpand.size(); ++j) { for (int nid : qexpand_) {
const int nid = qexpand[j];
TStats &s = (*p_node_stats)[nid]; TStats &s = (*p_node_stats)[nid];
s.Clear(); s.Clear();
for (size_t tid = 0; tid < thread_temp.size(); ++tid) { for (size_t tid = 0; tid < thread_temp.size(); ++tid) {
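GetNodeStats uses the usual per-thread scratch pattern: every OpenMP thread accumulates into its own row of thread_temp, and the rows are summed serially afterwards so no atomics are needed. A self-contained sketch of the same pattern, with plain doubles standing in for TStats:

    #include <omp.h>
    #include <vector>

    std::vector<double> SumPerNode(const std::vector<double>& grad,
                                   const std::vector<int>& node_of_row,
                                   int num_nodes) {
      std::vector<std::vector<double>> tmp(
          omp_get_max_threads(), std::vector<double>(num_nodes, 0.0));
    #pragma omp parallel for schedule(static)
      for (int i = 0; i < static_cast<int>(grad.size()); ++i) {
        tmp[omp_get_thread_num()][node_of_row[i]] += grad[i];  // race-free slot
      }
      std::vector<double> out(num_nodes, 0.0);
      for (const auto& t : tmp) {
        for (int n = 0; n < num_nodes; ++n) out[n] += t[n];  // serial reduce
      }
      return out;
    }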
@ -461,28 +457,28 @@ class BaseMaker: public TreeUpdater {
} }
}; };
/*! \brief training parameter of tree grower */ /*! \brief training parameter of tree grower */
TrainParam param; TrainParam param_;
/*! \brief queue of nodes to be expanded */ /*! \brief queue of nodes to be expanded */
std::vector<int> qexpand; std::vector<int> qexpand_;
/*! /*!
* \brief map active node to its working index offset in qexpand, * \brief map active node to its working index offset in qexpand,
* can be -1, which means the node is not actively expanding * can be -1, which means the node is not actively expanding
*/ */
std::vector<int> node2workindex; std::vector<int> node2workindex_;
/*! /*!
* \brief position of each instance in the tree * \brief position of each instance in the tree
* can be negative, which means this position is no longer expanding * can be negative, which means this position is no longer expanding
* see also Decode/EncodePosition * see also Decode/EncodePosition
*/ */
std::vector<int> position; std::vector<int> position_;
private: private:
inline void UpdateNode2WorkIndex(const RegTree &tree) { inline void UpdateNode2WorkIndex(const RegTree &tree) {
// update the node2workindex // update the node2workindex
std::fill(node2workindex.begin(), node2workindex.end(), -1); std::fill(node2workindex_.begin(), node2workindex_.end(), -1);
node2workindex.resize(tree.param.num_nodes); node2workindex_.resize(tree.param.num_nodes);
for (size_t i = 0; i < qexpand.size(); ++i) { for (size_t i = 0; i < qexpand_.size(); ++i) {
node2workindex[qexpand[i]] = static_cast<int>(i); node2workindex_[qexpand_[i]] = static_cast<int>(i);
} }
} }
}; };
File diff suppressed because it is too large
View File
@ -7,6 +7,7 @@
#include <dmlc/timer.h> #include <dmlc/timer.h>
#include <xgboost/tree_updater.h> #include <xgboost/tree_updater.h>
#include <cmath> #include <cmath>
#include <memory>
#include <vector> #include <vector>
#include <algorithm> #include <algorithm>
#include <queue> #include <queue>
@ -50,47 +51,47 @@ class FastHistMaker: public TreeUpdater {
pruner_.reset(TreeUpdater::Create("prune")); pruner_.reset(TreeUpdater::Create("prune"));
} }
pruner_->Init(args); pruner_->Init(args);
param.InitAllowUnknown(args); param_.InitAllowUnknown(args);
fhparam.InitAllowUnknown(args); fhparam_.InitAllowUnknown(args);
is_gmat_initialized_ = false; is_gmat_initialized_ = false;
} }
void Update(HostDeviceVector<bst_gpair>* gpair, void Update(HostDeviceVector<GradientPair>* gpair,
DMatrix* dmat, DMatrix* dmat,
const std::vector<RegTree*>& trees) override { const std::vector<RegTree*>& trees) override {
TStats::CheckInfo(dmat->info()); TStats::CheckInfo(dmat->Info());
if (is_gmat_initialized_ == false) { if (is_gmat_initialized_ == false) {
double tstart = dmlc::GetTime(); double tstart = dmlc::GetTime();
hmat_.Init(dmat, static_cast<uint32_t>(param.max_bin)); hmat_.Init(dmat, static_cast<uint32_t>(param_.max_bin));
gmat_.cut = &hmat_; gmat_.cut = &hmat_;
gmat_.Init(dmat); gmat_.Init(dmat);
column_matrix_.Init(gmat_, fhparam); column_matrix_.Init(gmat_, fhparam_);
if (fhparam.enable_feature_grouping > 0) { if (fhparam_.enable_feature_grouping > 0) {
gmatb_.Init(gmat_, column_matrix_, fhparam); gmatb_.Init(gmat_, column_matrix_, fhparam_);
} }
is_gmat_initialized_ = true; is_gmat_initialized_ = true;
if (param.debug_verbose > 0) { if (param_.debug_verbose > 0) {
LOG(INFO) << "Generating gmat: " << dmlc::GetTime() - tstart << " sec"; LOG(INFO) << "Generating gmat: " << dmlc::GetTime() - tstart << " sec";
} }
} }
// rescale learning rate according to size of trees // rescale learning rate according to size of trees
float lr = param.learning_rate; float lr = param_.learning_rate;
param.learning_rate = lr / trees.size(); param_.learning_rate = lr / trees.size();
TConstraint::Init(&param, dmat->info().num_col); TConstraint::Init(&param_, dmat->Info().num_col_);
// build tree // build tree
if (!builder_) { if (!builder_) {
builder_.reset(new Builder(param, fhparam, std::move(pruner_))); builder_.reset(new Builder(param_, fhparam_, std::move(pruner_)));
} }
for (size_t i = 0; i < trees.size(); ++i) { for (auto tree : trees) {
builder_->Update builder_->Update
(gmat_, gmatb_, column_matrix_, gpair, dmat, trees[i]); (gmat_, gmatb_, column_matrix_, gpair, dmat, tree);
} }
param.learning_rate = lr; param_.learning_rate = lr;
} }
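The learning-rate juggling above is a per-batch rescale: with a hypothetical learning_rate of 0.3 and trees.size() == 3, each tree is grown with an effective eta of 0.1, and the saved value is written back afterwards so the configured parameter is left untouched.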
bool UpdatePredictionCache(const DMatrix* data, bool UpdatePredictionCache(const DMatrix* data,
HostDeviceVector<bst_float>* out_preds) override { HostDeviceVector<bst_float>* out_preds) override {
if (!builder_ || param.subsample < 1.0f) { if (!builder_ || param_.subsample < 1.0f) {
return false; return false;
} else { } else {
return builder_->UpdatePredictionCache(data, out_preds); return builder_->UpdatePredictionCache(data, out_preds);
@ -99,8 +100,8 @@ class FastHistMaker: public TreeUpdater {
protected: protected:
// training parameter // training parameter
TrainParam param; TrainParam param_;
FastHistParam fhparam; FastHistParam fhparam_;
// data sketch // data sketch
HistCutMatrix hmat_; HistCutMatrix hmat_;
// quantized data matrix // quantized data matrix
@ -134,13 +135,13 @@ class FastHistMaker: public TreeUpdater {
explicit Builder(const TrainParam& param, explicit Builder(const TrainParam& param,
const FastHistParam& fhparam, const FastHistParam& fhparam,
std::unique_ptr<TreeUpdater> pruner) std::unique_ptr<TreeUpdater> pruner)
: param(param), fhparam(fhparam), pruner_(std::move(pruner)), : param_(param), fhparam_(fhparam), pruner_(std::move(pruner)),
p_last_tree_(nullptr), p_last_fmat_(nullptr) {} p_last_tree_(nullptr), p_last_fmat_(nullptr) {}
// update one tree, growing // update one tree, growing
virtual void Update(const GHistIndexMatrix& gmat, virtual void Update(const GHistIndexMatrix& gmat,
const GHistIndexBlockMatrix& gmatb, const GHistIndexBlockMatrix& gmatb,
const ColumnMatrix& column_matrix, const ColumnMatrix& column_matrix,
HostDeviceVector<bst_gpair>* gpair, HostDeviceVector<GradientPair>* gpair,
DMatrix* p_fmat, DMatrix* p_fmat,
RegTree* p_tree) { RegTree* p_tree) {
double gstart = dmlc::GetTime(); double gstart = dmlc::GetTime();
@ -155,11 +156,11 @@ class FastHistMaker: public TreeUpdater {
double time_evaluate_split = 0; double time_evaluate_split = 0;
double time_apply_split = 0; double time_apply_split = 0;
std::vector<bst_gpair>& gpair_h = gpair->data_h(); std::vector<GradientPair>& gpair_h = gpair->HostVector();
tstart = dmlc::GetTime(); tstart = dmlc::GetTime();
this->InitData(gmat, gpair_h, *p_fmat, *p_tree); this->InitData(gmat, gpair_h, *p_fmat, *p_tree);
std::vector<bst_uint> feat_set = feat_index; std::vector<bst_uint> feat_set = feat_index_;
time_init_data = dmlc::GetTime() - tstart; time_init_data = dmlc::GetTime() - tstart;
// FIXME(hcho3): this code is broken when param.num_roots > 1. Please fix it // FIXME(hcho3): this code is broken when param.num_roots > 1. Please fix it
@ -179,7 +180,7 @@ class FastHistMaker: public TreeUpdater {
this->EvaluateSplit(nid, gmat, hist_, *p_fmat, *p_tree, feat_set); this->EvaluateSplit(nid, gmat, hist_, *p_fmat, *p_tree, feat_set);
time_evaluate_split += dmlc::GetTime() - tstart; time_evaluate_split += dmlc::GetTime() - tstart;
qexpand_->push(ExpandEntry(nid, p_tree->GetDepth(nid), qexpand_->push(ExpandEntry(nid, p_tree->GetDepth(nid),
snode[nid].best.loss_chg, snode_[nid].best.loss_chg,
timestamp++)); timestamp++));
++num_leaves; ++num_leaves;
} }
@ -188,21 +189,21 @@ class FastHistMaker: public TreeUpdater {
const ExpandEntry candidate = qexpand_->top(); const ExpandEntry candidate = qexpand_->top();
const int nid = candidate.nid; const int nid = candidate.nid;
qexpand_->pop(); qexpand_->pop();
if (candidate.loss_chg <= rt_eps if (candidate.loss_chg <= kRtEps
|| (param.max_depth > 0 && candidate.depth == param.max_depth) || (param_.max_depth > 0 && candidate.depth == param_.max_depth)
|| (param.max_leaves > 0 && num_leaves == param.max_leaves) ) { || (param_.max_leaves > 0 && num_leaves == param_.max_leaves) ) {
(*p_tree)[nid].set_leaf(snode[nid].weight * param.learning_rate); (*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate);
} else { } else {
tstart = dmlc::GetTime(); tstart = dmlc::GetTime();
this->ApplySplit(nid, gmat, column_matrix, hist_, *p_fmat, p_tree); this->ApplySplit(nid, gmat, column_matrix, hist_, *p_fmat, p_tree);
time_apply_split += dmlc::GetTime() - tstart; time_apply_split += dmlc::GetTime() - tstart;
tstart = dmlc::GetTime(); tstart = dmlc::GetTime();
const int cleft = (*p_tree)[nid].cleft(); const int cleft = (*p_tree)[nid].LeftChild();
const int cright = (*p_tree)[nid].cright(); const int cright = (*p_tree)[nid].RightChild();
hist_.AddHistRow(cleft); hist_.AddHistRow(cleft);
hist_.AddHistRow(cright); hist_.AddHistRow(cright);
if (row_set_collection_[cleft].size() < row_set_collection_[cright].size()) { if (row_set_collection_[cleft].Size() < row_set_collection_[cright].Size()) {
BuildHist(gpair_h, row_set_collection_[cleft], gmat, gmatb, feat_set, hist_[cleft]); BuildHist(gpair_h, row_set_collection_[cleft], gmat, gmatb, feat_set, hist_[cleft]);
SubtractionTrick(hist_[cright], hist_[cleft], hist_[nid]); SubtractionTrick(hist_[cright], hist_[cleft], hist_[nid]);
} else { } else {
@ -222,10 +223,10 @@ class FastHistMaker: public TreeUpdater {
time_evaluate_split += dmlc::GetTime() - tstart; time_evaluate_split += dmlc::GetTime() - tstart;
qexpand_->push(ExpandEntry(cleft, p_tree->GetDepth(cleft), qexpand_->push(ExpandEntry(cleft, p_tree->GetDepth(cleft),
snode[cleft].best.loss_chg, snode_[cleft].best.loss_chg,
timestamp++)); timestamp++));
qexpand_->push(ExpandEntry(cright, p_tree->GetDepth(cright), qexpand_->push(ExpandEntry(cright, p_tree->GetDepth(cright),
snode[cright].best.loss_chg, snode_[cright].best.loss_chg,
timestamp++)); timestamp++));
++num_leaves; // give two and take one, as parent is no longer a leaf ++num_leaves; // give two and take one, as parent is no longer a leaf
@ -238,19 +239,19 @@ class FastHistMaker: public TreeUpdater {
while (!qexpand_->empty()) { while (!qexpand_->empty()) {
const int nid = qexpand_->top().nid; const int nid = qexpand_->top().nid;
qexpand_->pop(); qexpand_->pop();
(*p_tree)[nid].set_leaf(snode[nid].weight * param.learning_rate); (*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate);
} }
// remember auxiliary statistics in the tree node // remember auxiliary statistics in the tree node
for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) { for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) {
p_tree->stat(nid).loss_chg = snode[nid].best.loss_chg; p_tree->Stat(nid).loss_chg = snode_[nid].best.loss_chg;
p_tree->stat(nid).base_weight = snode[nid].weight; p_tree->Stat(nid).base_weight = snode_[nid].weight;
p_tree->stat(nid).sum_hess = static_cast<float>(snode[nid].stats.sum_hess); p_tree->Stat(nid).sum_hess = static_cast<float>(snode_[nid].stats.sum_hess);
snode[nid].stats.SetLeafVec(param, p_tree->leafvec(nid)); snode_[nid].stats.SetLeafVec(param_, p_tree->Leafvec(nid));
} }
pruner_->Update(gpair, p_fmat, std::vector<RegTree*>{p_tree}); pruner_->Update(gpair, p_fmat, std::vector<RegTree*>{p_tree});
if (param.debug_verbose > 0) { if (param_.debug_verbose > 0) {
double total_time = dmlc::GetTime() - gstart; double total_time = dmlc::GetTime() - gstart;
LOG(INFO) << "\nInitData: " LOG(INFO) << "\nInitData: "
<< std::fixed << std::setw(6) << std::setprecision(4) << time_init_data << std::fixed << std::setw(6) << std::setprecision(4) << time_init_data
@ -278,13 +279,13 @@ class FastHistMaker: public TreeUpdater {
} }
} }
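
The BuildHist/SubtractionTrick pairing used in Update above relies on the identity that a parent's histogram is the elementwise sum of its children's: only the smaller child is built from raw gradients, and the sibling is derived by subtraction. A minimal host-side sketch of that identity (GradPair is hypothetical; the real GHistRow types differ):

    #include <cstddef>
    #include <vector>
    struct GradPair { double grad{0}, hess{0}; };
    // sibling[i] = parent[i] - smaller_child[i], bin by bin
    void SubtractionTrick(std::vector<GradPair>* sibling,
                          const std::vector<GradPair>& smaller_child,
                          const std::vector<GradPair>& parent) {
      sibling->resize(parent.size());
      for (std::size_t i = 0; i < parent.size(); ++i) {
        (*sibling)[i].grad = parent[i].grad - smaller_child[i].grad;
        (*sibling)[i].hess = parent[i].hess - smaller_child[i].hess;
      }
    }

Building the smaller row set and subtracting is cheaper than scanning both children, which is why the branch above compares row_set_collection_ sizes first.
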
inline void BuildHist(const std::vector<bst_gpair>& gpair, inline void BuildHist(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices, const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat, const GHistIndexMatrix& gmat,
const GHistIndexBlockMatrix& gmatb, const GHistIndexBlockMatrix& gmatb,
const std::vector<bst_uint>& feat_set, const std::vector<bst_uint>& feat_set,
GHistRow hist) { GHistRow hist) {
if (fhparam.enable_feature_grouping > 0) { if (fhparam_.enable_feature_grouping > 0) {
hist_builder_.BuildBlockHist(gpair, row_indices, gmatb, feat_set, hist); hist_builder_.BuildBlockHist(gpair, row_indices, gmatb, feat_set, hist);
} else { } else {
hist_builder_.BuildHist(gpair, row_indices, gmat, feat_set, hist); hist_builder_.BuildHist(gpair, row_indices, gmat, feat_set, hist);
@ -297,7 +298,7 @@ class FastHistMaker: public TreeUpdater {
inline bool UpdatePredictionCache(const DMatrix* data, inline bool UpdatePredictionCache(const DMatrix* data,
HostDeviceVector<bst_float>* p_out_preds) { HostDeviceVector<bst_float>* p_out_preds) {
std::vector<bst_float>& out_preds = p_out_preds->data_h(); std::vector<bst_float>& out_preds = p_out_preds->HostVector();
// p_last_fmat_ is a valid pointer as long as UpdatePredictionCache() is called in // p_last_fmat_ is a valid pointer as long as UpdatePredictionCache() is called in
// conjunction with Update(). // conjunction with Update().
@ -318,13 +319,13 @@ class FastHistMaker: public TreeUpdater {
bst_float leaf_value; bst_float leaf_value;
// if a node is marked as deleted by the pruner, traverse upward to locate // if a node is marked as deleted by the pruner, traverse upward to locate
// a non-deleted leaf. // a non-deleted leaf.
if ((*p_last_tree_)[nid].is_deleted()) { if ((*p_last_tree_)[nid].IsDeleted()) {
while ((*p_last_tree_)[nid].is_deleted()) { while ((*p_last_tree_)[nid].IsDeleted()) {
nid = (*p_last_tree_)[nid].parent(); nid = (*p_last_tree_)[nid].Parent();
} }
CHECK((*p_last_tree_)[nid].is_leaf()); CHECK((*p_last_tree_)[nid].IsLeaf());
} }
leaf_value = (*p_last_tree_)[nid].leaf_value(); leaf_value = (*p_last_tree_)[nid].LeafValue();
for (const size_t* it = rowset.begin; it < rowset.end; ++it) { for (const size_t* it = rowset.begin; it < rowset.end; ++it) {
out_preds[*it] += leaf_value; out_preds[*it] += leaf_value;
@ -338,19 +339,19 @@ class FastHistMaker: public TreeUpdater {
protected: protected:
// initialize temp data structure // initialize temp data structure
inline void InitData(const GHistIndexMatrix& gmat, inline void InitData(const GHistIndexMatrix& gmat,
const std::vector<bst_gpair>& gpair, const std::vector<GradientPair>& gpair,
const DMatrix& fmat, const DMatrix& fmat,
const RegTree& tree) { const RegTree& tree) {
CHECK_EQ(tree.param.num_nodes, tree.param.num_roots) CHECK_EQ(tree.param.num_nodes, tree.param.num_roots)
<< "ColMakerHist: can only grow new tree"; << "ColMakerHist: can only grow new tree";
CHECK((param.max_depth > 0 || param.max_leaves > 0)) CHECK((param_.max_depth > 0 || param_.max_leaves > 0))
<< "max_depth or max_leaves cannot be both 0 (unlimited); " << "max_depth or max_leaves cannot be both 0 (unlimited); "
<< "at least one should be a positive quantity."; << "at least one should be a positive quantity.";
if (param.grow_policy == TrainParam::kDepthWise) { if (param_.grow_policy == TrainParam::kDepthWise) {
CHECK(param.max_depth > 0) << "max_depth cannot be 0 (unlimited) " CHECK(param_.max_depth > 0) << "max_depth cannot be 0 (unlimited) "
<< "when grow_policy is depthwise."; << "when grow_policy is depthwise.";
} }
const auto& info = fmat.info(); const auto& info = fmat.Info();
{ {
// initialize the row set // initialize the row set
@ -364,23 +365,23 @@ class FastHistMaker: public TreeUpdater {
// initialize histogram builder // initialize histogram builder
#pragma omp parallel #pragma omp parallel
{ {
this->nthread = omp_get_num_threads(); this->nthread_ = omp_get_num_threads();
} }
hist_builder_.Init(this->nthread, nbins); hist_builder_.Init(this->nthread_, nbins);
CHECK_EQ(info.root_index.size(), 0U); CHECK_EQ(info.root_index_.size(), 0U);
std::vector<size_t>& row_indices = row_set_collection_.row_indices_; std::vector<size_t>& row_indices = row_set_collection_.row_indices_;
// mark subsample and build list of member rows // mark subsample and build list of member rows
if (param.subsample < 1.0f) { if (param_.subsample < 1.0f) {
std::bernoulli_distribution coin_flip(param.subsample); std::bernoulli_distribution coin_flip(param_.subsample);
auto& rnd = common::GlobalRandom(); auto& rnd = common::GlobalRandom();
for (size_t i = 0; i < info.num_row; ++i) { for (size_t i = 0; i < info.num_row_; ++i) {
if (gpair[i].GetHess() >= 0.0f && coin_flip(rnd)) { if (gpair[i].GetHess() >= 0.0f && coin_flip(rnd)) {
row_indices.push_back(i); row_indices.push_back(i);
} }
} }
} else { } else {
for (size_t i = 0; i < info.num_row; ++i) { for (size_t i = 0; i < info.num_row_; ++i) {
if (gpair[i].GetHess() >= 0.0f) { if (gpair[i].GetHess() >= 0.0f) {
row_indices.push_back(i); row_indices.push_back(i);
} }
@ -391,9 +392,9 @@ class FastHistMaker: public TreeUpdater {
{ {
/* determine layout of data */ /* determine layout of data */
const size_t nrow = info.num_row; const size_t nrow = info.num_row_;
const size_t ncol = info.num_col; const size_t ncol = info.num_col_;
const size_t nnz = info.num_nonzero; const size_t nnz = info.num_nonzero_;
// number of discrete bins for feature 0 // number of discrete bins for feature 0
const uint32_t nbins_f0 = gmat.cut->row_ptr[1] - gmat.cut->row_ptr[0]; const uint32_t nbins_f0 = gmat.cut->row_ptr[1] - gmat.cut->row_ptr[0];
if (nrow * ncol == nnz) { if (nrow * ncol == nnz) {
@ -413,23 +414,23 @@ class FastHistMaker: public TreeUpdater {
// store a pointer to training data // store a pointer to training data
p_last_fmat_ = &fmat; p_last_fmat_ = &fmat;
// initialize feature index // initialize feature index
bst_uint ncol = static_cast<bst_uint>(info.num_col); auto ncol = static_cast<bst_uint>(info.num_col_);
feat_index.clear(); feat_index_.clear();
if (data_layout_ == kDenseDataOneBased) { if (data_layout_ == kDenseDataOneBased) {
for (bst_uint i = 1; i < ncol; ++i) { for (bst_uint i = 1; i < ncol; ++i) {
feat_index.push_back(i); feat_index_.push_back(i);
} }
} else { } else {
for (bst_uint i = 0; i < ncol; ++i) { for (bst_uint i = 0; i < ncol; ++i) {
feat_index.push_back(i); feat_index_.push_back(i);
} }
} }
bst_uint n = std::max(static_cast<bst_uint>(1), bst_uint n = std::max(static_cast<bst_uint>(1),
static_cast<bst_uint>(param.colsample_bytree * feat_index.size())); static_cast<bst_uint>(param_.colsample_bytree * feat_index_.size()));
std::shuffle(feat_index.begin(), feat_index.end(), common::GlobalRandom()); std::shuffle(feat_index_.begin(), feat_index_.end(), common::GlobalRandom());
CHECK_GT(param.colsample_bytree, 0U) CHECK_GT(param_.colsample_bytree, 0U)
<< "colsample_bytree cannot be zero."; << "colsample_bytree cannot be zero.";
feat_index.resize(n); feat_index_.resize(n);
} }
if (data_layout_ == kDenseDataZeroBased || data_layout_ == kDenseDataOneBased) { if (data_layout_ == kDenseDataZeroBased || data_layout_ == kDenseDataOneBased) {
/* specialized code for dense data: /* specialized code for dense data:
@ -437,7 +438,7 @@ class FastHistMaker: public TreeUpdater {
For dense data (with no missing value), For dense data (with no missing value),
the sum of gradient histogram is equal to snode[nid] */ the sum of gradient histogram is equal to snode[nid] */
const std::vector<uint32_t>& row_ptr = gmat.cut->row_ptr; const std::vector<uint32_t>& row_ptr = gmat.cut->row_ptr;
const bst_uint nfeature = static_cast<bst_uint>(row_ptr.size() - 1); const auto nfeature = static_cast<bst_uint>(row_ptr.size() - 1);
uint32_t min_nbins_per_feature = 0; uint32_t min_nbins_per_feature = 0;
for (bst_uint i = 0; i < nfeature; ++i) { for (bst_uint i = 0; i < nfeature; ++i) {
const uint32_t nbins = row_ptr[i + 1] - row_ptr[i]; const uint32_t nbins = row_ptr[i + 1] - row_ptr[i];
@ -451,14 +452,14 @@ class FastHistMaker: public TreeUpdater {
CHECK_GT(min_nbins_per_feature, 0U); CHECK_GT(min_nbins_per_feature, 0U);
} }
{ {
snode.reserve(256); snode_.reserve(256);
snode.clear(); snode_.clear();
} }
{ {
if (param.grow_policy == TrainParam::kLossGuide) { if (param_.grow_policy == TrainParam::kLossGuide) {
qexpand_.reset(new ExpandQueue(loss_guide)); qexpand_.reset(new ExpandQueue(LossGuide));
} else { } else {
qexpand_.reset(new ExpandQueue(depth_wise)); qexpand_.reset(new ExpandQueue(DepthWise));
} }
} }
} }
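
InitData above combines three steps worth spelling out: Bernoulli row subsampling (rows with non-negative hessian only), colsample_bytree column sampling (shuffle, then keep a prefix of at least one feature), and a dense/sparse layout check. A host-side sketch under those readings; all names are hypothetical, the RNG is assumed to be a std::mt19937, and the one-based branch mirrors code elided by the hunk above:

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <random>
    #include <vector>
    // rows with negative hessian are always skipped; others pass a coin flip
    std::vector<std::size_t> SampleRows(const std::vector<float>& hess,
                                        float subsample, std::mt19937* rnd) {
      std::bernoulli_distribution coin_flip(subsample);
      std::vector<std::size_t> rows;
      for (std::size_t i = 0; i < hess.size(); ++i) {
        if (hess[i] >= 0.0f && (subsample >= 1.0f || coin_flip(*rnd))) {
          rows.push_back(i);
        }
      }
      return rows;
    }
    // shuffle feature ids, keep max(1, colsample_bytree * ncol) of them
    std::vector<unsigned> SampleColumns(unsigned ncol, float colsample_bytree,
                                        std::mt19937* rnd) {
      std::vector<unsigned> feat(ncol);
      for (unsigned i = 0; i < ncol; ++i) feat[i] = i;
      std::shuffle(feat.begin(), feat.end(), *rnd);
      auto n = std::max<std::size_t>(
          1, static_cast<std::size_t>(colsample_bytree * feat.size()));
      feat.resize(n);
      return feat;
    }
    enum DataLayout { kDenseDataZeroBased, kDenseDataOneBased, kSparseData };
    DataLayout DetectLayout(std::size_t nrow, std::size_t ncol,
                            std::size_t nnz, std::uint32_t nbins_f0) {
      if (nrow * ncol == nnz) return kDenseDataZeroBased;  // no missing values
      // an all-missing feature 0 (zero bins) suggests one-based indexing
      if (nbins_f0 == 0 && nrow * (ncol - 1) == nnz) return kDenseDataOneBased;
      return kSparseData;
    }
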
@ -470,25 +471,25 @@ class FastHistMaker: public TreeUpdater {
const RegTree& tree, const RegTree& tree,
const std::vector<bst_uint>& feat_set) { const std::vector<bst_uint>& feat_set) {
// start enumeration // start enumeration
const MetaInfo& info = fmat.info(); const MetaInfo& info = fmat.Info();
const bst_uint nfeature = static_cast<bst_uint>(feat_set.size()); const auto nfeature = static_cast<bst_uint>(feat_set.size());
const bst_omp_uint nthread = static_cast<bst_omp_uint>(this->nthread); const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
best_split_tloc_.resize(nthread); best_split_tloc_.resize(nthread);
#pragma omp parallel for schedule(static) num_threads(nthread) #pragma omp parallel for schedule(static) num_threads(nthread)
for (bst_omp_uint tid = 0; tid < nthread; ++tid) { for (bst_omp_uint tid = 0; tid < nthread; ++tid) {
best_split_tloc_[tid] = snode[nid].best; best_split_tloc_[tid] = snode_[nid].best;
} }
#pragma omp parallel for schedule(dynamic) num_threads(nthread) #pragma omp parallel for schedule(dynamic) num_threads(nthread)
for (bst_omp_uint i = 0; i < nfeature; ++i) { for (bst_omp_uint i = 0; i < nfeature; ++i) {
const bst_uint fid = feat_set[i]; const bst_uint fid = feat_set[i];
const unsigned tid = omp_get_thread_num(); const unsigned tid = omp_get_thread_num();
this->EnumerateSplit(-1, gmat, hist[nid], snode[nid], constraints_[nid], info, this->EnumerateSplit(-1, gmat, hist[nid], snode_[nid], constraints_[nid], info,
&best_split_tloc_[tid], fid); &best_split_tloc_[tid], fid);
this->EnumerateSplit(+1, gmat, hist[nid], snode[nid], constraints_[nid], info, this->EnumerateSplit(+1, gmat, hist[nid], snode_[nid], constraints_[nid], info,
&best_split_tloc_[tid], fid); &best_split_tloc_[tid], fid);
} }
for (unsigned tid = 0; tid < nthread; ++tid) { for (unsigned tid = 0; tid < nthread; ++tid) {
snode[nid].best.Update(best_split_tloc_[tid]); snode_[nid].best.Update(best_split_tloc_[tid]);
} }
} }
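
The parallel pattern in EvaluateSplit above is worth noting: every OpenMP thread seeds a private copy of the node's current best split, improves it independently across its share of features, and the winners are folded serially at the end, so the hot loop takes no locks. A reduced sketch (SplitEntry here is hypothetical):

    #include <vector>
    struct SplitEntry {
      float loss_chg{0.f};
      unsigned split_index{0};
      float split_value{0.f};
      // keep whichever candidate improves the loss reduction
      void Update(const SplitEntry& e) { if (e.loss_chg > loss_chg) *this = e; }
    };
    SplitEntry ReduceBest(const std::vector<SplitEntry>& thread_local_best) {
      SplitEntry best;
      for (const auto& s : thread_local_best) best.Update(s);
      return best;
    }
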
@ -499,12 +500,13 @@ class FastHistMaker: public TreeUpdater {
const DMatrix& fmat, const DMatrix& fmat,
RegTree* p_tree) { RegTree* p_tree) {
XGBOOST_TYPE_SWITCH(column_matrix.dtype, { XGBOOST_TYPE_SWITCH(column_matrix.dtype, {
ApplySplit_<DType>(nid, gmat, column_matrix, hist, fmat, p_tree); ApplySplitSpecialize<DType>(nid, gmat, column_matrix, hist, fmat,
p_tree);
}); });
} }
template <typename T> template <typename T>
inline void ApplySplit_(int nid, inline void ApplySplitSpecialize(int nid,
const GHistIndexMatrix& gmat, const GHistIndexMatrix& gmat,
const ColumnMatrix& column_matrix, const ColumnMatrix& column_matrix,
const HistCollection& hist, const HistCollection& hist,
@ -513,26 +515,26 @@ class FastHistMaker: public TreeUpdater {
// TODO(hcho3): support feature sampling by levels // TODO(hcho3): support feature sampling by levels
/* 1. Create child nodes */ /* 1. Create child nodes */
NodeEntry& e = snode[nid]; NodeEntry& e = snode_[nid];
p_tree->AddChilds(nid); p_tree->AddChilds(nid);
(*p_tree)[nid].set_split(e.best.split_index(), e.best.split_value, e.best.default_left()); (*p_tree)[nid].SetSplit(e.best.SplitIndex(), e.best.split_value, e.best.DefaultLeft());
// mark right child as 0, to indicate fresh leaf // mark right child as 0, to indicate fresh leaf
int cleft = (*p_tree)[nid].cleft(); int cleft = (*p_tree)[nid].LeftChild();
int cright = (*p_tree)[nid].cright(); int cright = (*p_tree)[nid].RightChild();
(*p_tree)[cleft].set_leaf(0.0f, 0); (*p_tree)[cleft].SetLeaf(0.0f, 0);
(*p_tree)[cright].set_leaf(0.0f, 0); (*p_tree)[cright].SetLeaf(0.0f, 0);
/* 2. Categorize member rows */ /* 2. Categorize member rows */
const bst_omp_uint nthread = static_cast<bst_omp_uint>(this->nthread); const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
row_split_tloc_.resize(nthread); row_split_tloc_.resize(nthread);
for (bst_omp_uint i = 0; i < nthread; ++i) { for (bst_omp_uint i = 0; i < nthread; ++i) {
row_split_tloc_[i].left.clear(); row_split_tloc_[i].left.clear();
row_split_tloc_[i].right.clear(); row_split_tloc_[i].right.clear();
} }
const bool default_left = (*p_tree)[nid].default_left(); const bool default_left = (*p_tree)[nid].DefaultLeft();
const bst_uint fid = (*p_tree)[nid].split_index(); const bst_uint fid = (*p_tree)[nid].SplitIndex();
const bst_float split_pt = (*p_tree)[nid].split_cond(); const bst_float split_pt = (*p_tree)[nid].SplitCond();
const uint32_t lower_bound = gmat.cut->row_ptr[fid]; const uint32_t lower_bound = gmat.cut->row_ptr[fid];
const uint32_t upper_bound = gmat.cut->row_ptr[fid + 1]; const uint32_t upper_bound = gmat.cut->row_ptr[fid + 1];
int32_t split_cond = -1; int32_t split_cond = -1;
@ -558,7 +560,7 @@ class FastHistMaker: public TreeUpdater {
} }
row_set_collection_.AddSplit( row_set_collection_.AddSplit(
nid, row_split_tloc_, (*p_tree)[nid].cleft(), (*p_tree)[nid].cright()); nid, row_split_tloc_, (*p_tree)[nid].LeftChild(), (*p_tree)[nid].RightChild());
} }
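
The partition helpers that follow hand-unroll their row loop by a factor of eight and then finish with a scalar tail; the skeleton of that pattern, stripped of binning and missing-value handling (names hypothetical):

    #include <cstddef>
    #include <vector>
    void PartitionRows(const std::vector<int>& bins, int split_cond,
                       std::vector<std::size_t>* left,
                       std::vector<std::size_t>* right) {
      constexpr int kUnroll = 8;  // loop unrolling factor, as in the diff
      const std::size_t nrows = bins.size();
      const std::size_t rest = nrows % kUnroll;
      for (std::size_t i = 0; i + kUnroll <= nrows; i += kUnroll) {
        for (int k = 0; k < kUnroll; ++k) {  // fixed trip count, easy to unroll
          (bins[i + k] <= split_cond ? left : right)->push_back(i + k);
        }
      }
      for (std::size_t i = nrows - rest; i < nrows; ++i) {  // scalar tail
        (bins[i] <= split_cond ? left : right)->push_back(i);
      }
    }

Gathering rid[] and rbin[] into small fixed arrays first, as the real helpers do, separates the loads from the branches and gives the compiler straight-line blocks to vectorise.
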
template<typename T> template<typename T>
@ -569,24 +571,24 @@ class FastHistMaker: public TreeUpdater {
bst_int split_cond, bst_int split_cond,
bool default_left) { bool default_left) {
std::vector<RowSetCollection::Split>& row_split_tloc = *p_row_split_tloc; std::vector<RowSetCollection::Split>& row_split_tloc = *p_row_split_tloc;
const int K = 8; // loop unrolling factor constexpr int kUnroll = 8; // loop unrolling factor
const size_t nrows = rowset.end - rowset.begin; const size_t nrows = rowset.end - rowset.begin;
const size_t rest = nrows % K; const size_t rest = nrows % kUnroll;
#pragma omp parallel for num_threads(nthread) schedule(static) #pragma omp parallel for num_threads(nthread_) schedule(static)
for (bst_omp_uint i = 0; i < nrows - rest; i += K) { for (bst_omp_uint i = 0; i < nrows - rest; i += kUnroll) {
const bst_uint tid = omp_get_thread_num(); const bst_uint tid = omp_get_thread_num();
auto& left = row_split_tloc[tid].left; auto& left = row_split_tloc[tid].left;
auto& right = row_split_tloc[tid].right; auto& right = row_split_tloc[tid].right;
size_t rid[K]; size_t rid[kUnroll];
T rbin[K]; T rbin[kUnroll];
for (int k = 0; k < K; ++k) { for (int k = 0; k < kUnroll; ++k) {
rid[k] = rowset.begin[i + k]; rid[k] = rowset.begin[i + k];
} }
for (int k = 0; k < K; ++k) { for (int k = 0; k < kUnroll; ++k) {
rbin[k] = column.index[rid[k]]; rbin[k] = column.index[rid[k]];
} }
for (int k = 0; k < K; ++k) { for (int k = 0; k < kUnroll; ++k) { // NOLINT
if (rbin[k] == std::numeric_limits<T>::max()) { // missing value if (rbin[k] == std::numeric_limits<T>::max()) { // missing value
if (default_left) { if (default_left) {
left.push_back(rid[k]); left.push_back(rid[k]);
@ -605,8 +607,8 @@ class FastHistMaker: public TreeUpdater {
} }
} }
for (size_t i = nrows - rest; i < nrows; ++i) { for (size_t i = nrows - rest; i < nrows; ++i) {
auto& left = row_split_tloc[nthread-1].left; auto& left = row_split_tloc[nthread_-1].left;
auto& right = row_split_tloc[nthread-1].right; auto& right = row_split_tloc[nthread_-1].right;
const size_t rid = rowset.begin[i]; const size_t rid = rowset.begin[i];
const T rbin = column.index[rid]; const T rbin = column.index[rid];
if (rbin == std::numeric_limits<T>::max()) { // missing value if (rbin == std::numeric_limits<T>::max()) { // missing value
@ -635,27 +637,27 @@ class FastHistMaker: public TreeUpdater {
bst_int split_cond, bst_int split_cond,
bool default_left) { bool default_left) {
std::vector<RowSetCollection::Split>& row_split_tloc = *p_row_split_tloc; std::vector<RowSetCollection::Split>& row_split_tloc = *p_row_split_tloc;
const int K = 8; // loop unrolling factor constexpr int kUnroll = 8; // loop unrolling factor
const size_t nrows = rowset.end - rowset.begin; const size_t nrows = rowset.end - rowset.begin;
const size_t rest = nrows % K; const size_t rest = nrows % kUnroll;
#pragma omp parallel for num_threads(nthread) schedule(static) #pragma omp parallel for num_threads(nthread_) schedule(static)
for (bst_omp_uint i = 0; i < nrows - rest; i += K) { for (bst_omp_uint i = 0; i < nrows - rest; i += kUnroll) {
size_t rid[K]; size_t rid[kUnroll];
GHistIndexRow row[K]; GHistIndexRow row[kUnroll];
const uint32_t* p[K]; const uint32_t* p[kUnroll];
bst_uint tid = omp_get_thread_num(); bst_uint tid = omp_get_thread_num();
auto& left = row_split_tloc[tid].left; auto& left = row_split_tloc[tid].left;
auto& right = row_split_tloc[tid].right; auto& right = row_split_tloc[tid].right;
for (int k = 0; k < K; ++k) { for (int k = 0; k < kUnroll; ++k) {
rid[k] = rowset.begin[i + k]; rid[k] = rowset.begin[i + k];
} }
for (int k = 0; k < K; ++k) { for (int k = 0; k < kUnroll; ++k) {
row[k] = gmat[rid[k]]; row[k] = gmat[rid[k]];
} }
for (int k = 0; k < K; ++k) { for (int k = 0; k < kUnroll; ++k) {
p[k] = std::lower_bound(row[k].index, row[k].index + row[k].size, lower_bound); p[k] = std::lower_bound(row[k].index, row[k].index + row[k].size, lower_bound);
} }
for (int k = 0; k < K; ++k) { for (int k = 0; k < kUnroll; ++k) {
if (p[k] != row[k].index + row[k].size && *p[k] < upper_bound) { if (p[k] != row[k].index + row[k].size && *p[k] < upper_bound) {
CHECK_LT(*p[k], CHECK_LT(*p[k],
static_cast<uint32_t>(std::numeric_limits<int32_t>::max())); static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
@ -708,11 +710,11 @@ class FastHistMaker: public TreeUpdater {
std::vector<RowSetCollection::Split>& row_split_tloc = *p_row_split_tloc; std::vector<RowSetCollection::Split>& row_split_tloc = *p_row_split_tloc;
const size_t nrows = rowset.end - rowset.begin; const size_t nrows = rowset.end - rowset.begin;
#pragma omp parallel num_threads(nthread) #pragma omp parallel num_threads(nthread_)
{ {
const size_t tid = static_cast<size_t>(omp_get_thread_num()); const auto tid = static_cast<size_t>(omp_get_thread_num());
const size_t ibegin = tid * nrows / nthread; const size_t ibegin = tid * nrows / nthread_;
const size_t iend = (tid + 1) * nrows / nthread; const size_t iend = (tid + 1) * nrows / nthread_;
if (ibegin < iend) { // ensure that [ibegin, iend) is nonempty range if (ibegin < iend) { // ensure that [ibegin, iend) is nonempty range
// search first nonzero row with index >= rowset[ibegin] // search first nonzero row with index >= rowset[ibegin]
const size_t* p = std::lower_bound(column.row_ind, const size_t* p = std::lower_bound(column.row_ind,
@ -769,17 +771,17 @@ class FastHistMaker: public TreeUpdater {
inline void InitNewNode(int nid, inline void InitNewNode(int nid,
const GHistIndexMatrix& gmat, const GHistIndexMatrix& gmat,
const std::vector<bst_gpair>& gpair, const std::vector<GradientPair>& gpair,
const DMatrix& fmat, const DMatrix& fmat,
const RegTree& tree) { const RegTree& tree) {
{ {
snode.resize(tree.param.num_nodes, NodeEntry(param)); snode_.resize(tree.param.num_nodes, NodeEntry(param_));
constraints_.resize(tree.param.num_nodes); constraints_.resize(tree.param.num_nodes);
} }
// setup constraints before calculating the weight // setup constraints before calculating the weight
{ {
auto& stats = snode[nid].stats; auto& stats = snode_[nid].stats;
if (data_layout_ == kDenseDataZeroBased || data_layout_ == kDenseDataOneBased) { if (data_layout_ == kDenseDataZeroBased || data_layout_ == kDenseDataOneBased) {
/* specialized code for dense data /* specialized code for dense data
For dense data (with no missing value), For dense data (with no missing value),
@ -799,22 +801,22 @@ class FastHistMaker: public TreeUpdater {
stats.Add(gpair[*it]); stats.Add(gpair[*it]);
} }
} }
if (!tree[nid].is_root()) { if (!tree[nid].IsRoot()) {
const int pid = tree[nid].parent(); const int pid = tree[nid].Parent();
constraints_[pid].SetChild(param, tree[pid].split_index(), constraints_[pid].SetChild(param_, tree[pid].SplitIndex(),
snode[tree[pid].cleft()].stats, snode_[tree[pid].LeftChild()].stats,
snode[tree[pid].cright()].stats, snode_[tree[pid].RightChild()].stats,
&constraints_[tree[pid].cleft()], &constraints_[tree[pid].LeftChild()],
&constraints_[tree[pid].cright()]); &constraints_[tree[pid].RightChild()]);
} }
} }
// calculating the weights // calculating the weights
{ {
snode[nid].root_gain = static_cast<float>( snode_[nid].root_gain = static_cast<float>(
constraints_[nid].CalcGain(param, snode[nid].stats)); constraints_[nid].CalcGain(param_, snode_[nid].stats));
snode[nid].weight = static_cast<float>( snode_[nid].weight = static_cast<float>(
constraints_[nid].CalcWeight(param, snode[nid].stats)); constraints_[nid].CalcWeight(param_, snode_[nid].stats));
} }
} }
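
Absent L1 regularisation and max_delta_step, the CalcGain/CalcWeight calls above reduce to the classic closed forms w* = -G / (H + lambda) and gain = G^2 / (H + lambda). A sketch under that simplifying assumption:

    struct Stats { double sum_grad{0}, sum_hess{0}; };
    // optimal leaf weight for a second-order objective
    double CalcWeight(const Stats& s, double reg_lambda) {
      return -s.sum_grad / (s.sum_hess + reg_lambda);
    }
    // loss reduction obtained by assigning that weight
    double CalcGain(const Stats& s, double reg_lambda) {
      return (s.sum_grad * s.sum_grad) / (s.sum_hess + reg_lambda);
    }
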
@ -834,8 +836,8 @@ class FastHistMaker: public TreeUpdater {
const std::vector<bst_float>& cut_val = gmat.cut->cut; const std::vector<bst_float>& cut_val = gmat.cut->cut;
// statistics on both sides of split // statistics on both sides of split
TStats c(param); TStats c(param_);
TStats e(param); TStats e(param_);
// best split so far // best split so far
SplitEntry best; SplitEntry best;
@ -846,7 +848,7 @@ class FastHistMaker: public TreeUpdater {
static_cast<uint32_t>(std::numeric_limits<int32_t>::max())); static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
// imin: index (offset) of the minimum value for feature fid // imin: index (offset) of the minimum value for feature fid
// need this for backward enumeration // need this for backward enumeration
const int32_t imin = static_cast<int32_t>(cut_ptr[fid]); const auto imin = static_cast<int32_t>(cut_ptr[fid]);
// ibegin, iend: smallest/largest cut points for feature fid // ibegin, iend: smallest/largest cut points for feature fid
// use int to allow for value -1 // use int to allow for value -1
int32_t ibegin, iend; int32_t ibegin, iend;
@ -862,21 +864,21 @@ class FastHistMaker: public TreeUpdater {
// start working // start working
// try to find a split // try to find a split
e.Add(hist.begin[i].sum_grad, hist.begin[i].sum_hess); e.Add(hist.begin[i].sum_grad, hist.begin[i].sum_hess);
if (e.sum_hess >= param.min_child_weight) { if (e.sum_hess >= param_.min_child_weight) {
c.SetSubstract(snode.stats, e); c.SetSubstract(snode.stats, e);
if (c.sum_hess >= param.min_child_weight) { if (c.sum_hess >= param_.min_child_weight) {
bst_float loss_chg; bst_float loss_chg;
bst_float split_pt; bst_float split_pt;
if (d_step > 0) { if (d_step > 0) {
// forward enumeration: split at right bound of each bin // forward enumeration: split at right bound of each bin
loss_chg = static_cast<bst_float>( loss_chg = static_cast<bst_float>(
constraint.CalcSplitGain(param, param.monotone_constraints[fid], e, c) - constraint.CalcSplitGain(param_, param_.monotone_constraints[fid], e, c) -
snode.root_gain); snode.root_gain);
split_pt = cut_val[i]; split_pt = cut_val[i];
} else { } else {
// backward enumeration: split at left bound of each bin // backward enumeration: split at left bound of each bin
loss_chg = static_cast<bst_float>( loss_chg = static_cast<bst_float>(
constraint.CalcSplitGain(param, param.monotone_constraints[fid], c, e) - constraint.CalcSplitGain(param_, param_.monotone_constraints[fid], c, e) -
snode.root_gain); snode.root_gain);
if (i == imin) { if (i == imin) {
// for leftmost bin, left bound is the smallest feature value // for leftmost bin, left bound is the smallest feature value
@ -901,14 +903,14 @@ class FastHistMaker: public TreeUpdater {
ExpandEntry(int nid, int depth, bst_float loss_chg, unsigned tstmp) ExpandEntry(int nid, int depth, bst_float loss_chg, unsigned tstmp)
: nid(nid), depth(depth), loss_chg(loss_chg), timestamp(tstmp) {} : nid(nid), depth(depth), loss_chg(loss_chg), timestamp(tstmp) {}
}; };
inline static bool depth_wise(ExpandEntry lhs, ExpandEntry rhs) { inline static bool DepthWise(ExpandEntry lhs, ExpandEntry rhs) {
if (lhs.depth == rhs.depth) { if (lhs.depth == rhs.depth) {
return lhs.timestamp > rhs.timestamp; // favor small timestamp return lhs.timestamp > rhs.timestamp; // favor small timestamp
} else { } else {
return lhs.depth > rhs.depth; // favor small depth return lhs.depth > rhs.depth; // favor small depth
} }
} }
inline static bool loss_guide(ExpandEntry lhs, ExpandEntry rhs) { inline static bool LossGuide(ExpandEntry lhs, ExpandEntry rhs) {
if (lhs.loss_chg == rhs.loss_chg) { if (lhs.loss_chg == rhs.loss_chg) {
return lhs.timestamp > rhs.timestamp; // favor small timestamp return lhs.timestamp > rhs.timestamp; // favor small timestamp
} else { } else {
@ -917,19 +919,19 @@ class FastHistMaker: public TreeUpdater {
} }
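
These two comparators are the whole difference between the depthwise and lossguide grow policies: the same priority queue pops shallow nodes first under DepthWise and the largest loss reduction first under LossGuide. A sketch of the wiring, assuming the branch elided by the hunk above favours larger loss_chg:

    #include <functional>
    #include <queue>
    #include <vector>
    struct Entry { int nid; int depth; float loss_chg; unsigned timestamp; };
    using Queue = std::priority_queue<Entry, std::vector<Entry>,
                                      std::function<bool(Entry, Entry)>>;
    bool DepthWise(Entry l, Entry r) {
      return l.depth == r.depth ? l.timestamp > r.timestamp : l.depth > r.depth;
    }
    bool LossGuide(Entry l, Entry r) {
      return l.loss_chg == r.loss_chg ? l.timestamp > r.timestamp
                                      : l.loss_chg < r.loss_chg;
    }
    int main() {
      Queue q(LossGuide);
      q.push({0, 0, 0.5f, 0});
      q.push({1, 1, 2.0f, 1});
      // under LossGuide, node 1 (larger loss_chg) is expanded first
      return q.top().nid == 1 ? 0 : 1;
    }
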
// --data fields-- // --data fields--
const TrainParam& param; const TrainParam& param_;
const FastHistParam& fhparam; const FastHistParam& fhparam_;
// number of omp thread used during training // number of omp thread used during training
int nthread; int nthread_;
// Per feature: shuffle index of each feature index // Per feature: shuffle index of each feature index
std::vector<bst_uint> feat_index; std::vector<bst_uint> feat_index_;
// the internal row sets // the internal row sets
RowSetCollection row_set_collection_; RowSetCollection row_set_collection_;
// the temp space for split // the temp space for split
std::vector<RowSetCollection::Split> row_split_tloc_; std::vector<RowSetCollection::Split> row_split_tloc_;
std::vector<SplitEntry> best_split_tloc_; std::vector<SplitEntry> best_split_tloc_;
/*! \brief TreeNode Data: statistics for each constructed node */ /*! \brief TreeNode Data: statistics for each constructed node */
std::vector<NodeEntry> snode; std::vector<NodeEntry> snode_;
/*! \brief cumulative histogram of gradients. */ /*! \brief cumulative histogram of gradients. */
HistCollection hist_; HistCollection hist_;
/*! \brief feature with least # of bins. to be used for dense specialization /*! \brief feature with least # of bins. to be used for dense specialization
@ -948,9 +950,9 @@ class FastHistMaker: public TreeUpdater {
// constraint value // constraint value
std::vector<TConstraint> constraints_; std::vector<TConstraint> constraints_;
typedef std::priority_queue<ExpandEntry, using ExpandQueue =
std::vector<ExpandEntry>, std::priority_queue<ExpandEntry, std::vector<ExpandEntry>,
std::function<bool(ExpandEntry, ExpandEntry)>> ExpandQueue; std::function<bool(ExpandEntry, ExpandEntry)>>;
std::unique_ptr<ExpandQueue> qexpand_; std::unique_ptr<ExpandQueue> qexpand_;
enum DataLayout { kDenseDataZeroBased, kDenseDataOneBased, kSparseData }; enum DataLayout { kDenseDataZeroBased, kDenseDataOneBased, kSparseData };
@ -964,14 +966,14 @@ class FastHistMaker: public TreeUpdater {
// simple switch to defer implementation. // simple switch to defer implementation.
class FastHistTreeUpdaterSwitch : public TreeUpdater { class FastHistTreeUpdaterSwitch : public TreeUpdater {
public: public:
FastHistTreeUpdaterSwitch() : monotone_(false) {} FastHistTreeUpdaterSwitch() = default;
void Init(const std::vector<std::pair<std::string, std::string> >& args) override { void Init(const std::vector<std::pair<std::string, std::string> >& args) override {
for (auto &kv : args) { for (auto &kv : args) {
if (kv.first == "monotone_constraints" && kv.second.length() != 0) { if (kv.first == "monotone_constraints" && kv.second.length() != 0) {
monotone_ = true; monotone_ = true;
} }
} }
if (inner_.get() == nullptr) { if (inner_ == nullptr) {
if (monotone_) { if (monotone_) {
inner_.reset(new FastHistMaker<GradStats, ValueConstraint>()); inner_.reset(new FastHistMaker<GradStats, ValueConstraint>());
} else { } else {
@ -982,7 +984,7 @@ class FastHistTreeUpdaterSwitch : public TreeUpdater {
inner_->Init(args); inner_->Init(args);
} }
void Update(HostDeviceVector<bst_gpair>* gpair, void Update(HostDeviceVector<GradientPair>* gpair,
DMatrix* data, DMatrix* data,
const std::vector<RegTree*>& trees) override { const std::vector<RegTree*>& trees) override {
CHECK(inner_ != nullptr); CHECK(inner_ != nullptr);
@ -991,7 +993,7 @@ class FastHistTreeUpdaterSwitch : public TreeUpdater {
private: private:
// monotone constraints // monotone constraints
bool monotone_; bool monotone_{false};
// internal implementation // internal implementation
std::unique_ptr<TreeUpdater> inner_; std::unique_ptr<TreeUpdater> inner_;
}; };
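
The switch above is a small dispatch idiom: defer the choice of template instantiation until the first Init call, then forward everything through a base-class pointer. In miniature, with stand-in types (the non-monotone instantiation is elided by the hunk above):

    #include <memory>
    struct Updater { virtual ~Updater() = default; };
    struct ConstrainedMaker : Updater {};    // stands in for the ValueConstraint instantiation
    struct UnconstrainedMaker : Updater {};  // stands in for the unconstrained one
    std::unique_ptr<Updater> MakeInner(bool monotone) {
      if (monotone) return std::unique_ptr<Updater>(new ConstrainedMaker());
      return std::unique_ptr<Updater>(new UnconstrainedMaker());
    }
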


@ -22,25 +22,25 @@ DMLC_REGISTRY_FILE_TAG(updater_gpu);
* @return the uniq key * @return the uniq key
*/ */
static HOST_DEV_INLINE node_id_t abs2uniqKey(int tid, const node_id_t* abs, static HOST_DEV_INLINE NodeIdT abs2uniqKey(int tid, const NodeIdT* abs,
const int* colIds, const int* colIds,
node_id_t nodeStart, int nKeys) { NodeIdT nodeStart, int nKeys) {
int a = abs[tid]; int a = abs[tid];
if (a == UNUSED_NODE) return a; if (a == kUnusedNode) return a;
return ((a - nodeStart) + (colIds[tid] * nKeys)); return ((a - nodeStart) + (colIds[tid] * nKeys));
} }
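
In plain C++, the mapping above folds a (node, column) pair into one dense key, so a single scan can cover every node-by-feature segment of the current level; the sentinel for unused rows (assumed negative) passes through untouched:

    inline int Abs2UniqKey(int node_id, int col_id, int node_start, int n_keys) {
      if (node_id < 0) return node_id;  // kUnusedNode sentinel
      return (node_id - node_start) + col_id * n_keys;
    }
    // e.g. a level holding nodes {3,4,5,6}: node_start = 3, n_keys = 4,
    // so (node 4, column 2) maps to key (4 - 3) + 2 * 4 = 9
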
/** /**
* @struct Pair * @struct Pair
* @brief Pair used for key based scan operations on bst_gpair * @brief Pair used for key based scan operations on GradientPair
*/ */
struct Pair { struct Pair {
int key; int key;
bst_gpair value; GradientPair value;
}; };
/** define a key that's not used at all in the entire boosting process */ /** define a key that's not used at all in the entire boosting process */
static const int NONE_KEY = -100; static const int kNoneKey = -100;
/** /**
* @brief Allocate temporary buffers needed for scan operations * @brief Allocate temporary buffers needed for scan operations
@ -49,9 +49,9 @@ static const int NONE_KEY = -100;
* @param size number of elements that will be scanned * @param size number of elements that will be scanned
*/ */
template <int BLKDIM_L1L3 = 256> template <int BLKDIM_L1L3 = 256>
int scanTempBufferSize(int size) { int ScanTempBufferSize(int size) {
int nBlks = dh::div_round_up(size, BLKDIM_L1L3); int num_blocks = dh::DivRoundUp(size, BLKDIM_L1L3);
return nBlks; return num_blocks;
} }
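
DivRoundUp, as assumed throughout this file for grid sizing, is ceiling division, so a partial tail of elements still gets its own block:

    inline int DivRoundUp(int a, int b) { return (a + b - 1) / b; }
    // ScanTempBufferSize(1000) with BLKDIM_L1L3 = 256 -> DivRoundUp(1000, 256) = 4
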
struct AddByKey { struct AddByKey {
@ -76,21 +76,21 @@ struct AddByKey {
* @param instIds instance index buffer * @param instIds instance index buffer
* @return the expected gradient value * @return the expected gradient value
*/ */
HOST_DEV_INLINE bst_gpair get(int id, const bst_gpair* vals, HOST_DEV_INLINE GradientPair get(int id, const GradientPair* vals,
const int* instIds) { const int* instIds) {
id = instIds[id]; id = instIds[id];
return vals[id]; return vals[id];
} }
template <int BLKDIM_L1L3> template <int BLKDIM_L1L3>
__global__ void cubScanByKeyL1(bst_gpair* scans, const bst_gpair* vals, __global__ void cubScanByKeyL1(GradientPair* scans, const GradientPair* vals,
const int* instIds, bst_gpair* mScans, const int* instIds, GradientPair* mScans,
int* mKeys, const node_id_t* keys, int nUniqKeys, int* mKeys, const NodeIdT* keys, int nUniqKeys,
const int* colIds, node_id_t nodeStart, const int* colIds, NodeIdT nodeStart,
const int size) { const int size) {
Pair rootPair = {NONE_KEY, bst_gpair(0.f, 0.f)}; Pair rootPair = {kNoneKey, GradientPair(0.f, 0.f)};
int myKey; int myKey;
bst_gpair myValue; GradientPair myValue;
typedef cub::BlockScan<Pair, BLKDIM_L1L3> BlockScan; typedef cub::BlockScan<Pair, BLKDIM_L1L3> BlockScan;
__shared__ typename BlockScan::TempStorage temp_storage; __shared__ typename BlockScan::TempStorage temp_storage;
Pair threadData; Pair threadData;
@ -99,8 +99,8 @@ __global__ void cubScanByKeyL1(bst_gpair* scans, const bst_gpair* vals,
myKey = abs2uniqKey(tid, keys, colIds, nodeStart, nUniqKeys); myKey = abs2uniqKey(tid, keys, colIds, nodeStart, nUniqKeys);
myValue = get(tid, vals, instIds); myValue = get(tid, vals, instIds);
} else { } else {
myKey = NONE_KEY; myKey = kNoneKey;
myValue = 0.f; myValue = {};
} }
threadData.key = myKey; threadData.key = myKey;
threadData.value = myValue; threadData.value = myValue;
@ -119,14 +119,14 @@ __global__ void cubScanByKeyL1(bst_gpair* scans, const bst_gpair* vals,
} }
if (threadIdx.x == BLKDIM_L1L3 - 1) { if (threadIdx.x == BLKDIM_L1L3 - 1) {
threadData.value = threadData.value =
(myKey == previousKey) ? threadData.value : bst_gpair(0.0f, 0.0f); (myKey == previousKey) ? threadData.value : GradientPair(0.0f, 0.0f);
mKeys[blockIdx.x] = myKey; mKeys[blockIdx.x] = myKey;
mScans[blockIdx.x] = threadData.value + myValue; mScans[blockIdx.x] = threadData.value + myValue;
} }
} }
template <int BLKSIZE> template <int BLKSIZE>
__global__ void cubScanByKeyL2(bst_gpair* mScans, int* mKeys, int mLength) { __global__ void cubScanByKeyL2(GradientPair* mScans, int* mKeys, int mLength) {
typedef cub::BlockScan<Pair, BLKSIZE, cub::BLOCK_SCAN_WARP_SCANS> BlockScan; typedef cub::BlockScan<Pair, BLKSIZE, cub::BLOCK_SCAN_WARP_SCANS> BlockScan;
Pair threadData; Pair threadData;
__shared__ typename BlockScan::TempStorage temp_storage; __shared__ typename BlockScan::TempStorage temp_storage;
@ -140,31 +140,31 @@ __global__ void cubScanByKeyL2(bst_gpair* mScans, int* mKeys, int mLength) {
} }
template <int BLKDIM_L1L3> template <int BLKDIM_L1L3>
__global__ void cubScanByKeyL3(bst_gpair* sums, bst_gpair* scans, __global__ void cubScanByKeyL3(GradientPair* sums, GradientPair* scans,
const bst_gpair* vals, const int* instIds, const GradientPair* vals, const int* instIds,
const bst_gpair* mScans, const int* mKeys, const GradientPair* mScans, const int* mKeys,
const node_id_t* keys, int nUniqKeys, const NodeIdT* keys, int nUniqKeys,
const int* colIds, node_id_t nodeStart, const int* colIds, NodeIdT nodeStart,
const int size) { const int size) {
int relId = threadIdx.x; int relId = threadIdx.x;
int tid = (blockIdx.x * BLKDIM_L1L3) + relId; int tid = (blockIdx.x * BLKDIM_L1L3) + relId;
// to avoid the following warning from nvcc: // to avoid the following warning from nvcc:
// __shared__ memory variable with non-empty constructor or destructor // __shared__ memory variable with non-empty constructor or destructor
// (potential race between threads) // (potential race between threads)
__shared__ char gradBuff[sizeof(bst_gpair)]; __shared__ char gradBuff[sizeof(GradientPair)];
__shared__ int s_mKeys; __shared__ int s_mKeys;
bst_gpair* s_mScans = reinterpret_cast<bst_gpair*>(gradBuff); GradientPair* s_mScans = reinterpret_cast<GradientPair*>(gradBuff);
if (tid >= size) return; if (tid >= size) return;
// cache block-wide partial scan info // cache block-wide partial scan info
if (relId == 0) { if (relId == 0) {
s_mKeys = (blockIdx.x > 0) ? mKeys[blockIdx.x - 1] : NONE_KEY; s_mKeys = (blockIdx.x > 0) ? mKeys[blockIdx.x - 1] : kNoneKey;
s_mScans[0] = (blockIdx.x > 0) ? mScans[blockIdx.x - 1] : bst_gpair(); s_mScans[0] = (blockIdx.x > 0) ? mScans[blockIdx.x - 1] : GradientPair();
} }
int myKey = abs2uniqKey(tid, keys, colIds, nodeStart, nUniqKeys); int myKey = abs2uniqKey(tid, keys, colIds, nodeStart, nUniqKeys);
int previousKey = int previousKey =
tid == 0 ? NONE_KEY tid == 0 ? kNoneKey
: abs2uniqKey(tid - 1, keys, colIds, nodeStart, nUniqKeys); : abs2uniqKey(tid - 1, keys, colIds, nodeStart, nUniqKeys);
bst_gpair myValue = scans[tid]; GradientPair myValue = scans[tid];
__syncthreads(); __syncthreads();
if (blockIdx.x > 0 && s_mKeys == previousKey) { if (blockIdx.x > 0 && s_mKeys == previousKey) {
myValue += s_mScans[0]; myValue += s_mScans[0];
@ -174,7 +174,7 @@ __global__ void cubScanByKeyL3(bst_gpair* sums, bst_gpair* scans,
} }
if ((previousKey != myKey) && (previousKey >= 0)) { if ((previousKey != myKey) && (previousKey >= 0)) {
sums[previousKey] = myValue; sums[previousKey] = myValue;
myValue = bst_gpair(0.0f, 0.0f); myValue = GradientPair(0.0f, 0.0f);
} }
scans[tid] = myValue; scans[tid] = myValue;
} }
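
Taken together, the three kernels above implement a segmented scan. A single-threaded host reference for what they compute, an exclusive prefix sum that resets at each key change plus one total per key, may make the block-level bookkeeping easier to follow (types hypothetical):

    #include <cstddef>
    #include <vector>
    struct GP { float g{0.f}, h{0.f}; };
    void ScanByKey(const std::vector<int>& keys, const std::vector<GP>& vals,
                   std::vector<GP>* scans, std::vector<GP>* sums, int n_keys) {
      scans->assign(vals.size(), GP{});
      sums->assign(n_keys, GP{});
      GP running{};
      for (std::size_t i = 0; i < vals.size(); ++i) {
        if (i == 0 || keys[i] != keys[i - 1]) running = GP{};  // segment start
        (*scans)[i] = running;                                 // exclusive scan
        running.g += vals[i].g;
        running.h += vals[i].h;
        if (keys[i] >= 0) {  // negative keys mark unused entries
          (*sums)[keys[i]].g += vals[i].g;
          (*sums)[keys[i]].h += vals[i].h;
        }
      }
    }
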
@ -200,12 +200,12 @@ __global__ void cubScanByKeyL3(bst_gpair* sums, bst_gpair* scans,
* @param nodeStart index of the leftmost node in the current level * @param nodeStart index of the leftmost node in the current level
*/ */
template <int BLKDIM_L1L3 = 256, int BLKDIM_L2 = 512> template <int BLKDIM_L1L3 = 256, int BLKDIM_L2 = 512>
void reduceScanByKey(bst_gpair* sums, bst_gpair* scans, const bst_gpair* vals, void reduceScanByKey(GradientPair* sums, GradientPair* scans, const GradientPair* vals,
const int* instIds, const node_id_t* keys, int size, const int* instIds, const NodeIdT* keys, int size,
int nUniqKeys, int nCols, bst_gpair* tmpScans, int nUniqKeys, int nCols, GradientPair* tmpScans,
int* tmpKeys, const int* colIds, node_id_t nodeStart) { int* tmpKeys, const int* colIds, NodeIdT nodeStart) {
int nBlks = dh::div_round_up(size, BLKDIM_L1L3); int nBlks = dh::DivRoundUp(size, BLKDIM_L1L3);
cudaMemset(sums, 0, nUniqKeys * nCols * sizeof(bst_gpair)); cudaMemset(sums, 0, nUniqKeys * nCols * sizeof(GradientPair));
cubScanByKeyL1<BLKDIM_L1L3> cubScanByKeyL1<BLKDIM_L1L3>
<<<nBlks, BLKDIM_L1L3>>>(scans, vals, instIds, tmpScans, tmpKeys, keys, <<<nBlks, BLKDIM_L1L3>>>(scans, vals, instIds, tmpScans, tmpKeys, keys,
nUniqKeys, colIds, nodeStart, size); nUniqKeys, colIds, nodeStart, size);
@ -243,13 +243,13 @@ struct ExactSplitCandidate {
*/ */
enum ArgMaxByKeyAlgo { enum ArgMaxByKeyAlgo {
/** simplest, use gmem-atomics for all updates */ /** simplest, use gmem-atomics for all updates */
ABK_GMEM = 0, kAbkGmem = 0,
/** use smem-atomics for updates (when number of keys are less) */ /** use smem-atomics for updates (when number of keys are less) */
ABK_SMEM kAbkSmem
}; };
/** max depth until which to use shared mem based atomics for argmax */ /** max depth until which to use shared mem based atomics for argmax */
static const int MAX_ABK_LEVELS = 3; static const int kMaxAbkLevels = 3;
HOST_DEV_INLINE ExactSplitCandidate maxSplit(ExactSplitCandidate a, HOST_DEV_INLINE ExactSplitCandidate maxSplit(ExactSplitCandidate a,
ExactSplitCandidate b) { ExactSplitCandidate b) {
@ -281,27 +281,27 @@ DEV_INLINE void atomicArgMax(ExactSplitCandidate* address,
} }
DEV_INLINE void argMaxWithAtomics( DEV_INLINE void argMaxWithAtomics(
int id, ExactSplitCandidate* nodeSplits, const bst_gpair* gradScans, int id, ExactSplitCandidate* nodeSplits, const GradientPair* gradScans,
const bst_gpair* gradSums, const float* vals, const int* colIds, const GradientPair* gradSums, const float* vals, const int* colIds,
const node_id_t* nodeAssigns, const DeviceNodeStats* nodes, int nUniqKeys, const NodeIdT* nodeAssigns, const DeviceNodeStats* nodes, int nUniqKeys,
node_id_t nodeStart, int len, const GPUTrainingParam& param) { NodeIdT nodeStart, int len, const GPUTrainingParam& param) {
int nodeId = nodeAssigns[id]; int nodeId = nodeAssigns[id];
// @todo: this is really a bad check! but will be fixed when we move // @todo: this is really a bad check! but will be fixed when we move
// to key-based reduction // to key-based reduction
if ((id == 0) || if ((id == 0) ||
!((nodeId == nodeAssigns[id - 1]) && (colIds[id] == colIds[id - 1]) && !((nodeId == nodeAssigns[id - 1]) && (colIds[id] == colIds[id - 1]) &&
(vals[id] == vals[id - 1]))) { (vals[id] == vals[id - 1]))) {
if (nodeId != UNUSED_NODE) { if (nodeId != kUnusedNode) {
int sumId = abs2uniqKey(id, nodeAssigns, colIds, nodeStart, nUniqKeys); int sumId = abs2uniqKey(id, nodeAssigns, colIds, nodeStart, nUniqKeys);
bst_gpair colSum = gradSums[sumId]; GradientPair colSum = gradSums[sumId];
int uid = nodeId - nodeStart; int uid = nodeId - nodeStart;
DeviceNodeStats n = nodes[nodeId]; DeviceNodeStats n = nodes[nodeId];
bst_gpair parentSum = n.sum_gradients; GradientPair parentSum = n.sum_gradients;
float parentGain = n.root_gain; float parentGain = n.root_gain;
bool tmp; bool tmp;
ExactSplitCandidate s; ExactSplitCandidate s;
bst_gpair missing = parentSum - colSum; GradientPair missing = parentSum - colSum;
s.score = loss_chg_missing(gradScans[id], missing, parentSum, parentGain, s.score = LossChangeMissing(gradScans[id], missing, parentSum, parentGain,
param, tmp); param, tmp);
s.index = id; s.index = id;
atomicArgMax(nodeSplits + uid, s); atomicArgMax(nodeSplits + uid, s);
@ -310,10 +310,10 @@ DEV_INLINE void argMaxWithAtomics(
} }
__global__ void atomicArgMaxByKeyGmem( __global__ void atomicArgMaxByKeyGmem(
ExactSplitCandidate* nodeSplits, const bst_gpair* gradScans, ExactSplitCandidate* nodeSplits, const GradientPair* gradScans,
const bst_gpair* gradSums, const float* vals, const int* colIds, const GradientPair* gradSums, const float* vals, const int* colIds,
const node_id_t* nodeAssigns, const DeviceNodeStats* nodes, int nUniqKeys, const NodeIdT* nodeAssigns, const DeviceNodeStats* nodes, int nUniqKeys,
node_id_t nodeStart, int len, const TrainParam param) { NodeIdT nodeStart, int len, const TrainParam param) {
int id = threadIdx.x + (blockIdx.x * blockDim.x); int id = threadIdx.x + (blockIdx.x * blockDim.x);
const int stride = blockDim.x * gridDim.x; const int stride = blockDim.x * gridDim.x;
for (; id < len; id += stride) { for (; id < len; id += stride) {
@ -324,10 +324,10 @@ __global__ void atomicArgMaxByKeyGmem(
} }
__global__ void atomicArgMaxByKeySmem( __global__ void atomicArgMaxByKeySmem(
ExactSplitCandidate* nodeSplits, const bst_gpair* gradScans, ExactSplitCandidate* nodeSplits, const GradientPair* gradScans,
const bst_gpair* gradSums, const float* vals, const int* colIds, const GradientPair* gradSums, const float* vals, const int* colIds,
const node_id_t* nodeAssigns, const DeviceNodeStats* nodes, int nUniqKeys, const NodeIdT* nodeAssigns, const DeviceNodeStats* nodes, int nUniqKeys,
node_id_t nodeStart, int len, const TrainParam param) { NodeIdT nodeStart, int len, const GPUTrainingParam param) {
extern __shared__ char sArr[]; extern __shared__ char sArr[];
ExactSplitCandidate* sNodeSplits = ExactSplitCandidate* sNodeSplits =
reinterpret_cast<ExactSplitCandidate*>(sArr); reinterpret_cast<ExactSplitCandidate*>(sArr);
@ -368,27 +368,27 @@ __global__ void atomicArgMaxByKeySmem(
* @param algo which algorithm to use for argmax_by_key * @param algo which algorithm to use for argmax_by_key
*/ */
template <int BLKDIM = 256, int ITEMS_PER_THREAD = 4> template <int BLKDIM = 256, int ITEMS_PER_THREAD = 4>
void argMaxByKey(ExactSplitCandidate* nodeSplits, const bst_gpair* gradScans, void argMaxByKey(ExactSplitCandidate* nodeSplits, const GradientPair* gradScans,
const bst_gpair* gradSums, const float* vals, const GradientPair* gradSums, const float* vals,
const int* colIds, const node_id_t* nodeAssigns, const int* colIds, const NodeIdT* nodeAssigns,
const DeviceNodeStats* nodes, int nUniqKeys, const DeviceNodeStats* nodes, int nUniqKeys,
node_id_t nodeStart, int len, const TrainParam param, NodeIdT nodeStart, int len, const TrainParam param,
ArgMaxByKeyAlgo algo) { ArgMaxByKeyAlgo algo) {
dh::fillConst<ExactSplitCandidate, BLKDIM, ITEMS_PER_THREAD>( dh::FillConst<ExactSplitCandidate, BLKDIM, ITEMS_PER_THREAD>(
dh::get_device_idx(param.gpu_id), nodeSplits, nUniqKeys, dh::GetDeviceIdx(param.gpu_id), nodeSplits, nUniqKeys,
ExactSplitCandidate()); ExactSplitCandidate());
int nBlks = dh::div_round_up(len, ITEMS_PER_THREAD * BLKDIM); int nBlks = dh::DivRoundUp(len, ITEMS_PER_THREAD * BLKDIM);
switch (algo) { switch (algo) {
case ABK_GMEM: case kAbkGmem:
atomicArgMaxByKeyGmem<<<nBlks, BLKDIM>>>( atomicArgMaxByKeyGmem<<<nBlks, BLKDIM>>>(
nodeSplits, gradScans, gradSums, vals, colIds, nodeAssigns, nodes, nodeSplits, gradScans, gradSums, vals, colIds, nodeAssigns, nodes,
nUniqKeys, nodeStart, len, param); nUniqKeys, nodeStart, len, param);
break; break;
case ABK_SMEM: case kAbkSmem:
atomicArgMaxByKeySmem<<<nBlks, BLKDIM, atomicArgMaxByKeySmem<<<nBlks, BLKDIM,
sizeof(ExactSplitCandidate) * nUniqKeys>>>( sizeof(ExactSplitCandidate) * nUniqKeys>>>(
nodeSplits, gradScans, gradSums, vals, colIds, nodeAssigns, nodes, nodeSplits, gradScans, gradSums, vals, colIds, nodeAssigns, nodes,
nUniqKeys, nodeStart, len, param); nUniqKeys, nodeStart, len, GPUTrainingParam(param));
break; break;
default: default:
throw std::runtime_error("argMaxByKey: Bad algo passed!"); throw std::runtime_error("argMaxByKey: Bad algo passed!");
@ -404,22 +404,22 @@ __global__ void assignColIds(int* colIds, const int* colOffsets) {
} }
} }
__global__ void fillDefaultNodeIds(node_id_t* nodeIdsPerInst, __global__ void fillDefaultNodeIds(NodeIdT* nodeIdsPerInst,
const DeviceNodeStats* nodes, int nRows) { const DeviceNodeStats* nodes, int nRows) {
int id = threadIdx.x + (blockIdx.x * blockDim.x); int id = threadIdx.x + (blockIdx.x * blockDim.x);
if (id >= nRows) { if (id >= nRows) {
return; return;
} }
// if this element belongs to none of the currently active node-id's // if this element belongs to none of the currently active node-id's
node_id_t nId = nodeIdsPerInst[id]; NodeIdT nId = nodeIdsPerInst[id];
if (nId == UNUSED_NODE) { if (nId == kUnusedNode) {
return; return;
} }
const DeviceNodeStats n = nodes[nId]; const DeviceNodeStats n = nodes[nId];
node_id_t result; NodeIdT result;
if (n.IsLeaf() || n.IsUnused()) { if (n.IsLeaf() || n.IsUnused()) {
result = UNUSED_NODE; result = kUnusedNode;
} else if (n.dir == LeftDir) { } else if (n.dir == kLeftDir) {
result = (2 * n.idx) + 1; result = (2 * n.idx) + 1;
} else { } else {
result = (2 * n.idx) + 2; result = (2 * n.idx) + 2;
@ -427,8 +427,8 @@ __global__ void fillDefaultNodeIds(node_id_t* nodeIdsPerInst,
nodeIdsPerInst[id] = result; nodeIdsPerInst[id] = result;
} }
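
The index arithmetic above assumes the dense binary-heap layout used throughout this updater: the children of node i sit at 2i+1 and 2i+2, so a default direction turns into a one-line child computation:

    inline int LeftChildNodeIdx(int nidx) { return 2 * nidx + 1; }
    inline int RightChildNodeIdx(int nidx) { return 2 * nidx + 2; }
    // kLeftDir -> 2 * idx + 1, otherwise 2 * idx + 2
    inline int DefaultChild(int nidx, bool left_dir) {
      return left_dir ? LeftChildNodeIdx(nidx) : RightChildNodeIdx(nidx);
    }
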
__global__ void assignNodeIds(node_id_t* nodeIdsPerInst, int* nodeLocations, __global__ void assignNodeIds(NodeIdT* nodeIdsPerInst, int* nodeLocations,
const node_id_t* nodeIds, const int* instId, const NodeIdT* nodeIds, const int* instId,
const DeviceNodeStats* nodes, const DeviceNodeStats* nodes,
const int* colOffsets, const float* vals, const int* colOffsets, const float* vals,
int nVals, int nCols) { int nVals, int nCols) {
@ -441,7 +441,7 @@ __global__ void assignNodeIds(node_id_t* nodeIdsPerInst, int* nodeLocations,
// the nodeIdsPerInst with all default assignments // the nodeIdsPerInst with all default assignments
int nId = nodeIds[id]; int nId = nodeIds[id];
// if this element belongs to none of the currently active node-id's // if this element belongs to none of the currently active node-id's
if (nId != UNUSED_NODE) { if (nId != kUnusedNode) {
const DeviceNodeStats n = nodes[nId]; const DeviceNodeStats n = nodes[nId];
int colId = n.fidx; int colId = n.fidx;
// printf("nid=%d colId=%d id=%d\n", nId, colId, id); // printf("nid=%d colId=%d id=%d\n", nId, colId, id);
@ -449,7 +449,7 @@ __global__ void assignNodeIds(node_id_t* nodeIdsPerInst, int* nodeLocations,
int end = colOffsets[colId + 1]; int end = colOffsets[colId + 1];
// @todo: too many wasteful threads!! // @todo: too many wasteful threads!!
if ((id >= start) && (id < end) && !(n.IsLeaf() || n.IsUnused())) { if ((id >= start) && (id < end) && !(n.IsLeaf() || n.IsUnused())) {
node_id_t result = (2 * n.idx) + 1 + (vals[id] >= n.fvalue); NodeIdT result = (2 * n.idx) + 1 + (vals[id] >= n.fvalue);
nodeIdsPerInst[instId[id]] = result; nodeIdsPerInst[instId[id]] = result;
} }
} }
@ -475,31 +475,31 @@ class GPUMaker : public TreeUpdater {
/** whether we have initialized memory already (so as not to repeat!) */ /** whether we have initialized memory already (so as not to repeat!) */
bool allocated; bool allocated;
/** feature values stored in column-major compressed format */ /** feature values stored in column-major compressed format */
dh::dvec2<float> vals; dh::DVec2<float> vals;
dh::dvec<float> vals_cached; dh::DVec<float> vals_cached;
/** corresponding instance ids of these feature values */ /** corresponding instance ids of these feature values */
dh::dvec2<int> instIds; dh::DVec2<int> instIds;
dh::dvec<int> instIds_cached; dh::DVec<int> instIds_cached;
/** column offsets for these feature values */ /** column offsets for these feature values */
dh::dvec<int> colOffsets; dh::DVec<int> colOffsets;
dh::dvec<bst_gpair> gradsInst; dh::DVec<GradientPair> gradsInst;
dh::dvec2<node_id_t> nodeAssigns; dh::DVec2<NodeIdT> nodeAssigns;
dh::dvec2<int> nodeLocations; dh::DVec2<int> nodeLocations;
dh::dvec<DeviceNodeStats> nodes; dh::DVec<DeviceNodeStats> nodes;
dh::dvec<node_id_t> nodeAssignsPerInst; dh::DVec<NodeIdT> nodeAssignsPerInst;
dh::dvec<bst_gpair> gradSums; dh::DVec<GradientPair> gradSums;
dh::dvec<bst_gpair> gradScans; dh::DVec<GradientPair> gradScans;
dh::dvec<ExactSplitCandidate> nodeSplits; dh::DVec<ExactSplitCandidate> nodeSplits;
int nVals; int nVals;
int nRows; int nRows;
int nCols; int nCols;
int maxNodes; int maxNodes;
int maxLeaves; int maxLeaves;
dh::CubMemory tmp_mem; dh::CubMemory tmp_mem;
dh::dvec<bst_gpair> tmpScanGradBuff; dh::DVec<GradientPair> tmpScanGradBuff;
dh::dvec<int> tmpScanKeyBuff; dh::DVec<int> tmpScanKeyBuff;
dh::dvec<int> colIds; dh::DVec<int> colIds;
dh::bulk_allocator<dh::memory_type::DEVICE> ba; dh::BulkAllocator<dh::MemoryType::kDevice> ba;
public: public:
GPUMaker() : allocated(false) {} GPUMaker() : allocated(false) {}
@ -512,9 +512,9 @@ class GPUMaker : public TreeUpdater {
maxLeaves = 1 << param.max_depth; maxLeaves = 1 << param.max_depth;
} }
void Update(HostDeviceVector<bst_gpair>* gpair, DMatrix* dmat, void Update(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
const std::vector<RegTree*>& trees) override { const std::vector<RegTree*>& trees) override {
GradStats::CheckInfo(dmat->info()); GradStats::CheckInfo(dmat->Info());
// rescale learning rate according to size of trees // rescale learning rate according to size of trees
float lr = param.learning_rate; float lr = param.learning_rate;
param.learning_rate = lr / trees.size(); param.learning_rate = lr / trees.size();
@ -530,7 +530,7 @@ class GPUMaker : public TreeUpdater {
param.learning_rate = lr; param.learning_rate = lr;
} }
/// @note: Update should be only after Init!! /// @note: Update should be only after Init!!
void UpdateTree(HostDeviceVector<bst_gpair>* gpair, DMatrix* dmat, void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
RegTree* hTree) { RegTree* hTree) {
if (!allocated) { if (!allocated) {
setupOneTimeData(dmat); setupOneTimeData(dmat);
@ -538,33 +538,33 @@ class GPUMaker : public TreeUpdater {
for (int i = 0; i < param.max_depth; ++i) { for (int i = 0; i < param.max_depth; ++i) {
if (i == 0) { if (i == 0) {
// make sure to start on a fresh tree with sorted values! // make sure to start on a fresh tree with sorted values!
vals.current_dvec() = vals_cached; vals.CurrentDVec() = vals_cached;
instIds.current_dvec() = instIds_cached; instIds.CurrentDVec() = instIds_cached;
transferGrads(gpair); transferGrads(gpair);
} }
int nNodes = 1 << i; int nNodes = 1 << i;
node_id_t nodeStart = nNodes - 1; NodeIdT nodeStart = nNodes - 1;
initNodeData(i, nodeStart, nNodes); initNodeData(i, nodeStart, nNodes);
findSplit(i, nodeStart, nNodes); findSplit(i, nodeStart, nNodes);
} }
// mark all the used nodes with unused children as leaf nodes // mark all the used nodes with unused children as leaf nodes
markLeaves(); markLeaves();
dense2sparse_tree(hTree, nodes, param); Dense2SparseTree(hTree, nodes, param);
} }
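
The level loop above leans on the same heap layout: level i holds 2^i nodes starting at heap index 2^i - 1, and a complete tree of depth d has 2^(d+1) - 1 nodes and 2^d leaves (the node-count formula is an assumption; only the leaf count and level start are visible in the diff):

    #include <cassert>
    int MaxNodes(int depth) { return (1 << (depth + 1)) - 1; }
    int MaxLeaves(int depth) { return 1 << depth; }
    int LevelStart(int level) { return (1 << level) - 1; }
    int main() {
      assert(MaxNodes(2) == 7);
      assert(MaxLeaves(2) == 4);
      assert(LevelStart(2) == 3);  // level 2 occupies nodes 3..6
    }
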
void split2node(int nNodes, node_id_t nodeStart) { void split2node(int nNodes, NodeIdT nodeStart) {
auto d_nodes = nodes.data(); auto d_nodes = nodes.Data();
auto d_gradScans = gradScans.data(); auto d_gradScans = gradScans.Data();
auto d_gradSums = gradSums.data(); auto d_gradSums = gradSums.Data();
auto d_nodeAssigns = nodeAssigns.current(); auto d_nodeAssigns = nodeAssigns.Current();
auto d_colIds = colIds.data(); auto d_colIds = colIds.Data();
auto d_vals = vals.current(); auto d_vals = vals.Current();
auto d_nodeSplits = nodeSplits.data(); auto d_nodeSplits = nodeSplits.Data();
int nUniqKeys = nNodes; int nUniqKeys = nNodes;
float min_split_loss = param.min_split_loss; float min_split_loss = param.min_split_loss;
auto gpu_param = GPUTrainingParam(param); auto gpu_param = GPUTrainingParam(param);
dh::launch_n(param.gpu_id, nNodes, [=] __device__(int uid) { dh::LaunchN(param.gpu_id, nNodes, [=] __device__(int uid) {
int absNodeId = uid + nodeStart; int absNodeId = uid + nodeStart;
ExactSplitCandidate s = d_nodeSplits[uid]; ExactSplitCandidate s = d_nodeSplits[uid];
if (s.isSplittable(min_split_loss)) { if (s.isSplittable(min_split_loss)) {
@@ -573,26 +573,26 @@ class GPUMaker : public TreeUpdater {
 abs2uniqKey(idx, d_nodeAssigns, d_colIds, nodeStart, nUniqKeys);
 bool missingLeft = true;
 const DeviceNodeStats& n = d_nodes[absNodeId];
-bst_gpair gradScan = d_gradScans[idx];
+GradientPair gradScan = d_gradScans[idx];
-bst_gpair gradSum = d_gradSums[nodeInstId];
+GradientPair gradSum = d_gradSums[nodeInstId];
 float thresh = d_vals[idx];
 int colId = d_colIds[idx];
 // get the default direction for the current node
-bst_gpair missing = n.sum_gradients - gradSum;
+GradientPair missing = n.sum_gradients - gradSum;
-loss_chg_missing(gradScan, missing, n.sum_gradients, n.root_gain,
+LossChangeMissing(gradScan, missing, n.sum_gradients, n.root_gain,
 gpu_param, missingLeft);
 // get the score/weight/id/gradSum for left and right child nodes
-bst_gpair lGradSum = missingLeft ? gradScan + missing : gradScan;
+GradientPair lGradSum = missingLeft ? gradScan + missing : gradScan;
-bst_gpair rGradSum = n.sum_gradients - lGradSum;
+GradientPair rGradSum = n.sum_gradients - lGradSum;
 // Create children
-d_nodes[left_child_nidx(absNodeId)] =
+d_nodes[LeftChildNodeIdx(absNodeId)] =
-DeviceNodeStats(lGradSum, left_child_nidx(absNodeId), gpu_param);
+DeviceNodeStats(lGradSum, LeftChildNodeIdx(absNodeId), gpu_param);
-d_nodes[right_child_nidx(absNodeId)] =
+d_nodes[RightChildNodeIdx(absNodeId)] =
-DeviceNodeStats(rGradSum, right_child_nidx(absNodeId), gpu_param);
+DeviceNodeStats(rGradSum, RightChildNodeIdx(absNodeId), gpu_param);
 // Set split for parent
 d_nodes[absNodeId].SetSplit(thresh, colId,
-missingLeft ? LeftDir : RightDir, lGradSum,
+missingLeft ? kLeftDir : kRightDir, lGradSum,
 rGradSum);
 } else {
 // cannot be split further, so this node is a leaf!
@@ -601,21 +601,21 @@ class GPUMaker : public TreeUpdater {
 });
 }
-void findSplit(int level, node_id_t nodeStart, int nNodes) {
+void findSplit(int level, NodeIdT nodeStart, int nNodes) {
-reduceScanByKey(gradSums.data(), gradScans.data(), gradsInst.data(),
+reduceScanByKey(gradSums.Data(), gradScans.Data(), gradsInst.Data(),
-instIds.current(), nodeAssigns.current(), nVals, nNodes,
+instIds.Current(), nodeAssigns.Current(), nVals, nNodes,
-nCols, tmpScanGradBuff.data(), tmpScanKeyBuff.data(),
+nCols, tmpScanGradBuff.Data(), tmpScanKeyBuff.Data(),
-colIds.data(), nodeStart);
+colIds.Data(), nodeStart);
-argMaxByKey(nodeSplits.data(), gradScans.data(), gradSums.data(),
+argMaxByKey(nodeSplits.Data(), gradScans.Data(), gradSums.Data(),
-vals.current(), colIds.data(), nodeAssigns.current(),
+vals.Current(), colIds.Data(), nodeAssigns.Current(),
-nodes.data(), nNodes, nodeStart, nVals, param,
+nodes.Data(), nNodes, nodeStart, nVals, param,
-level <= MAX_ABK_LEVELS ? ABK_SMEM : ABK_GMEM);
+level <= kMaxAbkLevels ? kAbkSmem : kAbkGmem);
 split2node(nNodes, nodeStart);
 }
 void allocateAllData(int offsetSize) {
-int tmpBuffSize = scanTempBufferSize(nVals);
+int tmpBuffSize = ScanTempBufferSize(nVals);
-ba.allocate(dh::get_device_idx(param.gpu_id), param.silent, &vals, nVals,
+ba.Allocate(dh::GetDeviceIdx(param.gpu_id), param.silent, &vals, nVals,
 &vals_cached, nVals, &instIds, nVals, &instIds_cached, nVals,
 &colOffsets, offsetSize, &gradsInst, nRows, &nodeAssigns, nVals,
 &nodeLocations, nVals, &nodes, maxNodes, &nodeAssignsPerInst,
@@ -625,7 +625,7 @@ class GPUMaker : public TreeUpdater {
 }
 void setupOneTimeData(DMatrix* dmat) {
-size_t free_memory = dh::available_memory(dh::get_device_idx(param.gpu_id));
+size_t free_memory = dh::AvailableMemory(dh::GetDeviceIdx(param.gpu_id));
 if (!dmat->SingleColBlock()) {
 throw std::runtime_error("exact::GPUBuilder - must have 1 column block");
 }
@@ -640,11 +640,11 @@ class GPUMaker : public TreeUpdater {
 void convertToCsc(DMatrix* dmat, std::vector<float>* fval,
 std::vector<int>* fId, std::vector<size_t>* offset) {
-MetaInfo info = dmat->info();
+MetaInfo info = dmat->Info();
-CHECK(info.num_col < std::numeric_limits<int>::max());
+CHECK(info.num_col_ < std::numeric_limits<int>::max());
-CHECK(info.num_row < std::numeric_limits<int>::max());
+CHECK(info.num_row_ < std::numeric_limits<int>::max());
-nRows = static_cast<int>(info.num_row);
+nRows = static_cast<int>(info.num_row_);
-nCols = static_cast<int>(info.num_col);
+nCols = static_cast<int>(info.num_col_);
 offset->reserve(nCols + 1);
 offset->push_back(0);
 fval->reserve(nCols * nRows);
@@ -677,56 +677,56 @@ class GPUMaker : public TreeUpdater {
 void transferAndSortData(const std::vector<float>& fval,
 const std::vector<int>& fId,
 const std::vector<size_t>& offset) {
-vals.current_dvec() = fval;
+vals.CurrentDVec() = fval;
-instIds.current_dvec() = fId;
+instIds.CurrentDVec() = fId;
 colOffsets = offset;
-dh::segmentedSort<float, int>(&tmp_mem, &vals, &instIds, nVals, nCols,
+dh::SegmentedSort<float, int>(&tmp_mem, &vals, &instIds, nVals, nCols,
 colOffsets);
-vals_cached = vals.current_dvec();
+vals_cached = vals.CurrentDVec();
-instIds_cached = instIds.current_dvec();
+instIds_cached = instIds.CurrentDVec();
-assignColIds<<<nCols, 512>>>(colIds.data(), colOffsets.data());
+assignColIds<<<nCols, 512>>>(colIds.Data(), colOffsets.Data());
 }
-void transferGrads(HostDeviceVector<bst_gpair>* gpair) {
+void transferGrads(HostDeviceVector<GradientPair>* gpair) {
 // HACK
-dh::safe_cuda(cudaMemcpy(gradsInst.data(), gpair->ptr_d(param.gpu_id),
+dh::safe_cuda(cudaMemcpy(gradsInst.Data(), gpair->DevicePointer(param.gpu_id),
-sizeof(bst_gpair) * nRows,
+sizeof(GradientPair) * nRows,
 cudaMemcpyDefault));
 // evaluate the full-grad reduction for the root node
-dh::sumReduction<bst_gpair>(tmp_mem, gradsInst, gradSums, nRows);
+dh::SumReduction<GradientPair>(tmp_mem, gradsInst, gradSums, nRows);
 }
-void initNodeData(int level, node_id_t nodeStart, int nNodes) {
+void initNodeData(int level, NodeIdT nodeStart, int nNodes) {
 // all instances belong to root node at the beginning!
 if (level == 0) {
-nodes.fill(DeviceNodeStats());
+nodes.Fill(DeviceNodeStats());
-nodeAssigns.current_dvec().fill(0);
+nodeAssigns.CurrentDVec().Fill(0);
-nodeAssignsPerInst.fill(0);
+nodeAssignsPerInst.Fill(0);
 // for root node, just update the gradient/score/weight/id info
 // before splitting it! Currently all data is on GPU, hence this
 // stupid little kernel
-auto d_nodes = nodes.data();
+auto d_nodes = nodes.Data();
-auto d_sums = gradSums.data();
+auto d_sums = gradSums.Data();
 auto gpu_params = GPUTrainingParam(param);
-dh::launch_n(param.gpu_id, 1, [=] __device__(int idx) {
+dh::LaunchN(param.gpu_id, 1, [=] __device__(int idx) {
 d_nodes[0] = DeviceNodeStats(d_sums[0], 0, gpu_params);
 });
 } else {
 const int BlkDim = 256;
 const int ItemsPerThread = 4;
 // assign default node ids first
-int nBlks = dh::div_round_up(nRows, BlkDim);
+int nBlks = dh::DivRoundUp(nRows, BlkDim);
-fillDefaultNodeIds<<<nBlks, BlkDim>>>(nodeAssignsPerInst.data(),
+fillDefaultNodeIds<<<nBlks, BlkDim>>>(nodeAssignsPerInst.Data(),
-nodes.data(), nRows);
+nodes.Data(), nRows);
 // evaluate the correct child indices of non-missing values next
-nBlks = dh::div_round_up(nVals, BlkDim * ItemsPerThread);
+nBlks = dh::DivRoundUp(nVals, BlkDim * ItemsPerThread);
 assignNodeIds<<<nBlks, BlkDim>>>(
-nodeAssignsPerInst.data(), nodeLocations.current(),
+nodeAssignsPerInst.Data(), nodeLocations.Current(),
-nodeAssigns.current(), instIds.current(), nodes.data(),
+nodeAssigns.Current(), instIds.Current(), nodes.Data(),
-colOffsets.data(), vals.current(), nVals, nCols);
+colOffsets.Data(), vals.Current(), nVals, nCols);
 // gather the node assignments across all other columns too
-dh::gather(dh::get_device_idx(param.gpu_id), nodeAssigns.current(),
+dh::Gather(dh::GetDeviceIdx(param.gpu_id), nodeAssigns.Current(),
-nodeAssignsPerInst.data(), instIds.current(), nVals);
+nodeAssignsPerInst.Data(), instIds.Current(), nVals);
 sortKeys(level);
 }
 }
@@ -734,19 +734,19 @@ class GPUMaker : public TreeUpdater {
 void sortKeys(int level) {
 // segmented-sort the arrays based on node-id's
 // but we don't need more than level+1 bits for sorting!
-segmentedSort(&tmp_mem, &nodeAssigns, &nodeLocations, nVals, nCols,
+SegmentedSort(&tmp_mem, &nodeAssigns, &nodeLocations, nVals, nCols,
 colOffsets, 0, level + 1);
-dh::gather<float, int>(dh::get_device_idx(param.gpu_id), vals.other(),
+dh::Gather<float, int>(dh::GetDeviceIdx(param.gpu_id), vals.other(),
-vals.current(), instIds.other(), instIds.current(),
+vals.Current(), instIds.other(), instIds.Current(),
-nodeLocations.current(), nVals);
+nodeLocations.Current(), nVals);
 vals.buff().selector ^= 1;
 instIds.buff().selector ^= 1;
 }
 void markLeaves() {
 const int BlkDim = 128;
-int nBlks = dh::div_round_up(maxNodes, BlkDim);
+int nBlks = dh::DivRoundUp(maxNodes, BlkDim);
-markLeavesKernel<<<nBlks, BlkDim>>>(nodes.data(), maxNodes);
+markLeavesKernel<<<nBlks, BlkDim>>>(nodes.Data(), maxNodes);
 }
 };
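findSplit above is built from two segmented primitives: reduceScanByKey produces, for every (node, column) key that abs2uniqKey derives, both the per-key gradient total and the running prefix of gradients along the sorted feature values, and argMaxByKey then picks the best-scoring position per key. A hedged sketch of the same scan primitive expressed with thrust (names are illustrative, not the kernels used here):

// Hedged sketch: the segmented-scan primitive behind reduceScanByKey,
// expressed with thrust. Equal consecutive keys form one segment and
// the prefix sum restarts whenever the key changes.
#include <thrust/device_vector.h>
#include <thrust/scan.h>

void ScanByKeySketch(const thrust::device_vector<int>& keys,    // (node, column) ids
                     const thrust::device_vector<float>& grads, // per-instance gradients
                     thrust::device_vector<float>* scans) {
  thrust::exclusive_scan_by_key(keys.begin(), keys.end(), grads.begin(),
                                scans->begin());
}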
@@ -15,7 +15,7 @@
 #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
 #else
-__device__ __forceinline__ double atomicAdd(double* address, double val) {
+XGBOOST_DEVICE __forceinline__ double atomicAdd(double* address, double val) {
 unsigned long long int* address_as_ull =
 (unsigned long long int*)address; // NOLINT
 unsigned long long int old = *address_as_ull, assumed; // NOLINT
@@ -37,8 +37,8 @@ namespace xgboost {
 namespace tree {
 // Atomic add function for double precision gradients
-__device__ __forceinline__ void AtomicAddGpair(bst_gpair_precise* dest,
+__device__ __forceinline__ void AtomicAddGpair(GradientPairPrecise* dest,
-const bst_gpair& gpair) {
+const GradientPair& gpair) {
 auto dst_ptr = reinterpret_cast<double*>(dest);
 atomicAdd(dst_ptr, static_cast<double>(gpair.GetGrad()));
@@ -46,11 +46,11 @@ __device__ __forceinline__ void AtomicAddGpair(bst_gpair_precise* dest,
 }
 // For integer gradients
-__device__ __forceinline__ void AtomicAddGpair(bst_gpair_integer* dest,
+__device__ __forceinline__ void AtomicAddGpair(GradientPairInteger* dest,
-const bst_gpair& gpair) {
+const GradientPair& gpair) {
 auto dst_ptr = reinterpret_cast<unsigned long long int*>(dest); // NOLINT
-bst_gpair_integer tmp(gpair.GetGrad(), gpair.GetHess());
+GradientPairInteger tmp(gpair.GetGrad(), gpair.GetHess());
-auto src_ptr = reinterpret_cast<bst_gpair_integer::value_t*>(&tmp);
+auto src_ptr = reinterpret_cast<GradientPairInteger::ValueT*>(&tmp);
 atomicAdd(dst_ptr,
 static_cast<unsigned long long int>(*src_ptr)); // NOLINT
@@ -59,13 +59,11 @@ __device__ __forceinline__ void AtomicAddGpair(bst_gpair_integer* dest,
 }
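Both overloads above funnel a two-component gradient pair through a single 64-bit atomicAdd. For the integer variant this also makes the sum deterministic: integer addition is associative, so the result does not depend on the order in which atomics commit. A hedged host-side sketch of the fixed-point idea (the scale factor is hypothetical, not the one GradientPairInteger actually uses):

// Hedged sketch of fixed-point gradient accumulation; kScale is hypothetical.
#include <cstdint>

const int64_t kScale = 1000000;  // hypothetical fixed-point scale

int64_t Encode(float g) { return static_cast<int64_t>(g * kScale); }
float Decode(int64_t v) { return static_cast<float>(v) / kScale; }
// Summing encoded values in any order gives bit-identical results,
// unlike floating-point addition; the CheckGradientMax guard below
// keeps |g| < 2^16 so the scaled sums stay well inside 64 bits.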
 /**
- * \fn void CheckGradientMax(const dh::dvec<bst_gpair>& gpair)
- *
 * \brief Check maximum gradient value is below 2^16. This is to prevent
 * overflow when using integer gradient summation.
 */
-inline void CheckGradientMax(const std::vector<bst_gpair>& gpair) {
+inline void CheckGradientMax(const std::vector<GradientPair>& gpair) {
 auto* ptr = reinterpret_cast<const float*>(gpair.data());
 float abs_max =
 std::accumulate(ptr, ptr + (gpair.size() * 2), 0.f,
@@ -87,19 +85,19 @@ struct GPUTrainingParam {
 // default=0 means no constraint on weight delta
 float max_delta_step;
-__host__ __device__ GPUTrainingParam() {}
+GPUTrainingParam() = default;
-__host__ __device__ GPUTrainingParam(const TrainParam& param)
+XGBOOST_DEVICE explicit GPUTrainingParam(const TrainParam& param)
 : min_child_weight(param.min_child_weight),
 reg_lambda(param.reg_lambda),
 reg_alpha(param.reg_alpha),
 max_delta_step(param.max_delta_step) {}
 };
-typedef int node_id_t;
+using NodeIdT = int;
 /** used to assign default id to a Node */
-static const int UNUSED_NODE = -1;
+static const int kUnusedNode = -1;
 /**
 * @enum DefaultDirection node.cuh
@@ -107,9 +105,9 @@ static const int UNUSED_NODE = -1;
 */
 enum DefaultDirection {
 /** move to left child */
-LeftDir = 0,
+kLeftDir = 0,
 /** move to right child */
-RightDir
+kRightDir
 };
 struct DeviceSplitCandidate {
@@ -117,15 +115,15 @@ struct DeviceSplitCandidate {
 DefaultDirection dir;
 float fvalue;
 int findex;
-bst_gpair_integer left_sum;
+GradientPair left_sum;
-bst_gpair_integer right_sum;
+GradientPair right_sum;
-__host__ __device__ DeviceSplitCandidate()
+XGBOOST_DEVICE DeviceSplitCandidate()
-: loss_chg(-FLT_MAX), dir(LeftDir), fvalue(0), findex(-1) {}
+: loss_chg(-FLT_MAX), dir(kLeftDir), fvalue(0), findex(-1) {}
-template <typename param_t>
+template <typename ParamT>
-__host__ __device__ void Update(const DeviceSplitCandidate& other,
+XGBOOST_DEVICE void Update(const DeviceSplitCandidate& other,
-const param_t& param) {
+const ParamT& param) {
 if (other.loss_chg > loss_chg &&
 other.left_sum.GetHess() >= param.min_child_weight &&
 other.right_sum.GetHess() >= param.min_child_weight) {
@@ -133,10 +131,10 @@ struct DeviceSplitCandidate {
 }
 }
-__device__ void Update(float loss_chg_in, DefaultDirection dir_in,
+XGBOOST_DEVICE void Update(float loss_chg_in, DefaultDirection dir_in,
 float fvalue_in, int findex_in,
-bst_gpair_integer left_sum_in,
+GradientPair left_sum_in,
-bst_gpair_integer right_sum_in,
+GradientPair right_sum_in,
 const GPUTrainingParam& param) {
 if (loss_chg_in > loss_chg &&
 left_sum_in.GetHess() >= param.min_child_weight &&
@@ -149,11 +147,11 @@ struct DeviceSplitCandidate {
 findex = findex_in;
 }
 }
-__device__ bool IsValid() const { return loss_chg > 0.0f; }
+XGBOOST_DEVICE bool IsValid() const { return loss_chg > 0.0f; }
 };
 struct DeviceNodeStats {
-bst_gpair sum_gradients;
+GradientPair sum_gradients;
 float root_gain;
 float weight;
@@ -161,31 +159,31 @@ struct DeviceNodeStats {
 DefaultDirection dir;
 /** threshold value for comparison */
 float fvalue;
-bst_gpair left_sum;
+GradientPair left_sum;
-bst_gpair right_sum;
+GradientPair right_sum;
 /** \brief The feature index. */
 int fidx;
 /** node id (used as key for reduce/scan) */
-node_id_t idx;
+NodeIdT idx;
 HOST_DEV_INLINE DeviceNodeStats()
 : sum_gradients(),
 root_gain(-FLT_MAX),
 weight(-FLT_MAX),
-dir(LeftDir),
+dir(kLeftDir),
 fvalue(0.f),
 left_sum(),
 right_sum(),
-fidx(UNUSED_NODE),
+fidx(kUnusedNode),
-idx(UNUSED_NODE) {}
+idx(kUnusedNode) {}
-template <typename param_t>
+template <typename ParamT>
-HOST_DEV_INLINE DeviceNodeStats(bst_gpair sum_gradients, node_id_t nidx,
+HOST_DEV_INLINE DeviceNodeStats(GradientPair sum_gradients, NodeIdT nidx,
-const param_t& param)
+const ParamT& param)
 : sum_gradients(sum_gradients),
-dir(LeftDir),
+dir(kLeftDir),
 fvalue(0.f),
-fidx(UNUSED_NODE),
+fidx(kUnusedNode),
 idx(nidx) {
 this->root_gain =
 CalcGain(param, sum_gradients.GetGrad(), sum_gradients.GetHess());
@@ -194,7 +192,7 @@ struct DeviceNodeStats {
 }
 HOST_DEV_INLINE void SetSplit(float fvalue, int fidx, DefaultDirection dir,
-bst_gpair left_sum, bst_gpair right_sum) {
+GradientPair left_sum, GradientPair right_sum) {
 this->fvalue = fvalue;
 this->fidx = fidx;
 this->dir = dir;
@@ -208,11 +206,11 @@ struct DeviceNodeStats {
 }
 /** Tells whether this node is part of the decision tree */
-HOST_DEV_INLINE bool IsUnused() const { return (idx == UNUSED_NODE); }
+HOST_DEV_INLINE bool IsUnused() const { return (idx == kUnusedNode); }
 /** Tells whether this node is a leaf of the decision tree */
 HOST_DEV_INLINE bool IsLeaf() const {
-return (!IsUnused() && (fidx == UNUSED_NODE));
+return (!IsUnused() && (fidx == kUnusedNode));
 }
 };
@@ -221,37 +219,37 @@ struct SumCallbackOp {
 // Running prefix
 T running_total;
 // Constructor
-__device__ SumCallbackOp() : running_total(T()) {}
+XGBOOST_DEVICE SumCallbackOp() : running_total(T()) {}
-__device__ T operator()(T block_aggregate) {
+XGBOOST_DEVICE T operator()(T block_aggregate) {
 T old_prefix = running_total;
 running_total += block_aggregate;
 return old_prefix;
 }
 };
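SumCallbackOp carries a running prefix across the tiles of a cub block scan: cub invokes the functor once per tile with that tile's aggregate, and the returned value becomes the offset applied to the whole tile. EvaluateFeature in the hist updater uses exactly this pattern to scan a histogram wider than one thread block. A hedged device-side sketch (array and size names are illustrative):

// Hedged usage sketch for SumCallbackOp with cub::BlockScan; assumes
// <cub/cub.cuh> is included, as in this file.
template <int BLOCK_THREADS>
__device__ void TiledPrefixSum(const float* in, float* out, int n) {
  typedef cub::BlockScan<float, BLOCK_THREADS> BlockScanT;
  __shared__ typename BlockScanT::TempStorage temp_storage;
  SumCallbackOp<float> prefix_op;  // running_total starts at zero
  for (int tile = 0; tile < n; tile += BLOCK_THREADS) {
    int i = tile + threadIdx.x;
    float x = i < n ? in[i] : 0.0f;
    // cub calls prefix_op(tile_aggregate) once per tile and shifts the
    // whole tile by the returned running prefix.
    BlockScanT(temp_storage).ExclusiveScan(x, x, cub::Sum(), prefix_op);
    if (i < n) out[i] = x;
  }
}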
-template <typename gpair_t>
+template <typename GradientPairT>
-__device__ inline float device_calc_loss_chg(const GPUTrainingParam& param,
+XGBOOST_DEVICE inline float DeviceCalcLossChange(const GPUTrainingParam& param,
-const gpair_t& left,
+const GradientPairT& left,
-const gpair_t& parent_sum,
+const GradientPairT& parent_sum,
 const float& parent_gain) {
-gpair_t right = parent_sum - left;
+GradientPairT right = parent_sum - left;
 float left_gain = CalcGain(param, left.GetGrad(), left.GetHess());
 float right_gain = CalcGain(param, right.GetGrad(), right.GetHess());
 return left_gain + right_gain - parent_gain;
 }
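With CalcGain(param, G, H) being the usual regularised gain G^2 / (H + lambda) (ignoring the reg_alpha and max_delta_step handling that CalcGain also performs), the function above computes the standard split objective used throughout XGBoost:

$$\Delta\mathcal{L} \;=\; \frac{G_L^2}{H_L+\lambda} \;+\; \frac{G_R^2}{H_R+\lambda} \;-\; \frac{G_P^2}{H_P+\lambda}, \qquad G_R = G_P - G_L,\quad H_R = H_P - H_L.$$

LossChangeMissing below simply evaluates this twice, once with the missing-value mass folded into the left child and once with it on the right, and reports the better direction through missing_left_out.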
 // Without constraints
-template <typename gpair_t>
+template <typename GradientPairT>
-__device__ float inline loss_chg_missing(const gpair_t& scan,
+XGBOOST_DEVICE float inline LossChangeMissing(const GradientPairT& scan,
-const gpair_t& missing,
+const GradientPairT& missing,
-const gpair_t& parent_sum,
+const GradientPairT& parent_sum,
 const float& parent_gain,
 const GPUTrainingParam& param,
 bool& missing_left_out) { // NOLINT
 float missing_left_loss =
-device_calc_loss_chg(param, scan + missing, parent_sum, parent_gain);
+DeviceCalcLossChange(param, scan + missing, parent_sum, parent_gain);
 float missing_right_loss =
-device_calc_loss_chg(param, scan, parent_sum, parent_gain);
+DeviceCalcLossChange(param, scan, parent_sum, parent_gain);
 if (missing_left_loss >= missing_right_loss) {
 missing_left_out = true;
@@ -263,9 +261,9 @@ __device__ float inline loss_chg_missing(
 }
 // With constraints
-template <typename gpair_t>
+template <typename GradientPairT>
-__device__ float inline loss_chg_missing(
+XGBOOST_DEVICE float inline LossChangeMissing(
-const gpair_t& scan, const gpair_t& missing, const gpair_t& parent_sum,
+const GradientPairT& scan, const GradientPairT& missing, const GradientPairT& parent_sum,
 const float& parent_gain, const GPUTrainingParam& param, int constraint,
 const ValueConstraint& value_constraint,
 bool& missing_left_out) { // NOLINT
@@ -285,54 +283,54 @@ __device__ float inline loss_chg_missing(
 }
 // Total number of nodes in tree, given depth
-__host__ __device__ inline int n_nodes(int depth) {
+XGBOOST_DEVICE inline int MaxNodesDepth(int depth) {
 return (1 << (depth + 1)) - 1;
 }
 // Number of nodes at this level of the tree
-__host__ __device__ inline int n_nodes_level(int depth) { return 1 << depth; }
+XGBOOST_DEVICE inline int MaxNodesLevel(int depth) { return 1 << depth; }
 // Whether a node is currently being processed at current depth
-__host__ __device__ inline bool is_active(int nidx, int depth) {
+XGBOOST_DEVICE inline bool IsNodeActive(int nidx, int depth) {
-return nidx >= n_nodes(depth - 1);
+return nidx >= MaxNodesDepth(depth - 1);
 }
-__host__ __device__ inline int parent_nidx(int nidx) { return (nidx - 1) / 2; }
+XGBOOST_DEVICE inline int ParentNodeIdx(int nidx) { return (nidx - 1) / 2; }
-__host__ __device__ inline int left_child_nidx(int nidx) {
+XGBOOST_DEVICE inline int LeftChildNodeIdx(int nidx) {
 return nidx * 2 + 1;
 }
-__host__ __device__ inline int right_child_nidx(int nidx) {
+XGBOOST_DEVICE inline int RightChildNodeIdx(int nidx) {
 return nidx * 2 + 2;
 }
-__host__ __device__ inline bool is_left_child(int nidx) {
+XGBOOST_DEVICE inline bool IsLeftChild(int nidx) {
 return nidx % 2 == 1;
 }
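These helpers encode a complete binary tree laid out level by level in one flat array; it is the same layout the exact updater walks when it starts level i at nodeStart = 2^i - 1. A small host-side sketch of the invariants (illustrative only):

// Hedged sketch: invariants of the flat, level-order node layout.
#include <cassert>

inline void NodeIndexingSketch() {
  for (int nidx = 0; nidx < 100; ++nidx) {
    assert(ParentNodeIdx(LeftChildNodeIdx(nidx)) == nidx);
    assert(ParentNodeIdx(RightChildNodeIdx(nidx)) == nidx);
    assert(IsLeftChild(LeftChildNodeIdx(nidx)));
  }
  // A tree of depth 3 holds 2^4 - 1 = 15 nodes; level 3 alone holds
  // 2^3 = 8 of them, starting at flat index 2^3 - 1 = 7.
  assert(MaxNodesDepth(3) == 15);
  assert(MaxNodesLevel(3) == 8);
}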
 // Copy gpu dense representation of tree to xgboost sparse representation
-inline void dense2sparse_tree(RegTree* p_tree,
+inline void Dense2SparseTree(RegTree* p_tree,
-const dh::dvec<DeviceNodeStats>& nodes,
+const dh::DVec<DeviceNodeStats>& nodes,
 const TrainParam& param) {
 RegTree& tree = *p_tree;
-std::vector<DeviceNodeStats> h_nodes = nodes.as_vector();
+std::vector<DeviceNodeStats> h_nodes = nodes.AsVector();
 int nid = 0;
 for (int gpu_nid = 0; gpu_nid < h_nodes.size(); gpu_nid++) {
 const DeviceNodeStats& n = h_nodes[gpu_nid];
 if (!n.IsUnused() && !n.IsLeaf()) {
 tree.AddChilds(nid);
-tree[nid].set_split(n.fidx, n.fvalue, n.dir == LeftDir);
+tree[nid].SetSplit(n.fidx, n.fvalue, n.dir == kLeftDir);
-tree.stat(nid).loss_chg = n.root_gain;
+tree.Stat(nid).loss_chg = n.root_gain;
-tree.stat(nid).base_weight = n.weight;
+tree.Stat(nid).base_weight = n.weight;
-tree.stat(nid).sum_hess = n.sum_gradients.GetHess();
+tree.Stat(nid).sum_hess = n.sum_gradients.GetHess();
-tree[tree[nid].cleft()].set_leaf(0);
+tree[tree[nid].LeftChild()].SetLeaf(0);
-tree[tree[nid].cright()].set_leaf(0);
+tree[tree[nid].RightChild()].SetLeaf(0);
 nid++;
 } else if (n.IsLeaf()) {
-tree[nid].set_leaf(n.weight * param.learning_rate);
+tree[nid].SetLeaf(n.weight * param.learning_rate);
-tree.stat(nid).sum_hess = n.sum_gradients.GetHess();
+tree.Stat(nid).sum_hess = n.sum_gradients.GetHess();
 nid++;
 }
 }
@@ -346,11 +344,11 @@ struct BernoulliRng {
 float p;
 uint32_t seed;
-__host__ __device__ BernoulliRng(float p, size_t seed_) : p(p) {
+XGBOOST_DEVICE BernoulliRng(float p, size_t seed_) : p(p) {
 seed = static_cast<uint32_t>(seed_);
 }
-__host__ __device__ bool operator()(const int i) const {
+XGBOOST_DEVICE bool operator()(const int i) const {
 thrust::default_random_engine rng(seed);
 thrust::uniform_real_distribution<float> dist;
 rng.discard(i);
@@ -359,25 +357,25 @@ struct BernoulliRng {
 };
 // Set gradient pair to 0 with p = 1 - subsample
-inline void subsample_gpair(dh::dvec<bst_gpair>* p_gpair, float subsample,
+inline void SubsampleGradientPair(dh::DVec<GradientPair>* p_gpair, float subsample,
 int offset = 0) {
 if (subsample == 1.0) {
 return;
 }
-dh::dvec<bst_gpair>& gpair = *p_gpair;
+dh::DVec<GradientPair>& gpair = *p_gpair;
-auto d_gpair = gpair.data();
+auto d_gpair = gpair.Data();
 BernoulliRng rng(subsample, common::GlobalRandom()());
-dh::launch_n(gpair.device_idx(), gpair.size(), [=] __device__(int i) {
+dh::LaunchN(gpair.DeviceIdx(), gpair.Size(), [=] XGBOOST_DEVICE(int i) {
 if (!rng(i + offset)) {
-d_gpair[i] = bst_gpair();
+d_gpair[i] = GradientPair();
 }
 });
 }
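BernoulliRng is a counter-based functor: each call reseeds the engine and discards i draws, so the keep/drop decision for index i is a pure function of (seed, i). That is why SubsampleGradientPair can pass row_begin_idx as an offset and every shard still makes the same decision for a given global row. A hedged host-side sketch of the pattern (the comparison against p mirrors the functor above, whose tail is elided here):

// Hedged sketch: counter-based sampling is reproducible per index.
#include <thrust/random.h>

inline bool KeepRow(float subsample, uint32_t seed, int global_row) {
  thrust::default_random_engine rng(seed);
  thrust::uniform_real_distribution<float> dist;
  rng.discard(global_row);        // jump straight to this row's draw
  return dist(rng) <= subsample;  // same (seed, row) -> same answer
}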
-inline std::vector<int> col_sample(std::vector<int> features, float colsample) {
+inline std::vector<int> ColSample(std::vector<int> features, float colsample) {
 CHECK_GT(features.size(), 0);
 int n = std::max(1, static_cast<int>(colsample * features.size()));
@@ -397,9 +395,9 @@ inline std::vector<int> col_sample(std::vector<int> features, float colsample) {
 */
 class ColumnSampler {
-std::vector<int> feature_set_tree;
+std::vector<int> feature_set_tree_;
-std::map<int, std::vector<int>> feature_set_level;
+std::map<int, std::vector<int>> feature_set_level_;
-TrainParam param;
+TrainParam param_;
 public:
 /**
@@ -413,10 +411,10 @@ class ColumnSampler {
 void Init(int64_t num_col, const TrainParam& param) {
 this->Reset();
-this->param = param;
+this->param_ = param;
-feature_set_tree.resize(num_col);
+feature_set_tree_.resize(num_col);
-std::iota(feature_set_tree.begin(), feature_set_tree.end(), 0);
+std::iota(feature_set_tree_.begin(), feature_set_tree_.end(), 0);
-feature_set_tree = col_sample(feature_set_tree, param.colsample_bytree);
+feature_set_tree_ = ColSample(feature_set_tree_, param.colsample_bytree);
 }
 /**
@@ -426,8 +424,8 @@ class ColumnSampler {
 */
 void Reset() {
-feature_set_tree.clear();
+feature_set_tree_.clear();
-feature_set_level.clear();
+feature_set_level_.clear();
 }
 /**
@@ -442,13 +440,13 @@ class ColumnSampler {
 */
 bool ColumnUsed(int column, int depth) {
-if (feature_set_level.count(depth) == 0) {
+if (feature_set_level_.count(depth) == 0) {
-feature_set_level[depth] =
+feature_set_level_[depth] =
-col_sample(feature_set_tree, param.colsample_bylevel);
+ColSample(feature_set_tree_, param_.colsample_bylevel);
 }
-return std::binary_search(feature_set_level[depth].begin(),
+return std::binary_search(feature_set_level_[depth].begin(),
-feature_set_level[depth].end(), column);
+feature_set_level_[depth].end(), column);
 }
 };
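A hypothetical usage sketch of the class above: Init draws the per-tree feature subset once, and ColumnUsed lazily draws and caches a per-depth subset of it, so every query at a given depth sees one consistent sample.

// Hedged usage sketch; 'param' is assumed to already carry
// colsample_bytree and colsample_bylevel from the booster configuration.
ColumnSampler sampler;
sampler.Init(/*num_col=*/100, param);  // per-tree subset, drawn once
for (int depth = 0; depth < 3; ++depth) {
  // The first call at each depth samples colsample_bylevel of the tree
  // set and caches it; later calls binary-search the cached vector.
  bool used = sampler.ColumnUsed(/*column=*/42, depth);
  (void)used;
}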
@@ -23,22 +23,22 @@ namespace tree {
 DMLC_REGISTRY_FILE_TAG(updater_gpu_hist);
-typedef bst_gpair_precise gpair_sum_t;
+using GradientPairSumT = GradientPairPrecise;
-template <int BLOCK_THREADS, typename reduce_t, typename temp_storage_t>
+template <int BLOCK_THREADS, typename ReduceT, typename TempStorageT>
-__device__ gpair_sum_t ReduceFeature(const gpair_sum_t* begin,
+__device__ GradientPairSumT ReduceFeature(const GradientPairSumT* begin,
-const gpair_sum_t* end,
+const GradientPairSumT* end,
-temp_storage_t* temp_storage) {
+TempStorageT* temp_storage) {
-__shared__ cub::Uninitialized<gpair_sum_t> uninitialized_sum;
+__shared__ cub::Uninitialized<GradientPairSumT> uninitialized_sum;
-gpair_sum_t& shared_sum = uninitialized_sum.Alias();
+GradientPairSumT& shared_sum = uninitialized_sum.Alias();
-gpair_sum_t local_sum = gpair_sum_t();
+GradientPairSumT local_sum = GradientPairSumT();
 for (auto itr = begin; itr < end; itr += BLOCK_THREADS) {
 bool thread_active = itr + threadIdx.x < end;
 // Scan histogram
-gpair_sum_t bin = thread_active ? *(itr + threadIdx.x) : gpair_sum_t();
+GradientPairSumT bin = thread_active ? *(itr + threadIdx.x) : GradientPairSumT();
-local_sum += reduce_t(temp_storage->sum_reduce).Reduce(bin, cub::Sum());
+local_sum += ReduceT(temp_storage->sum_reduce).Reduce(bin, cub::Sum());
 }
 if (threadIdx.x == 0) {
@@ -49,41 +49,41 @@ __device__ gpair_sum_t ReduceFeature(const gpair_sum_t* begin,
 return shared_sum;
 }
-template <int BLOCK_THREADS, typename reduce_t, typename scan_t,
+template <int BLOCK_THREADS, typename ReduceT, typename scan_t,
-typename max_reduce_t, typename temp_storage_t>
+typename max_ReduceT, typename TempStorageT>
-__device__ void EvaluateFeature(int fidx, const gpair_sum_t* hist,
+__device__ void EvaluateFeature(int fidx, const GradientPairSumT* hist,
 const int* feature_segments, float min_fvalue,
 const float* gidx_fvalue_map,
 DeviceSplitCandidate* best_split,
 const DeviceNodeStats& node,
 const GPUTrainingParam& param,
-temp_storage_t* temp_storage, int constraint,
+TempStorageT* temp_storage, int constraint,
 const ValueConstraint& value_constraint) {
 int gidx_begin = feature_segments[fidx];
 int gidx_end = feature_segments[fidx + 1];
-gpair_sum_t feature_sum = ReduceFeature<BLOCK_THREADS, reduce_t>(
+GradientPairSumT feature_sum = ReduceFeature<BLOCK_THREADS, ReduceT>(
 hist + gidx_begin, hist + gidx_end, temp_storage);
-auto prefix_op = SumCallbackOp<gpair_sum_t>();
+auto prefix_op = SumCallbackOp<GradientPairSumT>();
 for (int scan_begin = gidx_begin; scan_begin < gidx_end;
 scan_begin += BLOCK_THREADS) {
 bool thread_active = scan_begin + threadIdx.x < gidx_end;
-gpair_sum_t bin =
+GradientPairSumT bin =
-thread_active ? hist[scan_begin + threadIdx.x] : gpair_sum_t();
+thread_active ? hist[scan_begin + threadIdx.x] : GradientPairSumT();
 scan_t(temp_storage->scan).ExclusiveScan(bin, bin, cub::Sum(), prefix_op);
 // Calculate gain
-gpair_sum_t parent_sum = gpair_sum_t(node.sum_gradients);
+GradientPairSumT parent_sum = GradientPairSumT(node.sum_gradients);
-gpair_sum_t missing = parent_sum - feature_sum;
+GradientPairSumT missing = parent_sum - feature_sum;
 bool missing_left = true;
 const float null_gain = -FLT_MAX;
 float gain = null_gain;
 if (thread_active) {
-gain = loss_chg_missing(bin, missing, parent_sum, node.root_gain, param,
+gain = LossChangeMissing(bin, missing, parent_sum, node.root_gain, param,
 constraint, value_constraint, missing_left);
 }
@@ -92,7 +92,7 @@ __device__ void EvaluateFeature(int fidx, const gpair_sum_t* hist,
 // Find thread with best gain
 cub::KeyValuePair<int, float> tuple(threadIdx.x, gain);
 cub::KeyValuePair<int, float> best =
-max_reduce_t(temp_storage->max_reduce).Reduce(tuple, cub::ArgMax());
+max_ReduceT(temp_storage->max_reduce).Reduce(tuple, cub::ArgMax());
 __shared__ cub::KeyValuePair<int, float> block_max;
 if (threadIdx.x == 0) {
@@ -107,11 +107,11 @@ __device__ void EvaluateFeature(int fidx, const gpair_sum_t* hist,
 float fvalue =
 gidx == gidx_begin ? min_fvalue : gidx_fvalue_map[gidx - 1];
-gpair_sum_t left = missing_left ? bin + missing : bin;
+GradientPairSumT left = missing_left ? bin + missing : bin;
-gpair_sum_t right = parent_sum - left;
+GradientPairSumT right = parent_sum - left;
-best_split->Update(gain, missing_left ? LeftDir : RightDir, fvalue, fidx,
+best_split->Update(gain, missing_left ? kLeftDir : kRightDir, fvalue, fidx,
-left, right, param);
+GradientPair(left), GradientPair(right), param);
 }
 __syncthreads();
 }
@@ -119,17 +119,17 @@ __device__ void EvaluateFeature(int fidx, const gpair_sum_t* hist,
 template <int BLOCK_THREADS>
 __global__ void evaluate_split_kernel(
-const gpair_sum_t* d_hist, int nidx, uint64_t n_features,
+const GradientPairSumT* d_hist, int nidx, uint64_t n_features,
 DeviceNodeStats nodes, const int* d_feature_segments,
 const float* d_fidx_min_map, const float* d_gidx_fvalue_map,
 GPUTrainingParam gpu_param, DeviceSplitCandidate* d_split,
 ValueConstraint value_constraint, int* d_monotonic_constraints) {
 typedef cub::KeyValuePair<int, float> ArgMaxT;
-typedef cub::BlockScan<gpair_sum_t, BLOCK_THREADS, cub::BLOCK_SCAN_WARP_SCANS>
+typedef cub::BlockScan<GradientPairSumT, BLOCK_THREADS, cub::BLOCK_SCAN_WARP_SCANS>
 BlockScanT;
 typedef cub::BlockReduce<ArgMaxT, BLOCK_THREADS> MaxReduceT;
-typedef cub::BlockReduce<gpair_sum_t, BLOCK_THREADS> SumReduceT;
+typedef cub::BlockReduce<GradientPairSumT, BLOCK_THREADS> SumReduceT;
 union TempStorage {
 typename BlockScanT::TempStorage scan;
@@ -163,8 +163,8 @@ __global__ void evaluate_split_kernel(
 }
 // Find a gidx value for a given feature otherwise return -1 if not found
-template <typename gidx_iter_t>
+template <typename GidxIterT>
-__device__ int BinarySearchRow(bst_uint begin, bst_uint end, gidx_iter_t data,
+__device__ int BinarySearchRow(bst_uint begin, bst_uint end, GidxIterT data,
 int fidx_begin, int fidx_end) {
 bst_uint previous_middle = UINT32_MAX;
 while (end != begin) {
@@ -189,19 +189,19 @@ __device__ int BinarySearchRow(bst_uint begin, bst_uint end, gidx_iter_t data,
 }
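BinarySearchRow walks one compressed ELLPACK row, whose bin ids are stored in feature order so each feature owns a contiguous id range, and returns the bin that falls inside [fidx_begin, fidx_end), or -1. A hedged host-side equivalent using the standard library (assuming that sorted layout):

// Hedged host-side equivalent of BinarySearchRow via std::lower_bound.
#include <algorithm>

inline int FindFeatureBin(const int* row_begin, const int* row_end,
                          int fidx_begin, int fidx_end) {
  // Bin ids within a row are sorted, so the first id >= fidx_begin
  // decides whether this feature is present in the row.
  const int* it = std::lower_bound(row_begin, row_end, fidx_begin);
  return (it != row_end && *it < fidx_end) ? *it : -1;
}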
 struct DeviceHistogram {
-dh::bulk_allocator<dh::memory_type::DEVICE> ba;
+dh::BulkAllocator<dh::MemoryType::kDevice> ba;
-dh::dvec<gpair_sum_t> data;
+dh::DVec<GradientPairSumT> data;
 int n_bins;
 void Init(int device_idx, int max_nodes, int n_bins, bool silent) {
 this->n_bins = n_bins;
-ba.allocate(device_idx, silent, &data, size_t(max_nodes) * size_t(n_bins));
+ba.Allocate(device_idx, silent, &data, size_t(max_nodes) * size_t(n_bins));
 }
-void Reset() { data.fill(gpair_sum_t()); }
+void Reset() { data.Fill(GradientPairSumT()); }
-gpair_sum_t* GetHistPtr(int nidx) { return data.data() + nidx * n_bins; }
+GradientPairSumT* GetHistPtr(int nidx) { return data.Data() + nidx * n_bins; }
 void PrintNidx(int nidx) const {
-auto h_data = data.as_vector();
+auto h_data = data.AsVector();
 std::cout << "nidx " << nidx << ":\n";
 for (int i = n_bins * nidx; i < n_bins * (nidx + 1); i++) {
 std::cout << h_data[i] << " ";
@@ -216,7 +216,7 @@ struct CalcWeightTrainParam {
 float reg_lambda;
 float max_delta_step;
 float learning_rate;
-__host__ __device__ CalcWeightTrainParam(const TrainParam& p)
+XGBOOST_DEVICE explicit CalcWeightTrainParam(const TrainParam& p)
 : min_child_weight(p.min_child_weight),
 reg_alpha(p.reg_alpha),
 reg_lambda(p.reg_lambda),
@@ -240,19 +240,19 @@ struct DeviceShard {
 int device_idx;
 int normalised_device_idx; // Device index counting from param.gpu_id
-dh::bulk_allocator<dh::memory_type::DEVICE> ba;
+dh::BulkAllocator<dh::MemoryType::kDevice> ba;
-dh::dvec<common::compressed_byte_t> gidx_buffer;
+dh::DVec<common::CompressedByteT> gidx_buffer;
-dh::dvec<bst_gpair> gpair;
+dh::DVec<GradientPair> gpair;
-dh::dvec2<bst_uint> ridx; // Row index relative to this shard
+dh::DVec2<bst_uint> ridx; // Row index relative to this shard
-dh::dvec2<int> position;
+dh::DVec2<int> position;
 std::vector<Segment> ridx_segments;
-dh::dvec<int> feature_segments;
+dh::DVec<int> feature_segments;
-dh::dvec<float> gidx_fvalue_map;
+dh::DVec<float> gidx_fvalue_map;
-dh::dvec<float> min_fvalue;
+dh::DVec<float> min_fvalue;
-dh::dvec<int> monotone_constraints;
+dh::DVec<int> monotone_constraints;
-dh::dvec<bst_float> prediction_cache;
+dh::DVec<bst_float> prediction_cache;
-std::vector<bst_gpair> node_sum_gradients;
+std::vector<GradientPair> node_sum_gradients;
-dh::dvec<bst_gpair> node_sum_gradients_d;
+dh::DVec<GradientPair> node_sum_gradients_d;
 common::CompressedIterator<uint32_t> gidx;
 int row_stride;
 bst_uint row_begin_idx; // The row offset for this shard
@@ -311,8 +311,8 @@ struct DeviceShard {
 << "Max leaves and max depth cannot both be unconstrained for "
 "gpu_hist.";
 int max_nodes =
-param.max_leaves > 0 ? param.max_leaves * 2 : n_nodes(param.max_depth);
+param.max_leaves > 0 ? param.max_leaves * 2 : MaxNodesDepth(param.max_depth);
-ba.allocate(device_idx, param.silent, &gidx_buffer, compressed_size_bytes,
+ba.Allocate(device_idx, param.silent, &gidx_buffer, compressed_size_bytes,
 &gpair, n_rows, &ridx, n_rows, &position, n_rows,
 &prediction_cache, n_rows, &node_sum_gradients_d, max_nodes,
 &feature_segments, gmat.cut->row_ptr.size(), &gidx_fvalue_map,
@@ -328,11 +328,11 @@ struct DeviceShard {
 // Compress gidx
 common::CompressedBufferWriter cbw(num_symbols);
-std::vector<common::compressed_byte_t> host_buffer(gidx_buffer.size());
+std::vector<common::CompressedByteT> host_buffer(gidx_buffer.Size());
 cbw.Write(host_buffer.data(), ellpack_matrix.begin(), ellpack_matrix.end());
 gidx_buffer = host_buffer;
 gidx =
-common::CompressedIterator<uint32_t>(gidx_buffer.data(), num_symbols);
+common::CompressedIterator<uint32_t>(gidx_buffer.Data(), num_symbols);
 common::CompressedIterator<uint32_t> ci_host(host_buffer.data(),
 num_symbols);
@@ -369,19 +369,19 @@ struct DeviceShard {
 }
 // Reset values for each update iteration
-void Reset(HostDeviceVector<bst_gpair>* dh_gpair, int device) {
+void Reset(HostDeviceVector<GradientPair>* dh_gpair, int device) {
 auto begin = dh_gpair->tbegin(device);
 dh::safe_cuda(cudaSetDevice(device_idx));
-position.current_dvec().fill(0);
+position.CurrentDVec().Fill(0);
 std::fill(node_sum_gradients.begin(), node_sum_gradients.end(),
-bst_gpair());
+GradientPair());
-thrust::sequence(ridx.current_dvec().tbegin(), ridx.current_dvec().tend());
+thrust::sequence(ridx.CurrentDVec().tbegin(), ridx.CurrentDVec().tend());
 std::fill(ridx_segments.begin(), ridx_segments.end(), Segment(0, 0));
-ridx_segments.front() = Segment(0, ridx.size());
+ridx_segments.front() = Segment(0, ridx.Size());
 this->gpair.copy(begin + row_begin_idx, begin + row_end_idx);
-subsample_gpair(&gpair, param.subsample, row_begin_idx);
+SubsampleGradientPair(&gpair, param.subsample, row_begin_idx);
 hist.Reset();
 }
@@ -389,13 +389,13 @@ struct DeviceShard {
 auto segment = ridx_segments[nidx];
 auto d_node_hist = hist.GetHistPtr(nidx);
 auto d_gidx = gidx;
-auto d_ridx = ridx.current();
+auto d_ridx = ridx.Current();
-auto d_gpair = gpair.data();
+auto d_gpair = gpair.Data();
 auto row_stride = this->row_stride;
 auto null_gidx_value = this->null_gidx_value;
 auto n_elements = segment.Size() * row_stride;
-dh::launch_n(device_idx, n_elements, [=] __device__(size_t idx) {
+dh::LaunchN(device_idx, n_elements, [=] __device__(size_t idx) {
 int ridx = d_ridx[(idx / row_stride) + segment.begin];
 int gidx = d_gidx[ridx * row_stride + idx % row_stride];
@@ -410,7 +410,7 @@ struct DeviceShard {
 auto d_node_hist_histogram = hist.GetHistPtr(nidx_histogram);
 auto d_node_hist_subtraction = hist.GetHistPtr(nidx_subtraction);
-dh::launch_n(device_idx, hist.n_bins, [=] __device__(size_t idx) {
+dh::LaunchN(device_idx, hist.n_bins, [=] __device__(size_t idx) {
 d_node_hist_subtraction[idx] =
 d_node_hist_parent[idx] - d_node_hist_histogram[idx];
 });
@@ -432,11 +432,11 @@ struct DeviceShard {
 auto d_left_count = temp_memory.Pointer<int64_t>();
 dh::safe_cuda(cudaMemset(d_left_count, 0, sizeof(int64_t)));
 auto segment = ridx_segments[nidx];
-auto d_ridx = ridx.current();
+auto d_ridx = ridx.Current();
-auto d_position = position.current();
+auto d_position = position.Current();
 auto d_gidx = gidx;
 auto row_stride = this->row_stride;
-dh::launch_n<1, 512>(
+dh::LaunchN<1, 512>(
 device_idx, segment.Size(), [=] __device__(bst_uint idx) {
 idx += segment.begin;
 auto ridx = d_ridx[idx];
@@ -482,22 +482,22 @@ struct DeviceShard {
 size_t temp_storage_bytes = 0;
 cub::DeviceRadixSort::SortPairs(
-nullptr, temp_storage_bytes, position.current() + segment.begin,
+nullptr, temp_storage_bytes, position.Current() + segment.begin,
-position.other() + segment.begin, ridx.current() + segment.begin,
+position.other() + segment.begin, ridx.Current() + segment.begin,
 ridx.other() + segment.begin, segment.Size(), min_bits, max_bits);
 temp_memory.LazyAllocate(temp_storage_bytes);
 cub::DeviceRadixSort::SortPairs(
 temp_memory.d_temp_storage, temp_memory.temp_storage_bytes,
-position.current() + segment.begin, position.other() + segment.begin,
+position.Current() + segment.begin, position.other() + segment.begin,
-ridx.current() + segment.begin, ridx.other() + segment.begin,
+ridx.Current() + segment.begin, ridx.other() + segment.begin,
 segment.Size(), min_bits, max_bits);
 dh::safe_cuda(cudaMemcpy(
-position.current() + segment.begin, position.other() + segment.begin,
+position.Current() + segment.begin, position.other() + segment.begin,
 segment.Size() * sizeof(int), cudaMemcpyDeviceToDevice));
 dh::safe_cuda(cudaMemcpy(
-ridx.current() + segment.begin, ridx.other() + segment.begin,
+ridx.Current() + segment.begin, ridx.other() + segment.begin,
 segment.Size() * sizeof(bst_uint), cudaMemcpyDeviceToDevice));
 }
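The double call to cub::DeviceRadixSort::SortPairs above is cub's standard two-phase protocol: a call with a null temp pointer only reports the scratch size, and the repeated call with real storage does the sort; restricting [min_bits, max_bits) to the few bits that can actually differ keeps the radix passes cheap. A minimal self-contained sketch of the idiom (buffer names are illustrative):

// Hedged sketch of cub's two-phase temp-storage protocol.
#include <cub/cub.cuh>

template <typename KeyT, typename ValueT>
void SortPairsSketch(const KeyT* d_keys_in, KeyT* d_keys_out,
                     const ValueT* d_vals_in, ValueT* d_vals_out,
                     int n, int begin_bit, int end_bit) {
  void* d_temp = nullptr;
  size_t temp_bytes = 0;
  // Phase 1: null temp pointer -> only temp_bytes is written.
  cub::DeviceRadixSort::SortPairs(d_temp, temp_bytes, d_keys_in, d_keys_out,
                                  d_vals_in, d_vals_out, n, begin_bit, end_bit);
  cudaMalloc(&d_temp, temp_bytes);
  // Phase 2: identical call with real storage performs the sort.
  cub::DeviceRadixSort::SortPairs(d_temp, temp_bytes, d_keys_in, d_keys_out,
                                  d_vals_in, d_vals_out, n, begin_bit, end_bit);
  cudaFree(d_temp);
}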
@@ -505,8 +505,8 @@ struct DeviceShard {
 dh::safe_cuda(cudaSetDevice(device_idx));
 if (!prediction_cache_initialised) {
 dh::safe_cuda(cudaMemcpy(
-prediction_cache.data(), &out_preds_d[row_begin_idx],
+prediction_cache.Data(), &out_preds_d[row_begin_idx],
-prediction_cache.size() * sizeof(bst_float), cudaMemcpyDefault));
+prediction_cache.Size() * sizeof(bst_float), cudaMemcpyDefault));
 }
 prediction_cache_initialised = true;
@@ -514,13 +514,13 @@ struct DeviceShard {
 thrust::copy(node_sum_gradients.begin(), node_sum_gradients.end(),
 node_sum_gradients_d.tbegin());
-auto d_position = position.current();
+auto d_position = position.Current();
-auto d_ridx = ridx.current();
+auto d_ridx = ridx.Current();
-auto d_node_sum_gradients = node_sum_gradients_d.data();
+auto d_node_sum_gradients = node_sum_gradients_d.Data();
-auto d_prediction_cache = prediction_cache.data();
+auto d_prediction_cache = prediction_cache.Data();
-dh::launch_n(
+dh::LaunchN(
-device_idx, prediction_cache.size(), [=] __device__(int local_idx) {
+device_idx, prediction_cache.Size(), [=] __device__(int local_idx) {
 int pos = d_position[local_idx];
 bst_float weight = CalcWeight(param_d, d_node_sum_gradients[pos]);
 d_prediction_cache[d_ridx[local_idx]] +=
@@ -528,8 +528,8 @@ struct DeviceShard {
 });
 dh::safe_cuda(cudaMemcpy(
-&out_preds_d[row_begin_idx], prediction_cache.data(),
+&out_preds_d[row_begin_idx], prediction_cache.Data(),
-prediction_cache.size() * sizeof(bst_float), cudaMemcpyDefault));
+prediction_cache.Size() * sizeof(bst_float), cudaMemcpyDefault));
 }
 };
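For reference, the weight that CalcWeight produces for a node with gradient sum G and hessian sum H is the usual regularised Newton step (again ignoring the reg_alpha and max_delta_step handling that CalcWeight also applies), and UpdatePredictionCache adds it to every row that ended up in that node, presumably scaled by the learning rate carried in CalcWeightTrainParam:

$$w^{*} \;=\; -\frac{G}{H+\lambda}, \qquad \mathrm{pred}_i \mathrel{+}= \eta\, w^{*}_{\mathrm{leaf}(i)}.$$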
@@ -537,33 +537,32 @@ class GPUHistMaker : public TreeUpdater {
 public:
 struct ExpandEntry;
-GPUHistMaker() : initialised(false), p_last_fmat_(nullptr) {}
+GPUHistMaker() : initialised_(false), p_last_fmat_(nullptr) {}
-~GPUHistMaker() {}
 void Init(
 const std::vector<std::pair<std::string, std::string>>& args) override {
-param.InitAllowUnknown(args);
+param_.InitAllowUnknown(args);
-CHECK(param.n_gpus != 0) << "Must have at least one device";
+CHECK(param_.n_gpus != 0) << "Must have at least one device";
-n_devices = param.n_gpus;
+n_devices_ = param_.n_gpus;
-dh::check_compute_capability();
+dh::CheckComputeCapability();
-if (param.grow_policy == TrainParam::kLossGuide) {
+if (param_.grow_policy == TrainParam::kLossGuide) {
-qexpand_.reset(new ExpandQueue(loss_guide));
+qexpand_.reset(new ExpandQueue(LossGuide));
 } else {
-qexpand_.reset(new ExpandQueue(depth_wise));
+qexpand_.reset(new ExpandQueue(DepthWise));
 }
-monitor.Init("updater_gpu_hist", param.debug_verbose);
+monitor_.Init("updater_gpu_hist", param_.debug_verbose);
 }
-void Update(HostDeviceVector<bst_gpair>* gpair, DMatrix* dmat,
+void Update(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
 const std::vector<RegTree*>& trees) override {
-monitor.Start("Update", dList);
+monitor_.Start("Update", device_list_);
-GradStats::CheckInfo(dmat->info());
+GradStats::CheckInfo(dmat->Info());
 // rescale learning rate according to size of trees
-float lr = param.learning_rate;
+float lr = param_.learning_rate;
-param.learning_rate = lr / trees.size();
+param_.learning_rate = lr / trees.size();
-ValueConstraint::Init(&param, dmat->info().num_col);
+ValueConstraint::Init(&param_, dmat->Info().num_col_);
 // build tree
 try {
 for (size_t i = 0; i < trees.size(); ++i) {
@@ -572,97 +571,97 @@ class GPUHistMaker : public TreeUpdater {
 } catch (const std::exception& e) {
 LOG(FATAL) << "GPU plugin exception: " << e.what() << std::endl;
 }
-param.learning_rate = lr;
+param_.learning_rate = lr;
-monitor.Stop("Update", dList);
+monitor_.Stop("Update", device_list_);
 }
 void InitDataOnce(DMatrix* dmat) {
-info = &dmat->info();
+info_ = &dmat->Info();
-monitor.Start("Quantiles", dList);
+monitor_.Start("Quantiles", device_list_);
-hmat_.Init(dmat, param.max_bin);
+hmat_.Init(dmat, param_.max_bin);
 gmat_.cut = &hmat_;
 gmat_.Init(dmat);
-monitor.Stop("Quantiles", dList);
+monitor_.Stop("Quantiles", device_list_);
-n_bins = hmat_.row_ptr.back();
+n_bins_ = hmat_.row_ptr.back();
-int n_devices = dh::n_devices(param.n_gpus, info->num_row);
+int n_devices = dh::NDevices(param_.n_gpus, info_->num_row_);
 bst_uint row_begin = 0;
 bst_uint shard_size =
-std::ceil(static_cast<double>(info->num_row) / n_devices);
+std::ceil(static_cast<double>(info_->num_row_) / n_devices);
-dList.resize(n_devices);
+device_list_.resize(n_devices);
 for (int d_idx = 0; d_idx < n_devices; ++d_idx) {
-int device_idx = (param.gpu_id + d_idx) % dh::n_visible_devices();
+int device_idx = (param_.gpu_id + d_idx) % dh::NVisibleDevices();
-dList[d_idx] = device_idx;
+device_list_[d_idx] = device_idx;
 }
-reducer.Init(dList);
+reducer_.Init(device_list_);
 // Partition input matrix into row segments
 std::vector<size_t> row_segments;
-shards.resize(n_devices);
+shards_.resize(n_devices);
 row_segments.push_back(0);
 for (int d_idx = 0; d_idx < n_devices; ++d_idx) {
 bst_uint row_end =
-std::min(static_cast<size_t>(row_begin + shard_size), info->num_row);
+std::min(static_cast<size_t>(row_begin + shard_size), info_->num_row_);
 row_segments.push_back(row_end);
 row_begin = row_end;
 }
 // Create device shards
-omp_set_num_threads(shards.size());
+omp_set_num_threads(shards_.size());
 #pragma omp parallel
 {
 auto cpu_thread_id = omp_get_thread_num();
-shards[cpu_thread_id] = std::unique_ptr<DeviceShard>(
+shards_[cpu_thread_id] = std::unique_ptr<DeviceShard>(
-new DeviceShard(dList[cpu_thread_id], cpu_thread_id, gmat_,
+new DeviceShard(device_list_[cpu_thread_id], cpu_thread_id, gmat_,
 row_segments[cpu_thread_id],
-row_segments[cpu_thread_id + 1], n_bins, param));
+row_segments[cpu_thread_id + 1], n_bins_, param_));
 }
 p_last_fmat_ = dmat;
-initialised = true;
+initialised_ = true;
 }
-void InitData(HostDeviceVector<bst_gpair>* gpair, DMatrix* dmat,
+void InitData(HostDeviceVector<GradientPair>* gpair, DMatrix* dmat,
 const RegTree& tree) {
-monitor.Start("InitDataOnce", dList);
+monitor_.Start("InitDataOnce", device_list_);
-if (!initialised) {
+if (!initialised_) {
 this->InitDataOnce(dmat);
 }
-monitor.Stop("InitDataOnce", dList);
+monitor_.Stop("InitDataOnce", device_list_);
-column_sampler.Init(info->num_col, param);
+column_sampler_.Init(info_->num_col_, param_);
 // Copy gpair & reset memory
-monitor.Start("InitDataReset", dList);
+monitor_.Start("InitDataReset", device_list_);
-omp_set_num_threads(shards.size());
+omp_set_num_threads(shards_.size());
 // TODO(canonizer): make it parallel again once HostDeviceVector is
 // thread-safe
-for (int shard = 0; shard < shards.size(); ++shard)
+for (int shard = 0; shard < shards_.size(); ++shard)
-shards[shard]->Reset(gpair, param.gpu_id);
+shards_[shard]->Reset(gpair, param_.gpu_id);
-monitor.Stop("InitDataReset", dList);
+monitor_.Stop("InitDataReset", device_list_);
 }
 void AllReduceHist(int nidx) {
-for (auto& shard : shards) {
+for (auto& shard : shards_) {
 auto d_node_hist = shard->hist.GetHistPtr(nidx);
-reducer.AllReduceSum(
+reducer_.AllReduceSum(
 shard->normalised_device_idx,
-reinterpret_cast<gpair_sum_t::value_t*>(d_node_hist),
+reinterpret_cast<GradientPairSumT::ValueT*>(d_node_hist),
-reinterpret_cast<gpair_sum_t::value_t*>(d_node_hist),
+reinterpret_cast<GradientPairSumT::ValueT*>(d_node_hist),
-n_bins * (sizeof(gpair_sum_t) / sizeof(gpair_sum_t::value_t)));
+n_bins_ * (sizeof(GradientPairSumT) / sizeof(GradientPairSumT::ValueT)));
 }
-reducer.Synchronize();
+reducer_.Synchronize();
 }
 void BuildHistLeftRight(int nidx_parent, int nidx_left, int nidx_right) {
 size_t left_node_max_elements = 0;
 size_t right_node_max_elements = 0;
-for (auto& shard : shards) {
+for (auto& shard : shards_) {
 left_node_max_elements = (std::max)(
 left_node_max_elements, shard->ridx_segments[nidx_left].Size());
 right_node_max_elements = (std::max)(
@@ -677,13 +676,13 @@ class GPUHistMaker : public TreeUpdater {
 subtraction_trick_nidx = nidx_left;
 }
-for (auto& shard : shards) {
+for (auto& shard : shards_) {
 shard->BuildHist(build_hist_nidx);
 }
 this->AllReduceHist(build_hist_nidx);
-for (auto& shard : shards) {
+for (auto& shard : shards_) {
 shard->SubtractionTrick(nidx_parent, build_hist_nidx,
 subtraction_trick_nidx);
 }
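The subtraction trick above means only the smaller child's histogram is ever accumulated from data: every parent row lands in exactly one child, so per-bin sums are additive and the sibling follows from one vector subtraction, which is what SubtractionTrick's kernel computes. A minimal host-side sketch of the identity:

// Hedged host-side sketch of the histogram subtraction identity:
// hist(parent) = hist(left) + hist(right), bin by bin.
#include <vector>

std::vector<double> SiblingHist(const std::vector<double>& parent,
                                const std::vector<double>& built_child) {
  std::vector<double> sibling(parent.size());
  for (size_t i = 0; i < parent.size(); ++i) {
    sibling[i] = parent[i] - built_child[i];
  }
  return sibling;
}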
@ -692,12 +691,12 @@ class GPUHistMaker : public TreeUpdater {
// Returns best loss // Returns best loss
std::vector<DeviceSplitCandidate> EvaluateSplits( std::vector<DeviceSplitCandidate> EvaluateSplits(
const std::vector<int>& nidx_set, RegTree* p_tree) { const std::vector<int>& nidx_set, RegTree* p_tree) {
auto columns = info->num_col; auto columns = info_->num_col_;
std::vector<DeviceSplitCandidate> best_splits(nidx_set.size()); std::vector<DeviceSplitCandidate> best_splits(nidx_set.size());
std::vector<DeviceSplitCandidate> candidate_splits(nidx_set.size() * std::vector<DeviceSplitCandidate> candidate_splits(nidx_set.size() *
columns); columns);
// Use first device // Use first device
auto& shard = shards.front(); auto& shard = shards_.front();
dh::safe_cuda(cudaSetDevice(shard->device_idx)); dh::safe_cuda(cudaSetDevice(shard->device_idx));
shard->temp_memory.LazyAllocate(sizeof(DeviceSplitCandidate) * columns * shard->temp_memory.LazyAllocate(sizeof(DeviceSplitCandidate) * columns *
nidx_set.size()); nidx_set.size());
@ -708,16 +707,16 @@ class GPUHistMaker : public TreeUpdater {
// Use streams to process nodes concurrently // Use streams to process nodes concurrently
for (auto i = 0; i < nidx_set.size(); i++) { for (auto i = 0; i < nidx_set.size(); i++) {
auto nidx = nidx_set[i]; auto nidx = nidx_set[i];
DeviceNodeStats node(shard->node_sum_gradients[nidx], nidx, param); DeviceNodeStats node(shard->node_sum_gradients[nidx], nidx, param_);
const int BLOCK_THREADS = 256; const int BLOCK_THREADS = 256;
evaluate_split_kernel<BLOCK_THREADS> evaluate_split_kernel<BLOCK_THREADS>
<<<uint32_t(columns), BLOCK_THREADS, 0, streams[i]>>>( <<<uint32_t(columns), BLOCK_THREADS, 0, streams[i]>>>(
shard->hist.GetHistPtr(nidx), nidx, info->num_col, node, shard->hist.GetHistPtr(nidx), nidx, info_->num_col_, node,
shard->feature_segments.data(), shard->min_fvalue.data(), shard->feature_segments.Data(), shard->min_fvalue.Data(),
shard->gidx_fvalue_map.data(), GPUTrainingParam(param), shard->gidx_fvalue_map.Data(), GPUTrainingParam(param_),
d_split + i * columns, node_value_constraints_[nidx], d_split + i * columns, node_value_constraints_[nidx],
shard->monotone_constraints.data()); shard->monotone_constraints.Data());
} }
dh::safe_cuda( dh::safe_cuda(
@ -730,9 +729,9 @@ class GPUHistMaker : public TreeUpdater {
DeviceSplitCandidate nidx_best; DeviceSplitCandidate nidx_best;
for (auto fidx = 0; fidx < columns; fidx++) { for (auto fidx = 0; fidx < columns; fidx++) {
auto& candidate = candidate_splits[i * columns + fidx]; auto& candidate = candidate_splits[i * columns + fidx];
if (column_sampler.ColumnUsed(candidate.findex, if (column_sampler_.ColumnUsed(candidate.findex,
p_tree->GetDepth(nidx))) { p_tree->GetDepth(nidx))) {
nidx_best.Update(candidate_splits[i * columns + fidx], param); nidx_best.Update(candidate_splits[i * columns + fidx], param_);
} }
} }
best_splits[i] = nidx_best; best_splits[i] = nidx_best;
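
Each node's kernel writes one candidate per feature into candidate_splits; the host then keeps the best candidate among the features that survived column sampling, as the loop above does. A simplified host-side selection sketch (Candidate and column_used are illustrative, not the library types):

#include <functional>
#include <vector>

struct Candidate { int findex{-1}; float loss_chg{0.0f}; };

// Pick the best candidate for one node, honouring a column-sampling predicate.
Candidate BestSplit(const std::vector<Candidate>& per_feature,
                    const std::function<bool(int)>& column_used) {
  Candidate best;  // loss_chg 0 means "no usable split"
  for (const auto& c : per_feature) {
    if (column_used(c.findex) && c.loss_chg > best.loss_chg) best = c;
  }
  return best;
}
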
@ -743,34 +742,34 @@ class GPUHistMaker : public TreeUpdater {
void InitRoot(RegTree* p_tree) { void InitRoot(RegTree* p_tree) {
auto root_nidx = 0; auto root_nidx = 0;
// Sum gradients // Sum gradients
std::vector<bst_gpair> tmp_sums(shards.size()); std::vector<GradientPair> tmp_sums(shards_.size());
omp_set_num_threads(shards.size()); omp_set_num_threads(shards_.size());
#pragma omp parallel #pragma omp parallel
{ {
auto cpu_thread_id = omp_get_thread_num(); auto cpu_thread_id = omp_get_thread_num();
auto& shard = shards[cpu_thread_id]; auto& shard = shards_[cpu_thread_id];
dh::safe_cuda(cudaSetDevice(shard->device_idx)); dh::safe_cuda(cudaSetDevice(shard->device_idx));
tmp_sums[cpu_thread_id] = dh::sumReduction( tmp_sums[cpu_thread_id] = dh::SumReduction(
shard->temp_memory, shard->gpair.data(), shard->gpair.size()); shard->temp_memory, shard->gpair.Data(), shard->gpair.Size());
} }
auto sum_gradient = auto sum_gradient =
std::accumulate(tmp_sums.begin(), tmp_sums.end(), bst_gpair_precise()); std::accumulate(tmp_sums.begin(), tmp_sums.end(), GradientPair());
// Generate root histogram // Generate root histogram
for (auto& shard : shards) { for (auto& shard : shards_) {
shard->BuildHist(root_nidx); shard->BuildHist(root_nidx);
} }
this->AllReduceHist(root_nidx); this->AllReduceHist(root_nidx);
// Remember root stats // Remember root stats
p_tree->stat(root_nidx).sum_hess = sum_gradient.GetHess(); p_tree->Stat(root_nidx).sum_hess = sum_gradient.GetHess();
auto weight = CalcWeight(param, sum_gradient); auto weight = CalcWeight(param_, sum_gradient);
p_tree->stat(root_nidx).base_weight = weight; p_tree->Stat(root_nidx).base_weight = weight;
(*p_tree)[root_nidx].set_leaf(param.learning_rate * weight); (*p_tree)[root_nidx].SetLeaf(param_.learning_rate * weight);
// Store sum gradients // Store sum gradients
for (auto& shard : shards) { for (auto& shard : shards_) {
shard->node_sum_gradients[root_nidx] = sum_gradient; shard->node_sum_gradients[root_nidx] = sum_gradient;
} }
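
The root sum is a two-level reduction: each device reduces its own gradient slice, then the host accumulates the per-shard partial sums. A sequential sketch of the same structure (GradPair and SumShard are illustrative stand-ins for the device reduction):

#include <numeric>
#include <vector>

struct GradPair { double grad{0.0}, hess{0.0}; };

GradPair SumShard(const std::vector<GradPair>& gpair) {  // per-device partial sum
  GradPair s;
  for (const auto& g : gpair) { s.grad += g.grad; s.hess += g.hess; }
  return s;
}

GradPair RootSum(const std::vector<std::vector<GradPair>>& shards) {
  std::vector<GradPair> partial;
  partial.reserve(shards.size());
  for (const auto& s : shards) partial.push_back(SumShard(s));  // one per GPU
  return std::accumulate(partial.begin(), partial.end(), GradPair{},
                         [](GradPair a, GradPair b) {
                           a.grad += b.grad; a.hess += b.hess;
                           return a;
                         });
}
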
@ -785,14 +784,14 @@ class GPUHistMaker : public TreeUpdater {
void UpdatePosition(const ExpandEntry& candidate, RegTree* p_tree) { void UpdatePosition(const ExpandEntry& candidate, RegTree* p_tree) {
auto nidx = candidate.nid; auto nidx = candidate.nid;
auto left_nidx = (*p_tree)[nidx].cleft(); auto left_nidx = (*p_tree)[nidx].LeftChild();
auto right_nidx = (*p_tree)[nidx].cright(); auto right_nidx = (*p_tree)[nidx].RightChild();
// convert floating-point split_pt into corresponding bin_id // convert floating-point split_pt into corresponding bin_id
// split_cond = -1 indicates that split_pt is less than all known cut points // split_cond = -1 indicates that split_pt is less than all known cut points
auto split_gidx = -1; auto split_gidx = -1;
auto fidx = candidate.split.findex; auto fidx = candidate.split.findex;
auto default_dir_left = candidate.split.dir == LeftDir; auto default_dir_left = candidate.split.dir == kLeftDir;
auto fidx_begin = hmat_.row_ptr[fidx]; auto fidx_begin = hmat_.row_ptr[fidx];
auto fidx_end = hmat_.row_ptr[fidx + 1]; auto fidx_end = hmat_.row_ptr[fidx + 1];
for (auto i = fidx_begin; i < fidx_end; ++i) { for (auto i = fidx_begin; i < fidx_end; ++i) {
@ -801,13 +800,13 @@ class GPUHistMaker : public TreeUpdater {
} }
} }
auto is_dense = info->num_nonzero == info->num_row * info->num_col; auto is_dense = info_->num_nonzero_ == info_->num_row_ * info_->num_col_;
omp_set_num_threads(shards.size()); omp_set_num_threads(shards_.size());
#pragma omp parallel #pragma omp parallel
{ {
auto cpu_thread_id = omp_get_thread_num(); auto cpu_thread_id = omp_get_thread_num();
shards[cpu_thread_id]->UpdatePosition(nidx, left_nidx, right_nidx, fidx, shards_[cpu_thread_id]->UpdatePosition(nidx, left_nidx, right_nidx, fidx,
split_gidx, default_dir_left, split_gidx, default_dir_left,
is_dense, fidx_begin, fidx_end); is_dense, fidx_begin, fidx_end);
} }
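
The conversion above quantizes the split: split_gidx becomes a bin index within the feature's cut range, so row partitioning can compare integer bin ids instead of floats. A host-side sketch of one such conversion, assuming cuts and row_ptr are laid out like hmat_ (per-feature slices of one flat cut array) and that split_gidx is the last cut not exceeding the split value:

#include <cstddef>
#include <vector>

// Returns the index of the last cut <= split_pt within feature fidx's range,
// or -1 if split_pt is below every cut for that feature.
int SplitToBin(const std::vector<float>& cuts,
               const std::vector<std::size_t>& row_ptr, int fidx,
               float split_pt) {
  int split_gidx = -1;
  for (std::size_t i = row_ptr[fidx]; i < row_ptr[fidx + 1]; ++i) {
    if (cuts[i] <= split_pt) split_gidx = static_cast<int>(i);
  }
  return split_gidx;
}
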
@ -818,55 +817,55 @@ class GPUHistMaker : public TreeUpdater {
RegTree& tree = *p_tree; RegTree& tree = *p_tree;
tree.AddChilds(candidate.nid); tree.AddChilds(candidate.nid);
auto& parent = tree[candidate.nid]; auto& parent = tree[candidate.nid];
parent.set_split(candidate.split.findex, candidate.split.fvalue, parent.SetSplit(candidate.split.findex, candidate.split.fvalue,
candidate.split.dir == LeftDir); candidate.split.dir == kLeftDir);
tree.stat(candidate.nid).loss_chg = candidate.split.loss_chg; tree.Stat(candidate.nid).loss_chg = candidate.split.loss_chg;
// Set up child constraints // Set up child constraints
node_value_constraints_.resize(tree.GetNodes().size()); node_value_constraints_.resize(tree.GetNodes().size());
GradStats left_stats(param); GradStats left_stats(param_);
left_stats.Add(candidate.split.left_sum); left_stats.Add(candidate.split.left_sum);
GradStats right_stats(param); GradStats right_stats(param_);
right_stats.Add(candidate.split.right_sum); right_stats.Add(candidate.split.right_sum);
node_value_constraints_[candidate.nid].SetChild( node_value_constraints_[candidate.nid].SetChild(
param, parent.split_index(), left_stats, right_stats, param_, parent.SplitIndex(), left_stats, right_stats,
&node_value_constraints_[parent.cleft()], &node_value_constraints_[parent.LeftChild()],
&node_value_constraints_[parent.cright()]); &node_value_constraints_[parent.RightChild()]);
// Configure left child // Configure left child
auto left_weight = auto left_weight =
node_value_constraints_[parent.cleft()].CalcWeight(param, left_stats); node_value_constraints_[parent.LeftChild()].CalcWeight(param_, left_stats);
tree[parent.cleft()].set_leaf(left_weight * param.learning_rate, 0); tree[parent.LeftChild()].SetLeaf(left_weight * param_.learning_rate, 0);
tree.stat(parent.cleft()).base_weight = left_weight; tree.Stat(parent.LeftChild()).base_weight = left_weight;
tree.stat(parent.cleft()).sum_hess = candidate.split.left_sum.GetHess(); tree.Stat(parent.LeftChild()).sum_hess = candidate.split.left_sum.GetHess();
// Configure right child // Configure right child
auto right_weight = auto right_weight =
node_value_constraints_[parent.cright()].CalcWeight(param, right_stats); node_value_constraints_[parent.RightChild()].CalcWeight(param_, right_stats);
tree[parent.cright()].set_leaf(right_weight * param.learning_rate, 0); tree[parent.RightChild()].SetLeaf(right_weight * param_.learning_rate, 0);
tree.stat(parent.cright()).base_weight = right_weight; tree.Stat(parent.RightChild()).base_weight = right_weight;
tree.stat(parent.cright()).sum_hess = candidate.split.right_sum.GetHess(); tree.Stat(parent.RightChild()).sum_hess = candidate.split.right_sum.GetHess();
// Store sum gradients // Store sum gradients
for (auto& shard : shards) { for (auto& shard : shards_) {
shard->node_sum_gradients[parent.cleft()] = candidate.split.left_sum; shard->node_sum_gradients[parent.LeftChild()] = candidate.split.left_sum;
shard->node_sum_gradients[parent.cright()] = candidate.split.right_sum; shard->node_sum_gradients[parent.RightChild()] = candidate.split.right_sum;
} }
this->UpdatePosition(candidate, p_tree); this->UpdatePosition(candidate, p_tree);
} }
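
The child weights come from the usual second-order objective: without constraints, the optimal leaf weight is -G / (H + lambda), and the value written into the tree is that weight scaled by the learning rate. A minimal unconstrained sketch (CalcWeight in the source additionally applies value constraints and parameters such as max_delta_step):

// Unconstrained optimal leaf weight for the L2-regularised objective:
//   w* = -G / (H + lambda)
double CalcLeafWeight(double sum_grad, double sum_hess, double lambda) {
  return -sum_grad / (sum_hess + lambda);
}

// The provisional leaf value stored by SetLeaf is eta * w*.
double LeafValue(double sum_grad, double sum_hess, double lambda, double eta) {
  return eta * CalcLeafWeight(sum_grad, sum_hess, lambda);
}
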
void UpdateTree(HostDeviceVector<bst_gpair>* gpair, DMatrix* p_fmat, void UpdateTree(HostDeviceVector<GradientPair>* gpair, DMatrix* p_fmat,
RegTree* p_tree) { RegTree* p_tree) {
// Temporarily store the number of threads so we can change it back later // Temporarily store the number of threads so we can change it back later
int nthread = omp_get_max_threads(); int nthread = omp_get_max_threads();
auto& tree = *p_tree; auto& tree = *p_tree;
monitor.Start("InitData", dList); monitor_.Start("InitData", device_list_);
this->InitData(gpair, p_fmat, *p_tree); this->InitData(gpair, p_fmat, *p_tree);
monitor.Stop("InitData", dList); monitor_.Stop("InitData", device_list_);
monitor.Start("InitRoot", dList); monitor_.Start("InitRoot", device_list_);
this->InitRoot(p_tree); this->InitRoot(p_tree);
monitor.Stop("InitRoot", dList); monitor_.Stop("InitRoot", device_list_);
auto timestamp = qexpand_->size(); auto timestamp = qexpand_->size();
auto num_leaves = 1; auto num_leaves = 1;
@ -874,25 +873,25 @@ class GPUHistMaker : public TreeUpdater {
while (!qexpand_->empty()) { while (!qexpand_->empty()) {
auto candidate = qexpand_->top(); auto candidate = qexpand_->top();
qexpand_->pop(); qexpand_->pop();
if (!candidate.IsValid(param, num_leaves)) continue; if (!candidate.IsValid(param_, num_leaves)) continue;
// std::cout << candidate; // std::cout << candidate;
monitor.Start("ApplySplit", dList); monitor_.Start("ApplySplit", device_list_);
this->ApplySplit(candidate, p_tree); this->ApplySplit(candidate, p_tree);
monitor.Stop("ApplySplit", dList); monitor_.Stop("ApplySplit", device_list_);
num_leaves++; num_leaves++;
auto left_child_nidx = tree[candidate.nid].cleft(); auto left_child_nidx = tree[candidate.nid].LeftChild();
auto right_child_nidx = tree[candidate.nid].cright(); auto right_child_nidx = tree[candidate.nid].RightChild();
// Only create child entries if needed // Only create child entries if needed
if (ExpandEntry::ChildIsValid(param, tree.GetDepth(left_child_nidx), if (ExpandEntry::ChildIsValid(param_, tree.GetDepth(left_child_nidx),
num_leaves)) { num_leaves)) {
monitor.Start("BuildHist", dList); monitor_.Start("BuildHist", device_list_);
this->BuildHistLeftRight(candidate.nid, left_child_nidx, this->BuildHistLeftRight(candidate.nid, left_child_nidx,
right_child_nidx); right_child_nidx);
monitor.Stop("BuildHist", dList); monitor_.Stop("BuildHist", device_list_);
monitor.Start("EvaluateSplits", dList); monitor_.Start("EvaluateSplits", device_list_);
auto splits = auto splits =
this->EvaluateSplits({left_child_nidx, right_child_nidx}, p_tree); this->EvaluateSplits({left_child_nidx, right_child_nidx}, p_tree);
qexpand_->push(ExpandEntry(left_child_nidx, qexpand_->push(ExpandEntry(left_child_nidx,
@ -901,7 +900,7 @@ class GPUHistMaker : public TreeUpdater {
qexpand_->push(ExpandEntry(right_child_nidx, qexpand_->push(ExpandEntry(right_child_nidx,
tree.GetDepth(right_child_nidx), splits[1], tree.GetDepth(right_child_nidx), splits[1],
timestamp++)); timestamp++));
monitor.Stop("EvaluateSplits", dList); monitor_.Stop("EvaluateSplits", device_list_);
} }
} }
// Reset omp num threads // Reset omp num threads
@ -910,17 +909,17 @@ class GPUHistMaker : public TreeUpdater {
bool UpdatePredictionCache( bool UpdatePredictionCache(
const DMatrix* data, HostDeviceVector<bst_float>* p_out_preds) override { const DMatrix* data, HostDeviceVector<bst_float>* p_out_preds) override {
monitor.Start("UpdatePredictionCache", dList); monitor_.Start("UpdatePredictionCache", device_list_);
if (shards.empty() || p_last_fmat_ == nullptr || p_last_fmat_ != data) if (shards_.empty() || p_last_fmat_ == nullptr || p_last_fmat_ != data)
return false; return false;
bst_float* out_preds_d = p_out_preds->ptr_d(param.gpu_id); bst_float* out_preds_d = p_out_preds->DevicePointer(param_.gpu_id);
#pragma omp parallel for schedule(static, 1) #pragma omp parallel for schedule(static, 1)
for (int shard = 0; shard < shards.size(); ++shard) { for (int shard = 0; shard < shards_.size(); ++shard) {
shards[shard]->UpdatePredictionCache(out_preds_d); shards_[shard]->UpdatePredictionCache(out_preds_d);
} }
monitor.Stop("UpdatePredictionCache", dList); monitor_.Stop("UpdatePredictionCache", device_list_);
return true; return true;
} }
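
UpdatePredictionCache skips a full prediction pass: each shard already knows which leaf every row finished in, so it can add that leaf's value to the cached prediction in place. This is only valid when the cached DMatrix is the one the tree was trained on, hence the p_last_fmat_ check. A host-side sketch of the per-row update (positions and leaf_values are illustrative names):

#include <cstddef>
#include <vector>

// Add the freshly trained tree's leaf values to cached predictions.
// positions[row] is the leaf node each row ended in during training.
void AddTreeToCache(const std::vector<int>& positions,
                    const std::vector<float>& leaf_values,
                    std::vector<float>* out_preds) {
  for (std::size_t row = 0; row < positions.size(); ++row) {
    (*out_preds)[row] += leaf_values[positions[row]];
  }
}
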
@ -933,7 +932,7 @@ class GPUHistMaker : public TreeUpdater {
uint64_t timestamp) uint64_t timestamp)
: nid(nid), depth(depth), split(split), timestamp(timestamp) {} : nid(nid), depth(depth), split(split), timestamp(timestamp) {}
bool IsValid(const TrainParam& param, int num_leaves) const { bool IsValid(const TrainParam& param, int num_leaves) const {
if (split.loss_chg <= rt_eps) return false; if (split.loss_chg <= kRtEps) return false;
if (split.left_sum.GetHess() == 0 || split.right_sum.GetHess() == 0) if (split.left_sum.GetHess() == 0 || split.right_sum.GetHess() == 0)
return false; return false;
if (param.max_depth > 0 && depth == param.max_depth) return false; if (param.max_depth > 0 && depth == param.max_depth) return false;
@ -959,38 +958,38 @@ class GPUHistMaker : public TreeUpdater {
} }
}; };
inline static bool depth_wise(ExpandEntry lhs, ExpandEntry rhs) { inline static bool DepthWise(ExpandEntry lhs, ExpandEntry rhs) {
if (lhs.depth == rhs.depth) { if (lhs.depth == rhs.depth) {
return lhs.timestamp > rhs.timestamp; // favor small timestamp return lhs.timestamp > rhs.timestamp; // favor small timestamp
} else { } else {
return lhs.depth > rhs.depth; // favor small depth return lhs.depth > rhs.depth; // favor small depth
} }
} }
inline static bool loss_guide(ExpandEntry lhs, ExpandEntry rhs) { inline static bool LossGuide(ExpandEntry lhs, ExpandEntry rhs) {
if (lhs.split.loss_chg == rhs.split.loss_chg) { if (lhs.split.loss_chg == rhs.split.loss_chg) {
return lhs.timestamp > rhs.timestamp; // favor small timestamp return lhs.timestamp > rhs.timestamp; // favor small timestamp
} else { } else {
return lhs.split.loss_chg < rhs.split.loss_chg; // favor large loss_chg return lhs.split.loss_chg < rhs.split.loss_chg; // favor large loss_chg
} }
} }
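
DepthWise and LossGuide are the two growth policies for the expansion queue: pop shallow nodes first (level order) or pop the node with the largest loss reduction first (leaf-wise), with timestamps breaking ties in favour of older entries. A minimal sketch of wiring such comparators into the queue type declared below (Entry is a cut-down stand-in for ExpandEntry):

#include <cstdint>
#include <functional>
#include <queue>
#include <vector>

struct Entry { int nid; int depth; float loss_chg; uint64_t timestamp; };

bool DepthWise(const Entry& lhs, const Entry& rhs) {
  if (lhs.depth == rhs.depth) return lhs.timestamp > rhs.timestamp;
  return lhs.depth > rhs.depth;        // smaller depth pops first
}

bool LossGuide(const Entry& lhs, const Entry& rhs) {
  if (lhs.loss_chg == rhs.loss_chg) return lhs.timestamp > rhs.timestamp;
  return lhs.loss_chg < rhs.loss_chg;  // larger loss reduction pops first
}

using Queue = std::priority_queue<Entry, std::vector<Entry>,
                                  std::function<bool(Entry, Entry)>>;
// Queue q(use_loss_guide ? LossGuide : DepthWise);
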
TrainParam param; TrainParam param_;
common::HistCutMatrix hmat_; common::HistCutMatrix hmat_;
common::GHistIndexMatrix gmat_; common::GHistIndexMatrix gmat_;
MetaInfo* info; MetaInfo* info_;
bool initialised; bool initialised_;
int n_devices; int n_devices_;
int n_bins; int n_bins_;
std::vector<std::unique_ptr<DeviceShard>> shards; std::vector<std::unique_ptr<DeviceShard>> shards_;
ColumnSampler column_sampler; ColumnSampler column_sampler_;
typedef std::priority_queue<ExpandEntry, std::vector<ExpandEntry>, typedef std::priority_queue<ExpandEntry, std::vector<ExpandEntry>,
std::function<bool(ExpandEntry, ExpandEntry)>> std::function<bool(ExpandEntry, ExpandEntry)>>
ExpandQueue; ExpandQueue;
std::unique_ptr<ExpandQueue> qexpand_; std::unique_ptr<ExpandQueue> qexpand_;
common::Monitor monitor; common::Monitor monitor_;
dh::AllReducer reducer; dh::AllReducer reducer_;
std::vector<ValueConstraint> node_value_constraints_; std::vector<ValueConstraint> node_value_constraints_;
std::vector<int> dList; std::vector<int> device_list_;
DMatrix* p_last_fmat_; DMatrix* p_last_fmat_;
}; };


@ -21,18 +21,18 @@ DMLC_REGISTRY_FILE_TAG(updater_histmaker);
template<typename TStats> template<typename TStats>
class HistMaker: public BaseMaker { class HistMaker: public BaseMaker {
public: public:
void Update(HostDeviceVector<bst_gpair> *gpair, void Update(HostDeviceVector<GradientPair> *gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
const std::vector<RegTree*> &trees) override { const std::vector<RegTree*> &trees) override {
TStats::CheckInfo(p_fmat->info()); TStats::CheckInfo(p_fmat->Info());
// rescale learning rate according to size of trees // rescale learning rate according to size of trees
float lr = param.learning_rate; float lr = param_.learning_rate;
param.learning_rate = lr / trees.size(); param_.learning_rate = lr / trees.size();
// build tree // build tree
for (size_t i = 0; i < trees.size(); ++i) { for (auto tree : trees) {
this->Update(gpair->data_h(), p_fmat, trees[i]); this->Update(gpair->HostVector(), p_fmat, tree);
} }
param.learning_rate = lr; param_.learning_rate = lr;
} }
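
Dividing the learning rate by trees.size() keeps the combined shrinkage of a multi-tree update (for example, one tree per class) equal to a single tree's step; the original rate is restored afterwards. The same save, scale, restore pattern in isolation (UpdateTrees and build_one are hypothetical helpers):

#include <functional>

void UpdateTrees(float* learning_rate, int n_trees,
                 const std::function<void(float)>& build_one) {
  const float lr = *learning_rate;
  *learning_rate = lr / n_trees;   // each tree takes 1/n of the step
  for (int i = 0; i < n_trees; ++i) build_one(*learning_rate);
  *learning_rate = lr;             // restore
}
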
protected: protected:
@ -45,13 +45,13 @@ class HistMaker: public BaseMaker {
/*! \brief size of histogram */ /*! \brief size of histogram */
unsigned size; unsigned size;
// default constructor // default constructor
HistUnit() {} HistUnit() = default;
// constructor // constructor
HistUnit(const bst_float *cut, TStats *data, unsigned size) HistUnit(const bst_float *cut, TStats *data, unsigned size)
: cut(cut), data(data), size(size) {} : cut(cut), data(data), size(size) {}
/*! \brief add a histogram to data */ /*! \brief add a histogram to data */
inline void Add(bst_float fv, inline void Add(bst_float fv,
const std::vector<bst_gpair> &gpair, const std::vector<GradientPair> &gpair,
const MetaInfo &info, const MetaInfo &info,
const bst_uint ridx) { const bst_uint ridx) {
unsigned i = std::upper_bound(cut, cut + size, fv) - cut; unsigned i = std::upper_bound(cut, cut + size, fv) - cut;
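
HistUnit::Add bins a value by binary search over the cut points: std::upper_bound returns the first cut greater than fv, and its offset is the histogram slot (the last cut acts as a sentinel larger than any observed value, so the index stays in range). A standalone sketch:

#include <algorithm>
#include <vector>

// Index of the first cut greater than fv in a sorted cut array.
unsigned BinIndex(const std::vector<float>& cut, float fv) {
  return static_cast<unsigned>(
      std::upper_bound(cut.begin(), cut.end(), fv) - cut.begin());
}
// e.g. cut = {0.5f, 1.5f, 2.5f}: fv = 0.2f -> bin 0, fv = 1.5f -> bin 2
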
@ -116,44 +116,44 @@ class HistMaker: public BaseMaker {
} }
}; };
// workspace of thread // workspace of thread
ThreadWSpace wspace; ThreadWSpace wspace_;
// reducer for histogram // reducer for histogram
rabit::Reducer<TStats, TStats::Reduce> histred; rabit::Reducer<TStats, TStats::Reduce> histred_;
// set of working features // set of working features
std::vector<bst_uint> fwork_set; std::vector<bst_uint> fwork_set_;
// update function implementation // update function implementation
virtual void Update(const std::vector<bst_gpair> &gpair, virtual void Update(const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
RegTree *p_tree) { RegTree *p_tree) {
this->InitData(gpair, *p_fmat, *p_tree); this->InitData(gpair, *p_fmat, *p_tree);
this->InitWorkSet(p_fmat, *p_tree, &fwork_set); this->InitWorkSet(p_fmat, *p_tree, &fwork_set_);
// mark root node as fresh. // mark root node as fresh.
for (int i = 0; i < p_tree->param.num_roots; ++i) { for (int i = 0; i < p_tree->param.num_roots; ++i) {
(*p_tree)[i].set_leaf(0.0f, 0); (*p_tree)[i].SetLeaf(0.0f, 0);
} }
for (int depth = 0; depth < param.max_depth; ++depth) { for (int depth = 0; depth < param_.max_depth; ++depth) {
// reset and propose candidate split // reset and propose candidate split
this->ResetPosAndPropose(gpair, p_fmat, fwork_set, *p_tree); this->ResetPosAndPropose(gpair, p_fmat, fwork_set_, *p_tree);
// create histogram // create histogram
this->CreateHist(gpair, p_fmat, fwork_set, *p_tree); this->CreateHist(gpair, p_fmat, fwork_set_, *p_tree);
// find split based on histogram statistics // find split based on histogram statistics
this->FindSplit(depth, gpair, p_fmat, fwork_set, p_tree); this->FindSplit(depth, gpair, p_fmat, fwork_set_, p_tree);
// reset position after split // reset position after split
this->ResetPositionAfterSplit(p_fmat, *p_tree); this->ResetPositionAfterSplit(p_fmat, *p_tree);
this->UpdateQueueExpand(*p_tree); this->UpdateQueueExpand(*p_tree);
// if nothing is left to expand, break // if nothing is left to expand, break
if (qexpand.size() == 0) break; if (qexpand_.size() == 0) break;
} }
for (size_t i = 0; i < qexpand.size(); ++i) { for (size_t i = 0; i < qexpand_.size(); ++i) {
const int nid = qexpand[i]; const int nid = qexpand_[i];
(*p_tree)[nid].set_leaf(p_tree->stat(nid).base_weight * param.learning_rate); (*p_tree)[nid].SetLeaf(p_tree->Stat(nid).base_weight * param_.learning_rate);
} }
} }
// this function does two jobs // this function does two jobs
// (1) reset the position in array position, to be the latest leaf id // (1) reset the position in array position, to be the latest leaf id
// (2) propose a set of candidate cuts and set wspace.rptr and wspace.cut correctly // (2) propose a set of candidate cuts and set wspace.rptr and wspace.cut correctly
virtual void ResetPosAndPropose(const std::vector<bst_gpair> &gpair, virtual void ResetPosAndPropose(const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
const std::vector <bst_uint> &fset, const std::vector <bst_uint> &fset,
const RegTree &tree) = 0; const RegTree &tree) = 0;
@ -170,7 +170,7 @@ class HistMaker: public BaseMaker {
virtual void ResetPositionAfterSplit(DMatrix *p_fmat, virtual void ResetPositionAfterSplit(DMatrix *p_fmat,
const RegTree &tree) { const RegTree &tree) {
} }
virtual void CreateHist(const std::vector<bst_gpair> &gpair, virtual void CreateHist(const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
const std::vector <bst_uint> &fset, const std::vector <bst_uint> &fset,
const RegTree &tree) = 0; const RegTree &tree) = 0;
@ -183,14 +183,14 @@ class HistMaker: public BaseMaker {
TStats *left_sum) { TStats *left_sum) {
if (hist.size == 0) return; if (hist.size == 0) return;
double root_gain = node_sum.CalcGain(param); double root_gain = node_sum.CalcGain(param_);
TStats s(param), c(param); TStats s(param_), c(param_);
for (bst_uint i = 0; i < hist.size; ++i) { for (bst_uint i = 0; i < hist.size; ++i) {
s.Add(hist.data[i]); s.Add(hist.data[i]);
if (s.sum_hess >= param.min_child_weight) { if (s.sum_hess >= param_.min_child_weight) {
c.SetSubstract(node_sum, s); c.SetSubstract(node_sum, s);
if (c.sum_hess >= param.min_child_weight) { if (c.sum_hess >= param_.min_child_weight) {
double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; double loss_chg = s.CalcGain(param_) + c.CalcGain(param_) - root_gain;
if (best->Update(static_cast<bst_float>(loss_chg), fid, hist.cut[i], false)) { if (best->Update(static_cast<bst_float>(loss_chg), fid, hist.cut[i], false)) {
*left_sum = s; *left_sum = s;
} }
@ -200,10 +200,10 @@ class HistMaker: public BaseMaker {
s.Clear(); s.Clear();
for (bst_uint i = hist.size - 1; i != 0; --i) { for (bst_uint i = hist.size - 1; i != 0; --i) {
s.Add(hist.data[i]); s.Add(hist.data[i]);
if (s.sum_hess >= param.min_child_weight) { if (s.sum_hess >= param_.min_child_weight) {
c.SetSubstract(node_sum, s); c.SetSubstract(node_sum, s);
if (c.sum_hess >= param.min_child_weight) { if (c.sum_hess >= param_.min_child_weight) {
double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; double loss_chg = s.CalcGain(param_) + c.CalcGain(param_) - root_gain;
if (best->Update(static_cast<bst_float>(loss_chg), fid, hist.cut[i-1], true)) { if (best->Update(static_cast<bst_float>(loss_chg), fid, hist.cut[i-1], true)) {
*left_sum = c; *left_sum = c;
} }
@ -212,65 +212,64 @@ class HistMaker: public BaseMaker {
} }
} }
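
EnumerateSplit scans the histogram from both ends: statistics s accumulate bins on one side, the complement c = node_sum - s gives the other side for free, and a split is scored as gain(s) + gain(c) - root_gain whenever both children meet min_child_weight. A simplified forward pass with plain (grad, hess) sums and the standard L2 gain G^2 / (H + lambda):

#include <cstddef>
#include <vector>

struct Stats { double g, h; };

double Gain(const Stats& s, double lambda) { return s.g * s.g / (s.h + lambda); }

// Best "left child <= cut[i]" split of one feature's histogram.
// Returns the best loss change, or -1 if no split passes min_child_weight.
double BestForwardSplit(const std::vector<Stats>& hist, Stats node_sum,
                        double lambda, double min_child_weight,
                        std::size_t* best_bin) {
  const double root_gain = Gain(node_sum, lambda);
  double best = -1.0;
  Stats s{0.0, 0.0};
  for (std::size_t i = 0; i < hist.size(); ++i) {
    s.g += hist[i].g; s.h += hist[i].h;
    const Stats c{node_sum.g - s.g, node_sum.h - s.h};
    if (s.h >= min_child_weight && c.h >= min_child_weight) {
      const double loss_chg = Gain(s, lambda) + Gain(c, lambda) - root_gain;
      if (loss_chg > best) { best = loss_chg; *best_bin = i; }
    }
  }
  return best;
}

The backward pass in the source mirrors this from the right, which is how missing values can default to either side.
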
inline void FindSplit(int depth, inline void FindSplit(int depth,
const std::vector<bst_gpair> &gpair, const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
const std::vector <bst_uint> &fset, const std::vector <bst_uint> &fset,
RegTree *p_tree) { RegTree *p_tree) {
const size_t num_feature = fset.size(); const size_t num_feature = fset.size();
// get the best split condition for each node // get the best split condition for each node
std::vector<SplitEntry> sol(qexpand.size()); std::vector<SplitEntry> sol(qexpand_.size());
std::vector<TStats> left_sum(qexpand.size()); std::vector<TStats> left_sum(qexpand_.size());
bst_omp_uint nexpand = static_cast<bst_omp_uint>(qexpand.size()); auto nexpand = static_cast<bst_omp_uint>(qexpand_.size());
#pragma omp parallel for schedule(dynamic, 1) #pragma omp parallel for schedule(dynamic, 1)
for (bst_omp_uint wid = 0; wid < nexpand; ++wid) { for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
const int nid = qexpand[wid]; const int nid = qexpand_[wid];
CHECK_EQ(node2workindex[nid], static_cast<int>(wid)); CHECK_EQ(node2workindex_[nid], static_cast<int>(wid));
SplitEntry &best = sol[wid]; SplitEntry &best = sol[wid];
TStats &node_sum = wspace.hset[0][num_feature + wid * (num_feature + 1)].data[0]; TStats &node_sum = wspace_.hset[0][num_feature + wid * (num_feature + 1)].data[0];
for (size_t i = 0; i < fset.size(); ++i) { for (size_t i = 0; i < fset.size(); ++i) {
EnumerateSplit(this->wspace.hset[0][i + wid * (num_feature+1)], EnumerateSplit(this->wspace_.hset[0][i + wid * (num_feature+1)],
node_sum, fset[i], &best, &left_sum[wid]); node_sum, fset[i], &best, &left_sum[wid]);
} }
} }
// get the best result, we can synchronize the solution // get the best result, we can synchronize the solution
for (bst_omp_uint wid = 0; wid < nexpand; ++wid) { for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
const int nid = qexpand[wid]; const int nid = qexpand_[wid];
const SplitEntry &best = sol[wid]; const SplitEntry &best = sol[wid];
const TStats &node_sum = wspace.hset[0][num_feature + wid * (num_feature + 1)].data[0]; const TStats &node_sum = wspace_.hset[0][num_feature + wid * (num_feature + 1)].data[0];
this->SetStats(p_tree, nid, node_sum); this->SetStats(p_tree, nid, node_sum);
// set up the values // set up the values
p_tree->stat(nid).loss_chg = best.loss_chg; p_tree->Stat(nid).loss_chg = best.loss_chg;
// now we know the solution in snode[nid], set split // now we know the solution in snode[nid], set split
if (best.loss_chg > rt_eps) { if (best.loss_chg > kRtEps) {
p_tree->AddChilds(nid); p_tree->AddChilds(nid);
(*p_tree)[nid].set_split(best.split_index(), (*p_tree)[nid].SetSplit(best.SplitIndex(),
best.split_value, best.default_left()); best.split_value, best.DefaultLeft());
// mark the new children as 0 to indicate fresh leaves // mark the new children as 0 to indicate fresh leaves
(*p_tree)[(*p_tree)[nid].cleft()].set_leaf(0.0f, 0); (*p_tree)[(*p_tree)[nid].LeftChild()].SetLeaf(0.0f, 0);
(*p_tree)[(*p_tree)[nid].cright()].set_leaf(0.0f, 0); (*p_tree)[(*p_tree)[nid].RightChild()].SetLeaf(0.0f, 0);
// right side sum // right side sum
TStats right_sum; TStats right_sum;
right_sum.SetSubstract(node_sum, left_sum[wid]); right_sum.SetSubstract(node_sum, left_sum[wid]);
this->SetStats(p_tree, (*p_tree)[nid].cleft(), left_sum[wid]); this->SetStats(p_tree, (*p_tree)[nid].LeftChild(), left_sum[wid]);
this->SetStats(p_tree, (*p_tree)[nid].cright(), right_sum); this->SetStats(p_tree, (*p_tree)[nid].RightChild(), right_sum);
} else { } else {
(*p_tree)[nid].set_leaf(p_tree->stat(nid).base_weight * param.learning_rate); (*p_tree)[nid].SetLeaf(p_tree->Stat(nid).base_weight * param_.learning_rate);
} }
} }
} }
inline void SetStats(RegTree *p_tree, int nid, const TStats &node_sum) { inline void SetStats(RegTree *p_tree, int nid, const TStats &node_sum) {
p_tree->stat(nid).base_weight = static_cast<bst_float>(node_sum.CalcWeight(param)); p_tree->Stat(nid).base_weight = static_cast<bst_float>(node_sum.CalcWeight(param_));
p_tree->stat(nid).sum_hess = static_cast<bst_float>(node_sum.sum_hess); p_tree->Stat(nid).sum_hess = static_cast<bst_float>(node_sum.sum_hess);
node_sum.SetLeafVec(param, p_tree->leafvec(nid)); node_sum.SetLeafVec(param_, p_tree->Leafvec(nid));
} }
}; };
template<typename TStats> template<typename TStats>
class CQHistMaker: public HistMaker<TStats> { class CQHistMaker: public HistMaker<TStats> {
public: public:
CQHistMaker() : cache_dmatrix_(nullptr) { CQHistMaker() = default;
}
protected: protected:
struct HistEntry { struct HistEntry {
@ -281,7 +280,7 @@ class CQHistMaker: public HistMaker<TStats> {
* do linear scan, start from istart * do linear scan, start from istart
*/ */
inline void Add(bst_float fv, inline void Add(bst_float fv,
const std::vector<bst_gpair> &gpair, const std::vector<GradientPair> &gpair,
const MetaInfo &info, const MetaInfo &info,
const bst_uint ridx) { const bst_uint ridx) {
while (istart < hist.size && !(fv < hist.cut[istart])) ++istart; while (istart < hist.size && !(fv < hist.cut[istart])) ++istart;
@ -293,7 +292,7 @@ class CQHistMaker: public HistMaker<TStats> {
* do linear scan, start from istart * do linear scan, start from istart
*/ */
inline void Add(bst_float fv, inline void Add(bst_float fv,
bst_gpair gstats) { GradientPair gstats) {
if (fv < hist.cut[istart]) { if (fv < hist.cut[istart]) {
hist.data[istart].Add(gstats); hist.data[istart].Add(gstats);
} else { } else {
@ -311,190 +310,190 @@ class CQHistMaker: public HistMaker<TStats> {
} }
}; };
// sketch type used for this // sketch type used for this
typedef common::WXQuantileSketch<bst_float, bst_float> WXQSketch; using WXQSketch = common::WXQuantileSketch<bst_float, bst_float>;
// initialize the work set of tree // initialize the work set of tree
void InitWorkSet(DMatrix *p_fmat, void InitWorkSet(DMatrix *p_fmat,
const RegTree &tree, const RegTree &tree,
std::vector<bst_uint> *p_fset) override { std::vector<bst_uint> *p_fset) override {
if (p_fmat != cache_dmatrix_) { if (p_fmat != cache_dmatrix_) {
feat_helper.InitByCol(p_fmat, tree); feat_helper_.InitByCol(p_fmat, tree);
cache_dmatrix_ = p_fmat; cache_dmatrix_ = p_fmat;
} }
feat_helper.SyncInfo(); feat_helper_.SyncInfo();
feat_helper.SampleCol(this->param.colsample_bytree, p_fset); feat_helper_.SampleCol(this->param_.colsample_bytree, p_fset);
} }
// code to create histogram // code to create histogram
void CreateHist(const std::vector<bst_gpair> &gpair, void CreateHist(const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
const std::vector<bst_uint> &fset, const std::vector<bst_uint> &fset,
const RegTree &tree) override { const RegTree &tree) override {
const MetaInfo &info = p_fmat->info(); const MetaInfo &info = p_fmat->Info();
// fill in reverse map // fill in reverse map
feat2workindex.resize(tree.param.num_feature); feat2workindex_.resize(tree.param.num_feature);
std::fill(feat2workindex.begin(), feat2workindex.end(), -1); std::fill(feat2workindex_.begin(), feat2workindex_.end(), -1);
for (size_t i = 0; i < fset.size(); ++i) { for (size_t i = 0; i < fset.size(); ++i) {
feat2workindex[fset[i]] = static_cast<int>(i); feat2workindex_[fset[i]] = static_cast<int>(i);
} }
// start to work // start to work
this->wspace.Init(this->param, 1); this->wspace_.Init(this->param_, 1);
// if it is C++11, use lazy evaluation for Allreduce, // if it is C++11, use lazy evaluation for Allreduce,
// to gain speedup in recovery // to gain speedup in recovery
#if __cplusplus >= 201103L #if __cplusplus >= 201103L
auto lazy_get_hist = [&]() auto lazy_get_hist = [&]()
#endif #endif
{ {
thread_hist.resize(omp_get_max_threads()); thread_hist_.resize(omp_get_max_threads());
// start accumulating statistics // start accumulating statistics
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(fset); dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(fset);
iter->BeforeFirst(); iter->BeforeFirst();
while (iter->Next()) { while (iter->Next()) {
const ColBatch &batch = iter->Value(); const ColBatch &batch = iter->Value();
// start enumeration // start enumeration
const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size); const auto nsize = static_cast<bst_omp_uint>(batch.size);
#pragma omp parallel for schedule(dynamic, 1) #pragma omp parallel for schedule(dynamic, 1)
for (bst_omp_uint i = 0; i < nsize; ++i) { for (bst_omp_uint i = 0; i < nsize; ++i) {
int offset = feat2workindex[batch.col_index[i]]; int offset = feat2workindex_[batch.col_index[i]];
if (offset >= 0) { if (offset >= 0) {
this->UpdateHistCol(gpair, batch[i], info, tree, this->UpdateHistCol(gpair, batch[i], info, tree,
fset, offset, fset, offset,
&thread_hist[omp_get_thread_num()]); &thread_hist_[omp_get_thread_num()]);
} }
} }
} }
// update node statistics. // update node statistics.
this->GetNodeStats(gpair, *p_fmat, tree, this->GetNodeStats(gpair, *p_fmat, tree,
&thread_stats, &node_stats); &thread_stats_, &node_stats_);
for (size_t i = 0; i < this->qexpand.size(); ++i) { for (size_t i = 0; i < this->qexpand_.size(); ++i) {
const int nid = this->qexpand[i]; const int nid = this->qexpand_[i];
const int wid = this->node2workindex[nid]; const int wid = this->node2workindex_[nid];
this->wspace.hset[0][fset.size() + wid * (fset.size()+1)] this->wspace_.hset[0][fset.size() + wid * (fset.size()+1)]
.data[0] = node_stats[nid]; .data[0] = node_stats_[nid];
} }
}; };
// sync the histogram // sync the histogram
// if it is C++11, use lazy evaluation for Allreduce // if it is C++11, use lazy evaluation for Allreduce
#if __cplusplus >= 201103L #if __cplusplus >= 201103L
this->histred.Allreduce(dmlc::BeginPtr(this->wspace.hset[0].data), this->histred_.Allreduce(dmlc::BeginPtr(this->wspace_.hset[0].data),
this->wspace.hset[0].data.size(), lazy_get_hist); this->wspace_.hset[0].data.size(), lazy_get_hist);
#else #else
this->histred.Allreduce(dmlc::BeginPtr(this->wspace.hset[0].data), this->histred_.Allreduce(dmlc::BeginPtr(this->wspace_.hset[0].data),
this->wspace.hset[0].data.size()); this->wspace_.hset[0].data.size());
#endif #endif
} }
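
Passing the histogram construction as a lambda to Allreduce lets rabit defer or skip the local computation: during failure recovery the reduced result can be restored from a checkpoint, so the expensive local pass never has to run. A minimal sketch of the calling convention, with AllreduceSum as a hypothetical stand-in for rabit's lazy-prepare overload:

#include <functional>
#include <vector>

// 'prepare' fills the local buffer and is invoked only when the reduction
// actually has to run (a real implementation skips it during recovery).
void AllreduceSum(std::vector<double>* buf,
                  const std::function<void()>& prepare) {
  prepare();
  // ... sum *buf element-wise across workers ...
}

void Example(std::vector<double>* hist) {
  auto lazy_get_hist = [&]() { /* build the local histogram into *hist */ };
  AllreduceSum(hist, lazy_get_hist);
}
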
void ResetPositionAfterSplit(DMatrix *p_fmat, void ResetPositionAfterSplit(DMatrix *p_fmat,
const RegTree &tree) override { const RegTree &tree) override {
this->GetSplitSet(this->qexpand, tree, &fsplit_set); this->GetSplitSet(this->qexpand_, tree, &fsplit_set_);
} }
void ResetPosAndPropose(const std::vector<bst_gpair> &gpair, void ResetPosAndPropose(const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
const std::vector<bst_uint> &fset, const std::vector<bst_uint> &fset,
const RegTree &tree) override { const RegTree &tree) override {
const MetaInfo &info = p_fmat->info(); const MetaInfo &info = p_fmat->Info();
// fill in reverse map // fill in reverse map
feat2workindex.resize(tree.param.num_feature); feat2workindex_.resize(tree.param.num_feature);
std::fill(feat2workindex.begin(), feat2workindex.end(), -1); std::fill(feat2workindex_.begin(), feat2workindex_.end(), -1);
work_set.clear(); work_set_.clear();
for (size_t i = 0; i < fset.size(); ++i) { for (auto fidx : fset) {
if (feat_helper.Type(fset[i]) == 2) { if (feat_helper_.Type(fidx) == 2) {
feat2workindex[fset[i]] = static_cast<int>(work_set.size()); feat2workindex_[fidx] = static_cast<int>(work_set_.size());
work_set.push_back(fset[i]); work_set_.push_back(fidx);
} else { } else {
feat2workindex[fset[i]] = -2; feat2workindex_[fidx] = -2;
} }
} }
const size_t work_set_size = work_set.size(); const size_t work_set_size = work_set_.size();
sketchs.resize(this->qexpand.size() * work_set_size); sketchs_.resize(this->qexpand_.size() * work_set_size);
for (size_t i = 0; i < sketchs.size(); ++i) { for (size_t i = 0; i < sketchs_.size(); ++i) {
sketchs[i].Init(info.num_row, this->param.sketch_eps); sketchs_[i].Init(info.num_row_, this->param_.sketch_eps);
} }
// initialize the summary array // initialize the summary array
summary_array.resize(sketchs.size()); summary_array_.resize(sketchs_.size());
// setup maximum size // setup maximum size
unsigned max_size = this->param.max_sketch_size(); unsigned max_size = this->param_.MaxSketchSize();
for (size_t i = 0; i < sketchs.size(); ++i) { for (size_t i = 0; i < sketchs_.size(); ++i) {
summary_array[i].Reserve(max_size); summary_array_[i].Reserve(max_size);
} }
{ {
// get summary // get summary
thread_sketch.resize(omp_get_max_threads()); thread_sketch_.resize(omp_get_max_threads());
// TWOPASS: use the real set + split set in the column iteration. // TWOPASS: use the real set + split set in the column iteration.
this->SetDefaultPostion(p_fmat, tree); this->SetDefaultPostion(p_fmat, tree);
work_set.insert(work_set.end(), fsplit_set.begin(), fsplit_set.end()); work_set_.insert(work_set_.end(), fsplit_set_.begin(), fsplit_set_.end());
std::sort(work_set.begin(), work_set.end()); std::sort(work_set_.begin(), work_set_.end());
work_set.resize(std::unique(work_set.begin(), work_set.end()) - work_set.begin()); work_set_.resize(std::unique(work_set_.begin(), work_set_.end()) - work_set_.begin());
// start accumulating statistics // start accumulating statistics
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(work_set); dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(work_set_);
iter->BeforeFirst(); iter->BeforeFirst();
while (iter->Next()) { while (iter->Next()) {
const ColBatch &batch = iter->Value(); const ColBatch &batch = iter->Value();
// TWOPASS: use the real set + split set in the column iteration. // TWOPASS: use the real set + split set in the column iteration.
this->CorrectNonDefaultPositionByBatch(batch, fsplit_set, tree); this->CorrectNonDefaultPositionByBatch(batch, fsplit_set_, tree);
// start enumeration // start enumeration
const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size); const auto nsize = static_cast<bst_omp_uint>(batch.size);
#pragma omp parallel for schedule(dynamic, 1) #pragma omp parallel for schedule(dynamic, 1)
for (bst_omp_uint i = 0; i < nsize; ++i) { for (bst_omp_uint i = 0; i < nsize; ++i) {
int offset = feat2workindex[batch.col_index[i]]; int offset = feat2workindex_[batch.col_index[i]];
if (offset >= 0) { if (offset >= 0) {
this->UpdateSketchCol(gpair, batch[i], tree, this->UpdateSketchCol(gpair, batch[i], tree,
work_set_size, offset, work_set_size, offset,
&thread_sketch[omp_get_thread_num()]); &thread_sketch_[omp_get_thread_num()]);
} }
} }
} }
for (size_t i = 0; i < sketchs.size(); ++i) { for (size_t i = 0; i < sketchs_.size(); ++i) {
common::WXQuantileSketch<bst_float, bst_float>::SummaryContainer out; common::WXQuantileSketch<bst_float, bst_float>::SummaryContainer out;
sketchs[i].GetSummary(&out); sketchs_[i].GetSummary(&out);
summary_array[i].SetPrune(out, max_size); summary_array_[i].SetPrune(out, max_size);
} }
CHECK_EQ(summary_array.size(), sketchs.size()); CHECK_EQ(summary_array_.size(), sketchs_.size());
} }
if (summary_array.size() != 0) { if (summary_array_.size() != 0) {
size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size); size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size);
sreducer.Allreduce(dmlc::BeginPtr(summary_array), nbytes, summary_array.size()); sreducer_.Allreduce(dmlc::BeginPtr(summary_array_), nbytes, summary_array_.size());
} }
// now we get the final result of sketch, setup the cut // now we get the final result of sketch, setup the cut
this->wspace.cut.clear(); this->wspace_.cut.clear();
this->wspace.rptr.clear(); this->wspace_.rptr.clear();
this->wspace.rptr.push_back(0); this->wspace_.rptr.push_back(0);
for (size_t wid = 0; wid < this->qexpand.size(); ++wid) { for (size_t wid = 0; wid < this->qexpand_.size(); ++wid) {
for (size_t i = 0; i < fset.size(); ++i) { for (unsigned int i : fset) {
int offset = feat2workindex[fset[i]]; int offset = feat2workindex_[i];
if (offset >= 0) { if (offset >= 0) {
const WXQSketch::Summary &a = summary_array[wid * work_set_size + offset]; const WXQSketch::Summary &a = summary_array_[wid * work_set_size + offset];
for (size_t i = 1; i < a.size; ++i) { for (size_t i = 1; i < a.size; ++i) {
bst_float cpt = a.data[i].value - rt_eps; bst_float cpt = a.data[i].value - kRtEps;
if (i == 1 || cpt > this->wspace.cut.back()) { if (i == 1 || cpt > this->wspace_.cut.back()) {
this->wspace.cut.push_back(cpt); this->wspace_.cut.push_back(cpt);
} }
} }
// push a value that is greater than anything // push a value that is greater than anything
if (a.size != 0) { if (a.size != 0) {
bst_float cpt = a.data[a.size - 1].value; bst_float cpt = a.data[a.size - 1].value;
// this must be bigger than the last value in the scale // this must be bigger than the last value in the scale
bst_float last = cpt + fabs(cpt) + rt_eps; bst_float last = cpt + fabs(cpt) + kRtEps;
this->wspace.cut.push_back(last); this->wspace_.cut.push_back(last);
} }
this->wspace.rptr.push_back(static_cast<unsigned>(this->wspace.cut.size())); this->wspace_.rptr.push_back(static_cast<unsigned>(this->wspace_.cut.size()));
} else { } else {
CHECK_EQ(offset, -2); CHECK_EQ(offset, -2);
bst_float cpt = feat_helper.MaxValue(fset[i]); bst_float cpt = feat_helper_.MaxValue(i);
this->wspace.cut.push_back(cpt + fabs(cpt) + rt_eps); this->wspace_.cut.push_back(cpt + fabs(cpt) + kRtEps);
this->wspace.rptr.push_back(static_cast<unsigned>(this->wspace.cut.size())); this->wspace_.rptr.push_back(static_cast<unsigned>(this->wspace_.cut.size()));
} }
} }
// reserve last value for global statistics // reserve last value for global statistics
this->wspace.cut.push_back(0.0f); this->wspace_.cut.push_back(0.0f);
this->wspace.rptr.push_back(static_cast<unsigned>(this->wspace.cut.size())); this->wspace_.rptr.push_back(static_cast<unsigned>(this->wspace_.cut.size()));
} }
CHECK_EQ(this->wspace.rptr.size(), CHECK_EQ(this->wspace_.rptr.size(),
(fset.size() + 1) * this->qexpand.size() + 1); (fset.size() + 1) * this->qexpand_.size() + 1);
} }
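
The block above flattens every (node, feature) cut list into one cut array indexed CSR-style by rptr: quantile values from the pruned summary are shifted down by kRtEps so that bin boundaries are exclusive, a sentinel strictly larger than any observed value closes each feature, and a trailing slot per node is reserved for the global statistics. A compact sketch of appending one feature's cuts:

#include <cmath>
#include <cstddef>
#include <vector>

// Append one feature's cut list (quantile values q from a sketch summary)
// to the flat cut/rptr arrays.
void AppendCuts(const std::vector<float>& q, float eps,
                std::vector<float>* cut, std::vector<unsigned>* rptr) {
  for (std::size_t i = 1; i < q.size(); ++i) {
    const float cpt = q[i] - eps;            // make the boundary exclusive
    if (i == 1 || cpt > cut->back()) cut->push_back(cpt);  // drop duplicates
  }
  if (!q.empty()) {
    const float back = q.back();
    cut->push_back(back + std::fabs(back) + eps);  // sentinel > any value
  }
  rptr->push_back(static_cast<unsigned>(cut->size()));
}
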
inline void UpdateHistCol(const std::vector<bst_gpair> &gpair, inline void UpdateHistCol(const std::vector<GradientPair> &gpair,
const ColBatch::Inst &c, const ColBatch::Inst &c,
const MetaInfo &info, const MetaInfo &info,
const RegTree &tree, const RegTree &tree,
@ -505,21 +504,21 @@ class CQHistMaker: public HistMaker<TStats> {
// initialize sbuilder for use // initialize sbuilder for use
std::vector<HistEntry> &hbuilder = *p_temp; std::vector<HistEntry> &hbuilder = *p_temp;
hbuilder.resize(tree.param.num_nodes); hbuilder.resize(tree.param.num_nodes);
for (size_t i = 0; i < this->qexpand.size(); ++i) { for (size_t i = 0; i < this->qexpand_.size(); ++i) {
const unsigned nid = this->qexpand[i]; const unsigned nid = this->qexpand_[i];
const unsigned wid = this->node2workindex[nid]; const unsigned wid = this->node2workindex_[nid];
hbuilder[nid].istart = 0; hbuilder[nid].istart = 0;
hbuilder[nid].hist = this->wspace.hset[0][fid_offset + wid * (fset.size()+1)]; hbuilder[nid].hist = this->wspace_.hset[0][fid_offset + wid * (fset.size()+1)];
} }
if (TStats::kSimpleStats != 0 && this->param.cache_opt != 0) { if (TStats::kSimpleStats != 0 && this->param_.cache_opt != 0) {
const bst_uint kBuffer = 32; constexpr bst_uint kBuffer = 32;
bst_uint align_length = c.length / kBuffer * kBuffer; bst_uint align_length = c.length / kBuffer * kBuffer;
int buf_position[kBuffer]; int buf_position[kBuffer];
bst_gpair buf_gpair[kBuffer]; GradientPair buf_gpair[kBuffer];
for (bst_uint j = 0; j < align_length; j += kBuffer) { for (bst_uint j = 0; j < align_length; j += kBuffer) {
for (bst_uint i = 0; i < kBuffer; ++i) { for (bst_uint i = 0; i < kBuffer; ++i) {
bst_uint ridx = c[j + i].index; bst_uint ridx = c[j + i].index;
buf_position[i] = this->position[ridx]; buf_position[i] = this->position_[ridx];
buf_gpair[i] = gpair[ridx]; buf_gpair[i] = gpair[ridx];
} }
for (bst_uint i = 0; i < kBuffer; ++i) { for (bst_uint i = 0; i < kBuffer; ++i) {
@ -531,7 +530,7 @@ class CQHistMaker: public HistMaker<TStats> {
} }
for (bst_uint j = align_length; j < c.length; ++j) { for (bst_uint j = align_length; j < c.length; ++j) {
const bst_uint ridx = c[j].index; const bst_uint ridx = c[j].index;
const int nid = this->position[ridx]; const int nid = this->position_[ridx];
if (nid >= 0) { if (nid >= 0) {
hbuilder[nid].Add(c[j].fvalue, gpair[ridx]); hbuilder[nid].Add(c[j].fvalue, gpair[ridx]);
} }
@ -539,14 +538,14 @@ class CQHistMaker: public HistMaker<TStats> {
} else { } else {
for (bst_uint j = 0; j < c.length; ++j) { for (bst_uint j = 0; j < c.length; ++j) {
const bst_uint ridx = c[j].index; const bst_uint ridx = c[j].index;
const int nid = this->position[ridx]; const int nid = this->position_[ridx];
if (nid >= 0) { if (nid >= 0) {
hbuilder[nid].Add(c[j].fvalue, gpair, info, ridx); hbuilder[nid].Add(c[j].fvalue, gpair, info, ridx);
} }
} }
} }
} }
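
The cache_opt branch above batches the random-access reads: row positions and gradients are gathered for 32 rows into small stack buffers before the histogram is touched, separating the cache-unfriendly gathers from the writes. The pattern in isolation (BufferedApply and apply are illustrative):

#include <cstddef>
#include <vector>

struct GradPair { double grad{0.0}, hess{0.0}; };

// Process rows in blocks of kBuffer: gather first, then apply.
template <typename ApplyFn>
void BufferedApply(const std::vector<unsigned>& row_index,
                   const std::vector<int>& position,
                   const std::vector<GradPair>& gpair, ApplyFn apply) {
  constexpr std::size_t kBuffer = 32;
  int buf_position[kBuffer];
  GradPair buf_gpair[kBuffer];
  const std::size_t aligned = row_index.size() / kBuffer * kBuffer;
  for (std::size_t j = 0; j < aligned; j += kBuffer) {
    for (std::size_t i = 0; i < kBuffer; ++i) {   // gather phase
      const unsigned ridx = row_index[j + i];
      buf_position[i] = position[ridx];
      buf_gpair[i] = gpair[ridx];
    }
    for (std::size_t i = 0; i < kBuffer; ++i) {   // apply phase
      if (buf_position[i] >= 0) apply(buf_position[i], buf_gpair[i]);
    }
  }
  for (std::size_t j = aligned; j < row_index.size(); ++j) {  // remainder
    const unsigned ridx = row_index[j];
    if (position[ridx] >= 0) apply(position[ridx], gpair[ridx]);
  }
}
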
inline void UpdateSketchCol(const std::vector<bst_gpair> &gpair, inline void UpdateSketchCol(const std::vector<GradientPair> &gpair,
const ColBatch::Inst &c, const ColBatch::Inst &c,
const RegTree &tree, const RegTree &tree,
size_t work_set_size, size_t work_set_size,
@ -556,45 +555,45 @@ class CQHistMaker: public HistMaker<TStats> {
// initialize sbuilder for use // initialize sbuilder for use
std::vector<BaseMaker::SketchEntry> &sbuilder = *p_temp; std::vector<BaseMaker::SketchEntry> &sbuilder = *p_temp;
sbuilder.resize(tree.param.num_nodes); sbuilder.resize(tree.param.num_nodes);
for (size_t i = 0; i < this->qexpand.size(); ++i) { for (size_t i = 0; i < this->qexpand_.size(); ++i) {
const unsigned nid = this->qexpand[i]; const unsigned nid = this->qexpand_[i];
const unsigned wid = this->node2workindex[nid]; const unsigned wid = this->node2workindex_[nid];
sbuilder[nid].sum_total = 0.0f; sbuilder[nid].sum_total = 0.0f;
sbuilder[nid].sketch = &sketchs[wid * work_set_size + offset]; sbuilder[nid].sketch = &sketchs_[wid * work_set_size + offset];
} }
// first pass: get the sum of weights; TODO: optimize to skip the first pass // first pass: get the sum of weights; TODO: optimize to skip the first pass
for (bst_uint j = 0; j < c.length; ++j) { for (bst_uint j = 0; j < c.length; ++j) {
const bst_uint ridx = c[j].index; const bst_uint ridx = c[j].index;
const int nid = this->position[ridx]; const int nid = this->position_[ridx];
if (nid >= 0) { if (nid >= 0) {
sbuilder[nid].sum_total += gpair[ridx].GetHess(); sbuilder[nid].sum_total += gpair[ridx].GetHess();
} }
} }
// if only one value, no need to do second pass // if only one value, no need to do second pass
if (c[0].fvalue == c[c.length-1].fvalue) { if (c[0].fvalue == c[c.length-1].fvalue) {
for (size_t i = 0; i < this->qexpand.size(); ++i) { for (size_t i = 0; i < this->qexpand_.size(); ++i) {
const int nid = this->qexpand[i]; const int nid = this->qexpand_[i];
sbuilder[nid].sketch->Push(c[0].fvalue, static_cast<bst_float>(sbuilder[nid].sum_total)); sbuilder[nid].sketch->Push(c[0].fvalue, static_cast<bst_float>(sbuilder[nid].sum_total));
} }
return; return;
} }
// two pass scan // two pass scan
unsigned max_size = this->param.max_sketch_size(); unsigned max_size = this->param_.MaxSketchSize();
for (size_t i = 0; i < this->qexpand.size(); ++i) { for (size_t i = 0; i < this->qexpand_.size(); ++i) {
const int nid = this->qexpand[i]; const int nid = this->qexpand_[i];
sbuilder[nid].Init(max_size); sbuilder[nid].Init(max_size);
} }
// second pass, build the sketch // second pass, build the sketch
if (TStats::kSimpleStats != 0 && this->param.cache_opt != 0) { if (TStats::kSimpleStats != 0 && this->param_.cache_opt != 0) {
const bst_uint kBuffer = 32; constexpr bst_uint kBuffer = 32;
bst_uint align_length = c.length / kBuffer * kBuffer; bst_uint align_length = c.length / kBuffer * kBuffer;
int buf_position[kBuffer]; int buf_position[kBuffer];
bst_float buf_hess[kBuffer]; bst_float buf_hess[kBuffer];
for (bst_uint j = 0; j < align_length; j += kBuffer) { for (bst_uint j = 0; j < align_length; j += kBuffer) {
for (bst_uint i = 0; i < kBuffer; ++i) { for (bst_uint i = 0; i < kBuffer; ++i) {
bst_uint ridx = c[j + i].index; bst_uint ridx = c[j + i].index;
buf_position[i] = this->position[ridx]; buf_position[i] = this->position_[ridx];
buf_hess[i] = gpair[ridx].GetHess(); buf_hess[i] = gpair[ridx].GetHess();
} }
for (bst_uint i = 0; i < kBuffer; ++i) { for (bst_uint i = 0; i < kBuffer; ++i) {
@ -606,7 +605,7 @@ class CQHistMaker: public HistMaker<TStats> {
} }
for (bst_uint j = align_length; j < c.length; ++j) { for (bst_uint j = align_length; j < c.length; ++j) {
const bst_uint ridx = c[j].index; const bst_uint ridx = c[j].index;
const int nid = this->position[ridx]; const int nid = this->position_[ridx];
if (nid >= 0) { if (nid >= 0) {
sbuilder[nid].Push(c[j].fvalue, gpair[ridx].GetHess(), max_size); sbuilder[nid].Push(c[j].fvalue, gpair[ridx].GetHess(), max_size);
} }
@ -614,136 +613,137 @@ class CQHistMaker: public HistMaker<TStats> {
} else { } else {
for (bst_uint j = 0; j < c.length; ++j) { for (bst_uint j = 0; j < c.length; ++j) {
const bst_uint ridx = c[j].index; const bst_uint ridx = c[j].index;
const int nid = this->position[ridx]; const int nid = this->position_[ridx];
if (nid >= 0) { if (nid >= 0) {
sbuilder[nid].Push(c[j].fvalue, gpair[ridx].GetHess(), max_size); sbuilder[nid].Push(c[j].fvalue, gpair[ridx].GetHess(), max_size);
} }
} }
} }
for (size_t i = 0; i < this->qexpand.size(); ++i) { for (size_t i = 0; i < this->qexpand_.size(); ++i) {
const int nid = this->qexpand[i]; const int nid = this->qexpand_[i];
sbuilder[nid].Finalize(max_size); sbuilder[nid].Finalize(max_size);
} }
} }
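
UpdateSketchCol is a two-pass weighted quantile build: the first pass totals the hessian weight per node so each sketch can size itself, the second pushes every (value, hess) pair; a column whose values are all equal short-circuits to a single weighted push. A single-node sketch of the control flow, assuming the column is sorted by value and with SketchLike as a hypothetical stand-in for the quantile sketch interface:

#include <cstddef>
#include <vector>

struct SketchLike {  // hypothetical stand-in, not the WXQuantileSketch API
  void Init(double total_weight, unsigned max_size) { /* ... */ }
  void Push(float value, float weight) { /* ... */ }
};

void BuildColumnSketch(const std::vector<float>& fvalue,  // sorted by value
                       const std::vector<float>& hess,
                       unsigned max_size, SketchLike* sketch) {
  double sum_total = 0.0;                  // pass 1: total weight
  for (const float h : hess) sum_total += h;
  if (!fvalue.empty() && fvalue.front() == fvalue.back()) {
    sketch->Push(fvalue.front(), static_cast<float>(sum_total));
    return;                                // only one distinct value
  }
  sketch->Init(sum_total, max_size);
  for (std::size_t j = 0; j < fvalue.size(); ++j) {  // pass 2: weighted push
    sketch->Push(fvalue[j], hess[j]);
  }
}
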
// cached DMatrix that the feature helper was initialized on. // cached DMatrix that the feature helper was initialized on.
const DMatrix* cache_dmatrix_; const DMatrix* cache_dmatrix_{nullptr};
// feature helper // feature helper
BaseMaker::FMetaHelper feat_helper; BaseMaker::FMetaHelper feat_helper_;
// temp space to map feature id to working index // temp space to map feature id to working index
std::vector<int> feat2workindex; std::vector<int> feat2workindex_;
// set of indices from fset that form the current work set // set of indices from fset that form the current work set
std::vector<bst_uint> work_set; std::vector<bst_uint> work_set_;
// set of indices from fset that are split candidates. // set of indices from fset that are split candidates.
std::vector<bst_uint> fsplit_set; std::vector<bst_uint> fsplit_set_;
// thread temp data // thread temp data
std::vector<std::vector<BaseMaker::SketchEntry> > thread_sketch; std::vector<std::vector<BaseMaker::SketchEntry> > thread_sketch_;
// used to hold statistics // used to hold statistics
std::vector<std::vector<TStats> > thread_stats; std::vector<std::vector<TStats> > thread_stats_;
// used to hold start pointer // used to hold start pointer
std::vector<std::vector<HistEntry> > thread_hist; std::vector<std::vector<HistEntry> > thread_hist_;
// node statistics // node statistics
std::vector<TStats> node_stats; std::vector<TStats> node_stats_;
// summary array // summary array
std::vector<WXQSketch::SummaryContainer> summary_array; std::vector<WXQSketch::SummaryContainer> summary_array_;
// reducer for summary // reducer for summary
rabit::SerializeReducer<WXQSketch::SummaryContainer> sreducer; rabit::SerializeReducer<WXQSketch::SummaryContainer> sreducer_;
// per node, per feature sketch // per node, per feature sketch
std::vector<common::WXQuantileSketch<bst_float, bst_float> > sketchs; std::vector<common::WXQuantileSketch<bst_float, bst_float> > sketchs_;
}; };
// global proposal // global proposal
template<typename TStats> template<typename TStats>
class GlobalProposalHistMaker: public CQHistMaker<TStats> { class GlobalProposalHistMaker: public CQHistMaker<TStats> {
protected: protected:
void ResetPosAndPropose(const std::vector<bst_gpair> &gpair, void ResetPosAndPropose(const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
const std::vector<bst_uint> &fset, const std::vector<bst_uint> &fset,
const RegTree &tree) override { const RegTree &tree) override {
if (this->qexpand.size() == 1) { if (this->qexpand_.size() == 1) {
cached_rptr_.clear(); cached_rptr_.clear();
cached_cut_.clear(); cached_cut_.clear();
} }
if (cached_rptr_.size() == 0) { if (cached_rptr_.size() == 0) {
CHECK_EQ(this->qexpand.size(), 1U); CHECK_EQ(this->qexpand_.size(), 1U);
CQHistMaker<TStats>::ResetPosAndPropose(gpair, p_fmat, fset, tree); CQHistMaker<TStats>::ResetPosAndPropose(gpair, p_fmat, fset, tree);
cached_rptr_ = this->wspace.rptr; cached_rptr_ = this->wspace_.rptr;
cached_cut_ = this->wspace.cut; cached_cut_ = this->wspace_.cut;
} else { } else {
this->wspace.cut.clear(); this->wspace_.cut.clear();
this->wspace.rptr.clear(); this->wspace_.rptr.clear();
this->wspace.rptr.push_back(0); this->wspace_.rptr.push_back(0);
for (size_t i = 0; i < this->qexpand.size(); ++i) { for (size_t i = 0; i < this->qexpand_.size(); ++i) {
for (size_t j = 0; j < cached_rptr_.size() - 1; ++j) { for (size_t j = 0; j < cached_rptr_.size() - 1; ++j) {
this->wspace.rptr.push_back( this->wspace_.rptr.push_back(
this->wspace.rptr.back() + cached_rptr_[j + 1] - cached_rptr_[j]); this->wspace_.rptr.back() + cached_rptr_[j + 1] - cached_rptr_[j]);
} }
this->wspace.cut.insert(this->wspace.cut.end(), cached_cut_.begin(), cached_cut_.end()); this->wspace_.cut.insert(this->wspace_.cut.end(), cached_cut_.begin(), cached_cut_.end());
} }
CHECK_EQ(this->wspace.rptr.size(), CHECK_EQ(this->wspace_.rptr.size(),
(fset.size() + 1) * this->qexpand.size() + 1); (fset.size() + 1) * this->qexpand_.size() + 1);
CHECK_EQ(this->wspace.rptr.back(), this->wspace.cut.size()); CHECK_EQ(this->wspace_.rptr.back(), this->wspace_.cut.size());
} }
} }
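
GlobalProposalHistMaker sketches the candidate cuts once, at the root (when qexpand_ holds a single node), and for later depths replays the cached rptr/cut layout for every expanded node instead of re-sketching, trading per-node proposal quality for a single sketch pass per tree. A sketch of the replication step above:

#include <cstddef>
#include <vector>

// Replicate one cached per-feature cut layout for each expanded node.
void ReplicateCuts(const std::vector<unsigned>& cached_rptr,
                   const std::vector<float>& cached_cut,
                   std::size_t num_nodes,
                   std::vector<unsigned>* rptr, std::vector<float>* cut) {
  rptr->assign(1, 0);
  cut->clear();
  for (std::size_t node = 0; node < num_nodes; ++node) {
    for (std::size_t j = 0; j + 1 < cached_rptr.size(); ++j) {
      rptr->push_back(rptr->back() + (cached_rptr[j + 1] - cached_rptr[j]));
    }
    cut->insert(cut->end(), cached_cut.begin(), cached_cut.end());
  }
}
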
// code to create histogram // code to create histogram
void CreateHist(const std::vector<bst_gpair> &gpair, void CreateHist(const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
const std::vector<bst_uint> &fset, const std::vector<bst_uint> &fset,
const RegTree &tree) override { const RegTree &tree) override {
const MetaInfo &info = p_fmat->info(); const MetaInfo &info = p_fmat->Info();
// fill in reverse map // fill in reverse map
this->feat2workindex.resize(tree.param.num_feature); this->feat2workindex_.resize(tree.param.num_feature);
this->work_set = fset; this->work_set_ = fset;
std::fill(this->feat2workindex.begin(), this->feat2workindex.end(), -1); std::fill(this->feat2workindex_.begin(), this->feat2workindex_.end(), -1);
for (size_t i = 0; i < fset.size(); ++i) { for (size_t i = 0; i < fset.size(); ++i) {
this->feat2workindex[fset[i]] = static_cast<int>(i); this->feat2workindex_[fset[i]] = static_cast<int>(i);
} }
// start to work // start to work
this->wspace.Init(this->param, 1); this->wspace_.Init(this->param_, 1);
// to gain speedup in recovery // to gain speedup in recovery
{ {
this->thread_hist.resize(omp_get_max_threads()); this->thread_hist_.resize(omp_get_max_threads());
// TWOPASS: use the real set + split set in the column iteration. // TWOPASS: use the real set + split set in the column iteration.
this->SetDefaultPostion(p_fmat, tree); this->SetDefaultPostion(p_fmat, tree);
this->work_set.insert(this->work_set.end(), this->fsplit_set.begin(), this->fsplit_set.end()); this->work_set_.insert(this->work_set_.end(), this->fsplit_set_.begin(), this->fsplit_set_.end());
std::sort(this->work_set.begin(), this->work_set.end()); std::sort(this->work_set_.begin(), this->work_set_.end());
this->work_set.resize( this->work_set_.resize(
std::unique(this->work_set.begin(), this->work_set.end()) - this->work_set.begin()); std::unique(this->work_set_.begin(), this->work_set_.end()) - this->work_set_.begin());
// start accumulating statistics // start accumulating statistics
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(this->work_set); dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(this->work_set_);
iter->BeforeFirst(); iter->BeforeFirst();
while (iter->Next()) { while (iter->Next()) {
const ColBatch &batch = iter->Value(); const ColBatch &batch = iter->Value();
// TWOPASS: use the real set + split set in the column iteration. // TWOPASS: use the real set + split set in the column iteration.
this->CorrectNonDefaultPositionByBatch(batch, this->fsplit_set, tree); this->CorrectNonDefaultPositionByBatch(batch, this->fsplit_set_, tree);
// start enumeration // start enumeration
const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size); const auto nsize = static_cast<bst_omp_uint>(batch.size);
#pragma omp parallel for schedule(dynamic, 1) #pragma omp parallel for schedule(dynamic, 1)
for (bst_omp_uint i = 0; i < nsize; ++i) { for (bst_omp_uint i = 0; i < nsize; ++i) {
int offset = this->feat2workindex[batch.col_index[i]]; int offset = this->feat2workindex_[batch.col_index[i]];
if (offset >= 0) { if (offset >= 0) {
this->UpdateHistCol(gpair, batch[i], info, tree, this->UpdateHistCol(gpair, batch[i], info, tree,
fset, offset, fset, offset,
&this->thread_hist[omp_get_thread_num()]); &this->thread_hist_[omp_get_thread_num()]);
} }
} }
} }
// update node statistics. // update node statistics.
this->GetNodeStats(gpair, *p_fmat, tree, this->GetNodeStats(gpair, *p_fmat, tree,
&(this->thread_stats), &(this->node_stats)); &(this->thread_stats_), &(this->node_stats_));
for (size_t i = 0; i < this->qexpand.size(); ++i) { for (size_t i = 0; i < this->qexpand_.size(); ++i) {
const int nid = this->qexpand[i]; const int nid = this->qexpand_[i];
const int wid = this->node2workindex[nid]; const int wid = this->node2workindex_[nid];
this->wspace.hset[0][fset.size() + wid * (fset.size()+1)] this->wspace_.hset[0][fset.size() + wid * (fset.size()+1)]
.data[0] = this->node_stats[nid]; .data[0] = this->node_stats_[nid];
} }
} }
this->histred.Allreduce(dmlc::BeginPtr(this->wspace.hset[0].data), this->histred_.Allreduce(dmlc::BeginPtr(this->wspace_.hset[0].data),
this->wspace.hset[0].data.size()); this->wspace_.hset[0].data.size());
} }
// cached unit pointer // cached unit pointer
@ -756,17 +756,17 @@ class GlobalProposalHistMaker: public CQHistMaker<TStats> {
template<typename TStats> template<typename TStats>
class QuantileHistMaker: public HistMaker<TStats> { class QuantileHistMaker: public HistMaker<TStats> {
protected: protected:
typedef common::WXQuantileSketch<bst_float, bst_float> WXQSketch; using WXQSketch = common::WXQuantileSketch<bst_float, bst_float>;
void ResetPosAndPropose(const std::vector<bst_gpair> &gpair, void ResetPosAndPropose(const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
const std::vector <bst_uint> &fset, const std::vector <bst_uint> &fset,
const RegTree &tree) override { const RegTree &tree) override {
const MetaInfo &info = p_fmat->info(); const MetaInfo &info = p_fmat->Info();
// initialize the data structure // initialize the data structure
const int nthread = omp_get_max_threads(); const int nthread = omp_get_max_threads();
sketchs.resize(this->qexpand.size() * tree.param.num_feature); sketchs_.resize(this->qexpand_.size() * tree.param.num_feature);
for (size_t i = 0; i < sketchs.size(); ++i) { for (size_t i = 0; i < sketchs_.size(); ++i) {
sketchs[i].Init(info.num_row, this->param.sketch_eps); sketchs_[i].Init(info.num_row_, this->param_.sketch_eps);
} }
// start accumulating statistics // start accumulating statistics
dmlc::DataIter<RowBatch> *iter = p_fmat->RowIterator(); dmlc::DataIter<RowBatch> *iter = p_fmat->RowIterator();
@ -775,7 +775,7 @@ class QuantileHistMaker: public HistMaker<TStats> {
const RowBatch &batch = iter->Value(); const RowBatch &batch = iter->Value();
// parallel convert to column major format // parallel convert to column major format
common::ParallelGroupBuilder<SparseBatch::Entry> common::ParallelGroupBuilder<SparseBatch::Entry>
builder(&col_ptr, &col_data, &thread_col_ptr); builder(&col_ptr_, &col_data_, &thread_col_ptr_);
builder.InitBudget(tree.param.num_feature, nthread); builder.InitBudget(tree.param.num_feature, nthread);
const bst_omp_uint nbatch = static_cast<bst_omp_uint>(batch.size); const bst_omp_uint nbatch = static_cast<bst_omp_uint>(batch.size);
@ -783,13 +783,13 @@ class QuantileHistMaker: public HistMaker<TStats> {
for (bst_omp_uint i = 0; i < nbatch; ++i) { for (bst_omp_uint i = 0; i < nbatch; ++i) {
RowBatch::Inst inst = batch[i]; RowBatch::Inst inst = batch[i];
const bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i); const bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
int nid = this->position[ridx]; int nid = this->position_[ridx];
if (nid >= 0) { if (nid >= 0) {
if (!tree[nid].is_leaf()) { if (!tree[nid].IsLeaf()) {
this->position[ridx] = nid = HistMaker<TStats>::NextLevel(inst, tree, nid); this->position_[ridx] = nid = HistMaker<TStats>::NextLevel(inst, tree, nid);
} }
if (this->node2workindex[nid] < 0) { if (this->node2workindex_[nid] < 0) {
this->position[ridx] = ~nid; this->position_[ridx] = ~nid;
} else { } else {
for (bst_uint j = 0; j < inst.length; ++j) { for (bst_uint j = 0; j < inst.length; ++j) {
builder.AddBudget(inst[j].index, omp_get_thread_num()); builder.AddBudget(inst[j].index, omp_get_thread_num());
@ -802,7 +802,7 @@ class QuantileHistMaker: public HistMaker<TStats> {
for (bst_omp_uint i = 0; i < nbatch; ++i) { for (bst_omp_uint i = 0; i < nbatch; ++i) {
RowBatch::Inst inst = batch[i]; RowBatch::Inst inst = batch[i];
const bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i); const bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
const int nid = this->position[ridx]; const int nid = this->position_[ridx];
if (nid >= 0) { if (nid >= 0) {
for (bst_uint j = 0; j < inst.length; ++j) { for (bst_uint j = 0; j < inst.length; ++j) {
builder.Push(inst[j].index, builder.Push(inst[j].index,
@ -812,71 +812,71 @@ class QuantileHistMaker: public HistMaker<TStats> {
} }
} }
// start putting things into sketch // start putting things into sketch
const bst_omp_uint nfeat = col_ptr.size() - 1; const bst_omp_uint nfeat = col_ptr_.size() - 1;
#pragma omp parallel for schedule(dynamic, 1) #pragma omp parallel for schedule(dynamic, 1)
for (bst_omp_uint k = 0; k < nfeat; ++k) { for (bst_omp_uint k = 0; k < nfeat; ++k) {
for (size_t i = col_ptr[k]; i < col_ptr[k+1]; ++i) { for (size_t i = col_ptr_[k]; i < col_ptr_[k+1]; ++i) {
const SparseBatch::Entry &e = col_data[i]; const SparseBatch::Entry &e = col_data_[i];
const int wid = this->node2workindex[e.index]; const int wid = this->node2workindex_[e.index];
sketchs[wid * tree.param.num_feature + k].Push(e.fvalue, gpair[e.index].GetHess()); sketchs_[wid * tree.param.num_feature + k].Push(e.fvalue, gpair[e.index].GetHess());
} }
} }
} }
// setup maximum size // setup maximum size
unsigned max_size = this->param.max_sketch_size(); unsigned max_size = this->param_.MaxSketchSize();
// synchronize sketch // synchronize sketch
summary_array.resize(sketchs.size()); summary_array_.resize(sketchs_.size());
for (size_t i = 0; i < sketchs.size(); ++i) { for (size_t i = 0; i < sketchs_.size(); ++i) {
common::WQuantileSketch<bst_float, bst_float>::SummaryContainer out; common::WQuantileSketch<bst_float, bst_float>::SummaryContainer out;
sketchs[i].GetSummary(&out); sketchs_[i].GetSummary(&out);
summary_array[i].Reserve(max_size); summary_array_[i].Reserve(max_size);
summary_array[i].SetPrune(out, max_size); summary_array_[i].SetPrune(out, max_size);
} }
size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size); size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size);
sreducer.Allreduce(dmlc::BeginPtr(summary_array), nbytes, summary_array.size()); sreducer_.Allreduce(dmlc::BeginPtr(summary_array_), nbytes, summary_array_.size());
// now we get the final result of sketch, setup the cut // now we get the final result of sketch, setup the cut
this->wspace.cut.clear(); this->wspace_.cut.clear();
this->wspace.rptr.clear(); this->wspace_.rptr.clear();
this->wspace.rptr.push_back(0); this->wspace_.rptr.push_back(0);
for (size_t wid = 0; wid < this->qexpand.size(); ++wid) { for (size_t wid = 0; wid < this->qexpand_.size(); ++wid) {
for (int fid = 0; fid < tree.param.num_feature; ++fid) { for (int fid = 0; fid < tree.param.num_feature; ++fid) {
const WXQSketch::Summary &a = summary_array[wid * tree.param.num_feature + fid]; const WXQSketch::Summary &a = summary_array_[wid * tree.param.num_feature + fid];
for (size_t i = 1; i < a.size; ++i) { for (size_t i = 1; i < a.size; ++i) {
bst_float cpt = a.data[i].value - rt_eps; bst_float cpt = a.data[i].value - kRtEps;
if (i == 1 || cpt > this->wspace.cut.back()) { if (i == 1 || cpt > this->wspace_.cut.back()) {
this->wspace.cut.push_back(cpt); this->wspace_.cut.push_back(cpt);
} }
} }
// push a value that is greater than anything // push a value that is greater than anything
if (a.size != 0) { if (a.size != 0) {
bst_float cpt = a.data[a.size - 1].value; bst_float cpt = a.data[a.size - 1].value;
// this must be bigger than the last value in the scale // this must be bigger than the last value in the scale
bst_float last = cpt + fabs(cpt) + rt_eps; bst_float last = cpt + fabs(cpt) + kRtEps;
this->wspace.cut.push_back(last); this->wspace_.cut.push_back(last);
} }
this->wspace.rptr.push_back(this->wspace.cut.size()); this->wspace_.rptr.push_back(this->wspace_.cut.size());
} }
// reserve last value for global statistics // reserve last value for global statistics
this->wspace.cut.push_back(0.0f); this->wspace_.cut.push_back(0.0f);
this->wspace.rptr.push_back(this->wspace.cut.size()); this->wspace_.rptr.push_back(this->wspace_.cut.size());
} }
CHECK_EQ(this->wspace.rptr.size(), CHECK_EQ(this->wspace_.rptr.size(),
(tree.param.num_feature + 1) * this->qexpand.size() + 1); (tree.param.num_feature + 1) * this->qexpand_.size() + 1);
} }
private: private:
// summary array // summary array
std::vector<WXQSketch::SummaryContainer> summary_array; std::vector<WXQSketch::SummaryContainer> summary_array_;
// reducer for summary // reducer for summary
rabit::SerializeReducer<WXQSketch::SummaryContainer> sreducer; rabit::SerializeReducer<WXQSketch::SummaryContainer> sreducer_;
// local temp column data structure // local temp column data structure
std::vector<size_t> col_ptr; std::vector<size_t> col_ptr_;
// local storage of column data // local storage of column data
std::vector<SparseBatch::Entry> col_data; std::vector<SparseBatch::Entry> col_data_;
std::vector<std::vector<size_t> > thread_col_ptr; std::vector<std::vector<size_t> > thread_col_ptr_;
// per node, per feature sketch // per node, per feature sketch
std::vector<common::WQuantileSketch<bst_float, bst_float> > sketchs; std::vector<common::WQuantileSketch<bst_float, bst_float> > sketchs_;
}; };
XGBOOST_REGISTER_TREE_UPDATER(LocalHistMaker, "grow_local_histmaker") XGBOOST_REGISTER_TREE_UPDATER(LocalHistMaker, "grow_local_histmaker")
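The renames above (wspace -> wspace_, qexpand -> qexpand_, sketchs -> sketchs_, and so on) all apply one rule: class data members take a trailing underscore. A minimal sketch of the convention, using a made-up class rather than anything from this commit:

#include <cstddef>
#include <vector>

// Illustrative only: HistCollection is a hypothetical class, not the
// histogram workspace from the diff. Data members carry a trailing
// underscore, which keeps them visually distinct from locals and parameters.
class HistCollection {
 public:
  void Clear() {
    cut_.clear();        // was: cut
    rptr_.assign(1, 0);  // was: rptr
  }

 private:
  std::vector<float> cut_;
  std::vector<std::size_t> rptr_;
};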
@ -21,37 +21,37 @@ DMLC_REGISTRY_FILE_TAG(updater_prune);
class TreePruner: public TreeUpdater { class TreePruner: public TreeUpdater {
public: public:
TreePruner() { TreePruner() {
syncher.reset(TreeUpdater::Create("sync")); syncher_.reset(TreeUpdater::Create("sync"));
} }
// set training parameter // set training parameter
void Init(const std::vector<std::pair<std::string, std::string> >& args) override { void Init(const std::vector<std::pair<std::string, std::string> >& args) override {
param.InitAllowUnknown(args); param_.InitAllowUnknown(args);
syncher->Init(args); syncher_->Init(args);
} }
// update the tree, do pruning // update the tree, do pruning
void Update(HostDeviceVector<bst_gpair> *gpair, void Update(HostDeviceVector<GradientPair> *gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
const std::vector<RegTree*> &trees) override { const std::vector<RegTree*> &trees) override {
// rescale learning rate according to size of trees // rescale learning rate according to size of trees
float lr = param.learning_rate; float lr = param_.learning_rate;
param.learning_rate = lr / trees.size(); param_.learning_rate = lr / trees.size();
for (size_t i = 0; i < trees.size(); ++i) { for (auto tree : trees) {
this->DoPrune(*trees[i]); this->DoPrune(*tree);
} }
param.learning_rate = lr; param_.learning_rate = lr;
syncher->Update(gpair, p_fmat, trees); syncher_->Update(gpair, p_fmat, trees);
} }
private: private:
// try to prune off current leaf // try to prune off current leaf
inline int TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) { // NOLINT(*) inline int TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) { // NOLINT(*)
if (tree[nid].is_root()) return npruned; if (tree[nid].IsRoot()) return npruned;
int pid = tree[nid].parent(); int pid = tree[nid].Parent();
RegTree::NodeStat &s = tree.stat(pid); RegTree::NodeStat &s = tree.Stat(pid);
++s.leaf_child_cnt; ++s.leaf_child_cnt;
if (s.leaf_child_cnt >= 2 && param.need_prune(s.loss_chg, depth - 1)) { if (s.leaf_child_cnt >= 2 && param_.NeedPrune(s.loss_chg, depth - 1)) {
// need to be pruned // need to be pruned
tree.ChangeToLeaf(pid, param.learning_rate * s.base_weight); tree.ChangeToLeaf(pid, param_.learning_rate * s.base_weight);
// tail recursion // tail recursion
return this->TryPruneLeaf(tree, pid, depth - 1, npruned + 2); return this->TryPruneLeaf(tree, pid, depth - 1, npruned + 2);
} else { } else {
@ -63,25 +63,25 @@ class TreePruner: public TreeUpdater {
int npruned = 0; int npruned = 0;
// initialize auxiliary statistics // initialize auxiliary statistics
for (int nid = 0; nid < tree.param.num_nodes; ++nid) { for (int nid = 0; nid < tree.param.num_nodes; ++nid) {
tree.stat(nid).leaf_child_cnt = 0; tree.Stat(nid).leaf_child_cnt = 0;
} }
for (int nid = 0; nid < tree.param.num_nodes; ++nid) { for (int nid = 0; nid < tree.param.num_nodes; ++nid) {
if (tree[nid].is_leaf()) { if (tree[nid].IsLeaf()) {
npruned = this->TryPruneLeaf(tree, nid, tree.GetDepth(nid), npruned); npruned = this->TryPruneLeaf(tree, nid, tree.GetDepth(nid), npruned);
} }
} }
if (!param.silent) { if (!param_.silent) {
LOG(INFO) << "tree pruning end, " << tree.param.num_roots << " roots, " LOG(INFO) << "tree pruning end, " << tree.param.num_roots << " roots, "
<< tree.num_extra_nodes() << " extra nodes, " << npruned << tree.NumExtraNodes() << " extra nodes, " << npruned
<< " pruned nodes, max_depth=" << tree.MaxDepth(); << " pruned nodes, max_depth=" << tree.MaxDepth();
} }
} }
private: private:
// synchronizer // synchronizer
std::unique_ptr<TreeUpdater> syncher; std::unique_ptr<TreeUpdater> syncher_;
// training parameter // training parameter
TrainParam param; TrainParam param_;
}; };
XGBOOST_REGISTER_TREE_UPDATER(TreePruner, "prune") XGBOOST_REGISTER_TREE_UPDATER(TreePruner, "prune")
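The pruner's Update loop is one of many sites where an index-based loop becomes a range-based for. A self-contained sketch of that rewrite, with Tree as a hypothetical stand-in for RegTree:

#include <vector>

struct Tree {            // hypothetical stand-in for RegTree
  void Prune() {}
};

void PruneAll(const std::vector<Tree*>& trees) {
  // before: for (size_t i = 0; i < trees.size(); ++i) trees[i]->Prune();
  // after the modernize-loop-convert fix-it, the index disappears and the
  // element is named directly:
  for (auto tree : trees) {
    tree->Prune();
  }
}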
@ -22,14 +22,14 @@ template<typename TStats>
class TreeRefresher: public TreeUpdater { class TreeRefresher: public TreeUpdater {
public: public:
void Init(const std::vector<std::pair<std::string, std::string> >& args) override { void Init(const std::vector<std::pair<std::string, std::string> >& args) override {
param.InitAllowUnknown(args); param_.InitAllowUnknown(args);
} }
// update the tree, do pruning // update the tree, do pruning
void Update(HostDeviceVector<bst_gpair> *gpair, void Update(HostDeviceVector<GradientPair> *gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
const std::vector<RegTree*> &trees) override { const std::vector<RegTree*> &trees) override {
if (trees.size() == 0) return; if (trees.size() == 0) return;
std::vector<bst_gpair> &gpair_h = gpair->data_h(); std::vector<GradientPair> &gpair_h = gpair->HostVector();
// number of threads // number of threads
// thread temporary space // thread temporary space
std::vector<std::vector<TStats> > stemp; std::vector<std::vector<TStats> > stemp;
@ -42,11 +42,11 @@ class TreeRefresher: public TreeUpdater {
{ {
int tid = omp_get_thread_num(); int tid = omp_get_thread_num();
int num_nodes = 0; int num_nodes = 0;
for (size_t i = 0; i < trees.size(); ++i) { for (auto tree : trees) {
num_nodes += trees[i]->param.num_nodes; num_nodes += tree->param.num_nodes;
} }
stemp[tid].resize(num_nodes, TStats(param)); stemp[tid].resize(num_nodes, TStats(param_));
std::fill(stemp[tid].begin(), stemp[tid].end(), TStats(param)); std::fill(stemp[tid].begin(), stemp[tid].end(), TStats(param_));
fvec_temp[tid].Init(trees[0]->param.num_feature); fvec_temp[tid].Init(trees[0]->param.num_feature);
} }
// if it is C++11, use lazy evaluation for Allreduce, // if it is C++11, use lazy evaluation for Allreduce,
@ -55,32 +55,32 @@ class TreeRefresher: public TreeUpdater {
auto lazy_get_stats = [&]() auto lazy_get_stats = [&]()
#endif #endif
{ {
const MetaInfo &info = p_fmat->info(); const MetaInfo &info = p_fmat->Info();
// start accumulating statistics // start accumulating statistics
dmlc::DataIter<RowBatch> *iter = p_fmat->RowIterator(); dmlc::DataIter<RowBatch> *iter = p_fmat->RowIterator();
iter->BeforeFirst(); iter->BeforeFirst();
while (iter->Next()) { while (iter->Next()) {
const RowBatch &batch = iter->Value(); const RowBatch &batch = iter->Value();
CHECK_LT(batch.size, std::numeric_limits<unsigned>::max()); CHECK_LT(batch.size, std::numeric_limits<unsigned>::max());
const bst_omp_uint nbatch = static_cast<bst_omp_uint>(batch.size); const auto nbatch = static_cast<bst_omp_uint>(batch.size);
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nbatch; ++i) { for (bst_omp_uint i = 0; i < nbatch; ++i) {
RowBatch::Inst inst = batch[i]; RowBatch::Inst inst = batch[i];
const int tid = omp_get_thread_num(); const int tid = omp_get_thread_num();
const bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i); const auto ridx = static_cast<bst_uint>(batch.base_rowid + i);
RegTree::FVec &feats = fvec_temp[tid]; RegTree::FVec &feats = fvec_temp[tid];
feats.Fill(inst); feats.Fill(inst);
int offset = 0; int offset = 0;
for (size_t j = 0; j < trees.size(); ++j) { for (auto tree : trees) {
AddStats(*trees[j], feats, gpair_h, info, ridx, AddStats(*tree, feats, gpair_h, info, ridx,
dmlc::BeginPtr(stemp[tid]) + offset); dmlc::BeginPtr(stemp[tid]) + offset);
offset += trees[j]->param.num_nodes; offset += tree->param.num_nodes;
} }
feats.Drop(inst); feats.Drop(inst);
} }
} }
// aggregate the statistics // aggregate the statistics
int num_nodes = static_cast<int>(stemp[0].size()); auto num_nodes = static_cast<int>(stemp[0].size());
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for (int nid = 0; nid < num_nodes; ++nid) { for (int nid = 0; nid < num_nodes; ++nid) {
for (int tid = 1; tid < nthread; ++tid) { for (int tid = 1; tid < nthread; ++tid) {
@ -89,64 +89,64 @@ class TreeRefresher: public TreeUpdater {
} }
}; };
#if __cplusplus >= 201103L #if __cplusplus >= 201103L
reducer.Allreduce(dmlc::BeginPtr(stemp[0]), stemp[0].size(), lazy_get_stats); reducer_.Allreduce(dmlc::BeginPtr(stemp[0]), stemp[0].size(), lazy_get_stats);
#else #else
reducer.Allreduce(dmlc::BeginPtr(stemp[0]), stemp[0].size()); reducer_.Allreduce(dmlc::BeginPtr(stemp[0]), stemp[0].size());
#endif #endif
// rescale learning rate according to size of trees // rescale learning rate according to size of trees
float lr = param.learning_rate; float lr = param_.learning_rate;
param.learning_rate = lr / trees.size(); param_.learning_rate = lr / trees.size();
int offset = 0; int offset = 0;
for (size_t i = 0; i < trees.size(); ++i) { for (auto tree : trees) {
for (int rid = 0; rid < trees[i]->param.num_roots; ++rid) { for (int rid = 0; rid < tree->param.num_roots; ++rid) {
this->Refresh(dmlc::BeginPtr(stemp[0]) + offset, rid, trees[i]); this->Refresh(dmlc::BeginPtr(stemp[0]) + offset, rid, tree);
} }
offset += trees[i]->param.num_nodes; offset += tree->param.num_nodes;
} }
// set learning rate back // set learning rate back
param.learning_rate = lr; param_.learning_rate = lr;
} }
private: private:
inline static void AddStats(const RegTree &tree, inline static void AddStats(const RegTree &tree,
const RegTree::FVec &feat, const RegTree::FVec &feat,
const std::vector<bst_gpair> &gpair, const std::vector<GradientPair> &gpair,
const MetaInfo &info, const MetaInfo &info,
const bst_uint ridx, const bst_uint ridx,
TStats *gstats) { TStats *gstats) {
// start from groups that belong to current data // start from groups that belong to current data
int pid = static_cast<int>(info.GetRoot(ridx)); auto pid = static_cast<int>(info.GetRoot(ridx));
gstats[pid].Add(gpair, info, ridx); gstats[pid].Add(gpair, info, ridx);
// traverse tree // traverse tree
while (!tree[pid].is_leaf()) { while (!tree[pid].IsLeaf()) {
unsigned split_index = tree[pid].split_index(); unsigned split_index = tree[pid].SplitIndex();
pid = tree.GetNext(pid, feat.fvalue(split_index), feat.is_missing(split_index)); pid = tree.GetNext(pid, feat.Fvalue(split_index), feat.IsMissing(split_index));
gstats[pid].Add(gpair, info, ridx); gstats[pid].Add(gpair, info, ridx);
} }
} }
inline void Refresh(const TStats *gstats, inline void Refresh(const TStats *gstats,
int nid, RegTree *p_tree) { int nid, RegTree *p_tree) {
RegTree &tree = *p_tree; RegTree &tree = *p_tree;
tree.stat(nid).base_weight = static_cast<bst_float>(gstats[nid].CalcWeight(param)); tree.Stat(nid).base_weight = static_cast<bst_float>(gstats[nid].CalcWeight(param_));
tree.stat(nid).sum_hess = static_cast<bst_float>(gstats[nid].sum_hess); tree.Stat(nid).sum_hess = static_cast<bst_float>(gstats[nid].sum_hess);
gstats[nid].SetLeafVec(param, tree.leafvec(nid)); gstats[nid].SetLeafVec(param_, tree.Leafvec(nid));
if (tree[nid].is_leaf()) { if (tree[nid].IsLeaf()) {
if (param.refresh_leaf) { if (param_.refresh_leaf) {
tree[nid].set_leaf(tree.stat(nid).base_weight * param.learning_rate); tree[nid].SetLeaf(tree.Stat(nid).base_weight * param_.learning_rate);
} }
} else { } else {
tree.stat(nid).loss_chg = static_cast<bst_float>( tree.Stat(nid).loss_chg = static_cast<bst_float>(
gstats[tree[nid].cleft()].CalcGain(param) + gstats[tree[nid].LeftChild()].CalcGain(param_) +
gstats[tree[nid].cright()].CalcGain(param) - gstats[tree[nid].RightChild()].CalcGain(param_) -
gstats[nid].CalcGain(param)); gstats[nid].CalcGain(param_));
this->Refresh(gstats, tree[nid].cleft(), p_tree); this->Refresh(gstats, tree[nid].LeftChild(), p_tree);
this->Refresh(gstats, tree[nid].cright(), p_tree); this->Refresh(gstats, tree[nid].RightChild(), p_tree);
} }
} }
// training parameter // training parameter
TrainParam param; TrainParam param_;
// reducer // reducer
rabit::Reducer<TStats, TStats::Reduce> reducer; rabit::Reducer<TStats, TStats::Reduce> reducer_;
}; };
XGBOOST_REGISTER_TREE_UPDATER(TreeRefresher, "refresh") XGBOOST_REGISTER_TREE_UPDATER(TreeRefresher, "refresh")
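The refresher also picks up several auto conversions (nbatch, ridx, num_nodes, pid): where the right-hand side is a static_cast, the target type is already spelled once, so auto avoids repeating it. A sketch under the assumption that bst_omp_uint is an unsigned integer alias:

#include <cstddef>
#include <cstdint>
#include <vector>

using bst_omp_uint = std::uint32_t;  // assumed alias for the OpenMP index type

std::size_t CountEntries(const std::vector<float>& batch) {
  // before: const bst_omp_uint nbatch = static_cast<bst_omp_uint>(batch.size());
  // modernize-use-auto: the type already appears in the cast, so auto
  // drops the duplication without changing the deduced type.
  const auto nbatch = static_cast<bst_omp_uint>(batch.size());
  std::size_t count = 0;
  for (bst_omp_uint i = 0; i < nbatch; ++i) {
    ++count;  // placeholder body
  }
  return count;
}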
@ -22,58 +22,57 @@ DMLC_REGISTRY_FILE_TAG(updater_skmaker);
class SketchMaker: public BaseMaker { class SketchMaker: public BaseMaker {
public: public:
void Update(HostDeviceVector<bst_gpair> *gpair, void Update(HostDeviceVector<GradientPair> *gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
const std::vector<RegTree*> &trees) override { const std::vector<RegTree*> &trees) override {
// rescale learning rate according to size of trees // rescale learning rate according to size of trees
float lr = param.learning_rate; float lr = param_.learning_rate;
param.learning_rate = lr / trees.size(); param_.learning_rate = lr / trees.size();
// build tree // build tree
for (size_t i = 0; i < trees.size(); ++i) { for (auto tree : trees) {
this->Update(gpair->data_h(), p_fmat, trees[i]); this->Update(gpair->HostVector(), p_fmat, tree);
} }
param.learning_rate = lr; param_.learning_rate = lr;
} }
protected: protected:
inline void Update(const std::vector<bst_gpair> &gpair, inline void Update(const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
RegTree *p_tree) { RegTree *p_tree) {
this->InitData(gpair, *p_fmat, *p_tree); this->InitData(gpair, *p_fmat, *p_tree);
for (int depth = 0; depth < param.max_depth; ++depth) { for (int depth = 0; depth < param_.max_depth; ++depth) {
this->GetNodeStats(gpair, *p_fmat, *p_tree, this->GetNodeStats(gpair, *p_fmat, *p_tree,
&thread_stats, &node_stats); &thread_stats_, &node_stats_);
this->BuildSketch(gpair, p_fmat, *p_tree); this->BuildSketch(gpair, p_fmat, *p_tree);
this->SyncNodeStats(); this->SyncNodeStats();
this->FindSplit(depth, gpair, p_fmat, p_tree); this->FindSplit(depth, gpair, p_fmat, p_tree);
this->ResetPositionCol(qexpand, p_fmat, *p_tree); this->ResetPositionCol(qexpand_, p_fmat, *p_tree);
this->UpdateQueueExpand(*p_tree); this->UpdateQueueExpand(*p_tree);
// if nothing is left to expand, break // if nothing is left to expand, break
if (qexpand.size() == 0) break; if (qexpand_.size() == 0) break;
} }
if (qexpand.size() != 0) { if (qexpand_.size() != 0) {
this->GetNodeStats(gpair, *p_fmat, *p_tree, this->GetNodeStats(gpair, *p_fmat, *p_tree,
&thread_stats, &node_stats); &thread_stats_, &node_stats_);
this->SyncNodeStats(); this->SyncNodeStats();
} }
// set all statistics correctly // set all statistics correctly
for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) { for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) {
this->SetStats(nid, node_stats[nid], p_tree); this->SetStats(nid, node_stats_[nid], p_tree);
if (!(*p_tree)[nid].is_leaf()) { if (!(*p_tree)[nid].IsLeaf()) {
p_tree->stat(nid).loss_chg = static_cast<bst_float>( p_tree->Stat(nid).loss_chg = static_cast<bst_float>(
node_stats[(*p_tree)[nid].cleft()].CalcGain(param) + node_stats_[(*p_tree)[nid].LeftChild()].CalcGain(param_) +
node_stats[(*p_tree)[nid].cright()].CalcGain(param) - node_stats_[(*p_tree)[nid].RightChild()].CalcGain(param_) -
node_stats[nid].CalcGain(param)); node_stats_[nid].CalcGain(param_));
} }
} }
// set left leaves // set left leaves
for (size_t i = 0; i < qexpand.size(); ++i) { for (int nid : qexpand_) {
const int nid = qexpand[i]; (*p_tree)[nid].SetLeaf(p_tree->Stat(nid).base_weight * param_.learning_rate);
(*p_tree)[nid].set_leaf(p_tree->stat(nid).base_weight * param.learning_rate);
} }
} }
// define the sketch we want to use // define the sketch we want to use
typedef common::WXQuantileSketch<bst_float, bst_float> WXQSketch; using WXQSketch = common::WXQuantileSketch<bst_float, bst_float>;
private: private:
// statistics needed in the gradient calculation // statistics needed in the gradient calculation
@ -84,20 +83,20 @@ class SketchMaker: public BaseMaker {
double neg_grad; double neg_grad;
/*! \brief sum of hessian statistics */ /*! \brief sum of hessian statistics */
double sum_hess; double sum_hess;
SKStats(void) {} SKStats() = default;
// constructor // constructor
explicit SKStats(const TrainParam &param) { explicit SKStats(const TrainParam &param) {
this->Clear(); this->Clear();
} }
/*! \brief clear the statistics */ /*! \brief clear the statistics */
inline void Clear(void) { inline void Clear() {
neg_grad = pos_grad = sum_hess = 0.0f; neg_grad = pos_grad = sum_hess = 0.0f;
} }
// accumulate statistics // accumulate statistics
inline void Add(const std::vector<bst_gpair> &gpair, inline void Add(const std::vector<GradientPair> &gpair,
const MetaInfo &info, const MetaInfo &info,
bst_uint ridx) { bst_uint ridx) {
const bst_gpair &b = gpair[ridx]; const GradientPair &b = gpair[ridx];
if (b.GetGrad() >= 0.0f) { if (b.GetGrad() >= 0.0f) {
pos_grad += b.GetGrad(); pos_grad += b.GetGrad();
} else { } else {
@ -133,48 +132,48 @@ class SketchMaker: public BaseMaker {
inline void SetLeafVec(const TrainParam &param, bst_float *vec) const { inline void SetLeafVec(const TrainParam &param, bst_float *vec) const {
} }
}; };
inline void BuildSketch(const std::vector<bst_gpair> &gpair, inline void BuildSketch(const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
const RegTree &tree) { const RegTree &tree) {
const MetaInfo& info = p_fmat->info(); const MetaInfo& info = p_fmat->Info();
sketchs.resize(this->qexpand.size() * tree.param.num_feature * 3); sketchs_.resize(this->qexpand_.size() * tree.param.num_feature * 3);
for (size_t i = 0; i < sketchs.size(); ++i) { for (auto & sketch : sketchs_) {
sketchs[i].Init(info.num_row, this->param.sketch_eps); sketch.Init(info.num_row_, this->param_.sketch_eps);
} }
thread_sketch.resize(omp_get_max_threads()); thread_sketch_.resize(omp_get_max_threads());
// number of rows in // number of rows in
const size_t nrows = p_fmat->buffered_rowset().size(); const size_t nrows = p_fmat->BufferedRowset().Size();
// start accumulating statistics // start accumulating statistics
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(); dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator();
iter->BeforeFirst(); iter->BeforeFirst();
while (iter->Next()) { while (iter->Next()) {
const ColBatch &batch = iter->Value(); const ColBatch &batch = iter->Value();
// start enumeration // start enumeration
const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size); const auto nsize = static_cast<bst_omp_uint>(batch.size);
#pragma omp parallel for schedule(dynamic, 1) #pragma omp parallel for schedule(dynamic, 1)
for (bst_omp_uint i = 0; i < nsize; ++i) { for (bst_omp_uint i = 0; i < nsize; ++i) {
this->UpdateSketchCol(gpair, batch[i], tree, this->UpdateSketchCol(gpair, batch[i], tree,
node_stats, node_stats_,
batch.col_index[i], batch.col_index[i],
batch[i].length == nrows, batch[i].length == nrows,
&thread_sketch[omp_get_thread_num()]); &thread_sketch_[omp_get_thread_num()]);
} }
} }
// setup maximum size // setup maximum size
unsigned max_size = param.max_sketch_size(); unsigned max_size = param_.MaxSketchSize();
// synchronize sketch // synchronize sketch
summary_array.resize(sketchs.size()); summary_array_.resize(sketchs_.size());
for (size_t i = 0; i < sketchs.size(); ++i) { for (size_t i = 0; i < sketchs_.size(); ++i) {
common::WXQuantileSketch<bst_float, bst_float>::SummaryContainer out; common::WXQuantileSketch<bst_float, bst_float>::SummaryContainer out;
sketchs[i].GetSummary(&out); sketchs_[i].GetSummary(&out);
summary_array[i].Reserve(max_size); summary_array_[i].Reserve(max_size);
summary_array[i].SetPrune(out, max_size); summary_array_[i].SetPrune(out, max_size);
} }
size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size); size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size);
sketch_reducer.Allreduce(dmlc::BeginPtr(summary_array), nbytes, summary_array.size()); sketch_reducer_.Allreduce(dmlc::BeginPtr(summary_array_), nbytes, summary_array_.size());
} }
// update sketch information in column fid // update sketch information in column fid
inline void UpdateSketchCol(const std::vector<bst_gpair> &gpair, inline void UpdateSketchCol(const std::vector<GradientPair> &gpair,
const ColBatch::Inst &c, const ColBatch::Inst &c,
const RegTree &tree, const RegTree &tree,
const std::vector<SKStats> &nstats, const std::vector<SKStats> &nstats,
@ -185,20 +184,19 @@ class SketchMaker: public BaseMaker {
// initialize sbuilder for use // initialize sbuilder for use
std::vector<SketchEntry> &sbuilder = *p_temp; std::vector<SketchEntry> &sbuilder = *p_temp;
sbuilder.resize(tree.param.num_nodes * 3); sbuilder.resize(tree.param.num_nodes * 3);
for (size_t i = 0; i < this->qexpand.size(); ++i) { for (unsigned int nid : this->qexpand_) {
const unsigned nid = this->qexpand[i]; const unsigned wid = this->node2workindex_[nid];
const unsigned wid = this->node2workindex[nid];
for (int k = 0; k < 3; ++k) { for (int k = 0; k < 3; ++k) {
sbuilder[3 * nid + k].sum_total = 0.0f; sbuilder[3 * nid + k].sum_total = 0.0f;
sbuilder[3 * nid + k].sketch = &sketchs[(wid * tree.param.num_feature + fid) * 3 + k]; sbuilder[3 * nid + k].sketch = &sketchs_[(wid * tree.param.num_feature + fid) * 3 + k];
} }
} }
if (!col_full) { if (!col_full) {
for (bst_uint j = 0; j < c.length; ++j) { for (bst_uint j = 0; j < c.length; ++j) {
const bst_uint ridx = c[j].index; const bst_uint ridx = c[j].index;
const int nid = this->position[ridx]; const int nid = this->position_[ridx];
if (nid >= 0) { if (nid >= 0) {
const bst_gpair &e = gpair[ridx]; const GradientPair &e = gpair[ridx];
if (e.GetGrad() >= 0.0f) { if (e.GetGrad() >= 0.0f) {
sbuilder[3 * nid + 0].sum_total += e.GetGrad(); sbuilder[3 * nid + 0].sum_total += e.GetGrad();
} else { } else {
@ -208,8 +206,7 @@ class SketchMaker: public BaseMaker {
} }
} }
} else { } else {
for (size_t i = 0; i < this->qexpand.size(); ++i) { for (unsigned int nid : this->qexpand_) {
const unsigned nid = this->qexpand[i];
sbuilder[3 * nid + 0].sum_total = static_cast<bst_float>(nstats[nid].pos_grad); sbuilder[3 * nid + 0].sum_total = static_cast<bst_float>(nstats[nid].pos_grad);
sbuilder[3 * nid + 1].sum_total = static_cast<bst_float>(nstats[nid].neg_grad); sbuilder[3 * nid + 1].sum_total = static_cast<bst_float>(nstats[nid].neg_grad);
sbuilder[3 * nid + 2].sum_total = static_cast<bst_float>(nstats[nid].sum_hess); sbuilder[3 * nid + 2].sum_total = static_cast<bst_float>(nstats[nid].sum_hess);
@ -217,8 +214,7 @@ class SketchMaker: public BaseMaker {
} }
// if only one value, no need to do second pass // if only one value, no need to do second pass
if (c[0].fvalue == c[c.length-1].fvalue) { if (c[0].fvalue == c[c.length-1].fvalue) {
for (size_t i = 0; i < this->qexpand.size(); ++i) { for (int nid : this->qexpand_) {
const int nid = this->qexpand[i];
for (int k = 0; k < 3; ++k) { for (int k = 0; k < 3; ++k) {
sbuilder[3 * nid + k].sketch->Push(c[0].fvalue, sbuilder[3 * nid + k].sketch->Push(c[0].fvalue,
static_cast<bst_float>( static_cast<bst_float>(
@ -228,9 +224,8 @@ class SketchMaker: public BaseMaker {
return; return;
} }
// two pass scan // two pass scan
unsigned max_size = param.max_sketch_size(); unsigned max_size = param_.MaxSketchSize();
for (size_t i = 0; i < this->qexpand.size(); ++i) { for (int nid : this->qexpand_) {
const int nid = this->qexpand[i];
for (int k = 0; k < 3; ++k) { for (int k = 0; k < 3; ++k) {
sbuilder[3 * nid + k].Init(max_size); sbuilder[3 * nid + k].Init(max_size);
} }
@ -238,9 +233,9 @@ class SketchMaker: public BaseMaker {
// second pass, build the sketch // second pass, build the sketch
for (bst_uint j = 0; j < c.length; ++j) { for (bst_uint j = 0; j < c.length; ++j) {
const bst_uint ridx = c[j].index; const bst_uint ridx = c[j].index;
const int nid = this->position[ridx]; const int nid = this->position_[ridx];
if (nid >= 0) { if (nid >= 0) {
const bst_gpair &e = gpair[ridx]; const GradientPair &e = gpair[ridx];
if (e.GetGrad() >= 0.0f) { if (e.GetGrad() >= 0.0f) {
sbuilder[3 * nid + 0].Push(c[j].fvalue, e.GetGrad(), max_size); sbuilder[3 * nid + 0].Push(c[j].fvalue, e.GetGrad(), max_size);
} else { } else {
@ -249,70 +244,69 @@ class SketchMaker: public BaseMaker {
sbuilder[3 * nid + 2].Push(c[j].fvalue, e.GetHess(), max_size); sbuilder[3 * nid + 2].Push(c[j].fvalue, e.GetHess(), max_size);
} }
} }
for (size_t i = 0; i < this->qexpand.size(); ++i) { for (int nid : this->qexpand_) {
const int nid = this->qexpand[i];
for (int k = 0; k < 3; ++k) { for (int k = 0; k < 3; ++k) {
sbuilder[3 * nid + k].Finalize(max_size); sbuilder[3 * nid + k].Finalize(max_size);
} }
} }
} }
inline void SyncNodeStats(void) { inline void SyncNodeStats() {
CHECK_NE(qexpand.size(), 0U); CHECK_NE(qexpand_.size(), 0U);
std::vector<SKStats> tmp(qexpand.size()); std::vector<SKStats> tmp(qexpand_.size());
for (size_t i = 0; i < qexpand.size(); ++i) { for (size_t i = 0; i < qexpand_.size(); ++i) {
tmp[i] = node_stats[qexpand[i]]; tmp[i] = node_stats_[qexpand_[i]];
} }
stats_reducer.Allreduce(dmlc::BeginPtr(tmp), tmp.size()); stats_reducer_.Allreduce(dmlc::BeginPtr(tmp), tmp.size());
for (size_t i = 0; i < qexpand.size(); ++i) { for (size_t i = 0; i < qexpand_.size(); ++i) {
node_stats[qexpand[i]] = tmp[i]; node_stats_[qexpand_[i]] = tmp[i];
} }
} }
inline void FindSplit(int depth, inline void FindSplit(int depth,
const std::vector<bst_gpair> &gpair, const std::vector<GradientPair> &gpair,
DMatrix *p_fmat, DMatrix *p_fmat,
RegTree *p_tree) { RegTree *p_tree) {
const bst_uint num_feature = p_tree->param.num_feature; const bst_uint num_feature = p_tree->param.num_feature;
// get the best split condition for each node // get the best split condition for each node
std::vector<SplitEntry> sol(qexpand.size()); std::vector<SplitEntry> sol(qexpand_.size());
bst_omp_uint nexpand = static_cast<bst_omp_uint>(qexpand.size()); auto nexpand = static_cast<bst_omp_uint>(qexpand_.size());
#pragma omp parallel for schedule(dynamic, 1) #pragma omp parallel for schedule(dynamic, 1)
for (bst_omp_uint wid = 0; wid < nexpand; ++wid) { for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
const int nid = qexpand[wid]; const int nid = qexpand_[wid];
CHECK_EQ(node2workindex[nid], static_cast<int>(wid)); CHECK_EQ(node2workindex_[nid], static_cast<int>(wid));
SplitEntry &best = sol[wid]; SplitEntry &best = sol[wid];
for (bst_uint fid = 0; fid < num_feature; ++fid) { for (bst_uint fid = 0; fid < num_feature; ++fid) {
unsigned base = (wid * p_tree->param.num_feature + fid) * 3; unsigned base = (wid * p_tree->param.num_feature + fid) * 3;
EnumerateSplit(summary_array[base + 0], EnumerateSplit(summary_array_[base + 0],
summary_array[base + 1], summary_array_[base + 1],
summary_array[base + 2], summary_array_[base + 2],
node_stats[nid], fid, &best); node_stats_[nid], fid, &best);
} }
} }
// get the best result, we can synchronize the solution // get the best result, we can synchronize the solution
for (bst_omp_uint wid = 0; wid < nexpand; ++wid) { for (bst_omp_uint wid = 0; wid < nexpand; ++wid) {
const int nid = qexpand[wid]; const int nid = qexpand_[wid];
const SplitEntry &best = sol[wid]; const SplitEntry &best = sol[wid];
// set up the values // set up the values
p_tree->stat(nid).loss_chg = best.loss_chg; p_tree->Stat(nid).loss_chg = best.loss_chg;
this->SetStats(nid, node_stats[nid], p_tree); this->SetStats(nid, node_stats_[nid], p_tree);
// now we know the solution in snode[nid], set split // now we know the solution in snode[nid], set split
if (best.loss_chg > rt_eps) { if (best.loss_chg > kRtEps) {
p_tree->AddChilds(nid); p_tree->AddChilds(nid);
(*p_tree)[nid].set_split(best.split_index(), (*p_tree)[nid].SetSplit(best.SplitIndex(),
best.split_value, best.default_left()); best.split_value, best.DefaultLeft());
// mark right child as 0, to indicate fresh leaf // mark right child as 0, to indicate fresh leaf
(*p_tree)[(*p_tree)[nid].cleft()].set_leaf(0.0f, 0); (*p_tree)[(*p_tree)[nid].LeftChild()].SetLeaf(0.0f, 0);
(*p_tree)[(*p_tree)[nid].cright()].set_leaf(0.0f, 0); (*p_tree)[(*p_tree)[nid].RightChild()].SetLeaf(0.0f, 0);
} else { } else {
(*p_tree)[nid].set_leaf(p_tree->stat(nid).base_weight * param.learning_rate); (*p_tree)[nid].SetLeaf(p_tree->Stat(nid).base_weight * param_.learning_rate);
} }
} }
} }
// set statistics on ptree // set statistics on ptree
inline void SetStats(int nid, const SKStats &node_sum, RegTree *p_tree) { inline void SetStats(int nid, const SKStats &node_sum, RegTree *p_tree) {
p_tree->stat(nid).base_weight = static_cast<bst_float>(node_sum.CalcWeight(param)); p_tree->Stat(nid).base_weight = static_cast<bst_float>(node_sum.CalcWeight(param_));
p_tree->stat(nid).sum_hess = static_cast<bst_float>(node_sum.sum_hess); p_tree->Stat(nid).sum_hess = static_cast<bst_float>(node_sum.sum_hess);
node_sum.SetLeafVec(param, p_tree->leafvec(nid)); node_sum.SetLeafVec(param_, p_tree->Leafvec(nid));
} }
inline void EnumerateSplit(const WXQSketch::Summary &pos_grad, inline void EnumerateSplit(const WXQSketch::Summary &pos_grad,
const WXQSketch::Summary &neg_grad, const WXQSketch::Summary &neg_grad,
@ -321,7 +315,7 @@ class SketchMaker: public BaseMaker {
bst_uint fid, bst_uint fid,
SplitEntry *best) { SplitEntry *best) {
if (sum_hess.size == 0) return; if (sum_hess.size == 0) return;
double root_gain = node_sum.CalcGain(param); double root_gain = node_sum.CalcGain(param_);
std::vector<bst_float> fsplits; std::vector<bst_float> fsplits;
for (size_t i = 0; i < pos_grad.size; ++i) { for (size_t i = 0; i < pos_grad.size; ++i) {
fsplits.push_back(pos_grad.data[i].value); fsplits.push_back(pos_grad.data[i].value);
@ -350,17 +344,17 @@ class SketchMaker: public BaseMaker {
s.sum_hess = 0.5f * (hess.rmin + hess.rmax - hess.wmin); s.sum_hess = 0.5f * (hess.rmin + hess.rmax - hess.wmin);
c.SetSubstract(node_sum, s); c.SetSubstract(node_sum, s);
// forward // forward
if (s.sum_hess >= param.min_child_weight && if (s.sum_hess >= param_.min_child_weight &&
c.sum_hess >= param.min_child_weight) { c.sum_hess >= param_.min_child_weight) {
double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; double loss_chg = s.CalcGain(param_) + c.CalcGain(param_) - root_gain;
best->Update(static_cast<bst_float>(loss_chg), fid, fsplits[i], false); best->Update(static_cast<bst_float>(loss_chg), fid, fsplits[i], false);
} }
// backward // backward
c.SetSubstract(feat_sum, s); c.SetSubstract(feat_sum, s);
s.SetSubstract(node_sum, c); s.SetSubstract(node_sum, c);
if (s.sum_hess >= param.min_child_weight && if (s.sum_hess >= param_.min_child_weight &&
c.sum_hess >= param.min_child_weight) { c.sum_hess >= param_.min_child_weight) {
double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; double loss_chg = s.CalcGain(param_) + c.CalcGain(param_) - root_gain;
best->Update(static_cast<bst_float>(loss_chg), fid, fsplits[i], true); best->Update(static_cast<bst_float>(loss_chg), fid, fsplits[i], true);
} }
} }
@ -368,10 +362,10 @@ class SketchMaker: public BaseMaker {
// all including // all including
SKStats s = feat_sum, c; SKStats s = feat_sum, c;
c.SetSubstract(node_sum, s); c.SetSubstract(node_sum, s);
if (s.sum_hess >= param.min_child_weight && if (s.sum_hess >= param_.min_child_weight &&
c.sum_hess >= param.min_child_weight) { c.sum_hess >= param_.min_child_weight) {
bst_float cpt = fsplits.back(); bst_float cpt = fsplits.back();
double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; double loss_chg = s.CalcGain(param_) + c.CalcGain(param_) - root_gain;
best->Update(static_cast<bst_float>(loss_chg), best->Update(static_cast<bst_float>(loss_chg),
fid, cpt + std::abs(cpt) + 1.0f, false); fid, cpt + std::abs(cpt) + 1.0f, false);
} }
@ -380,19 +374,19 @@ class SketchMaker: public BaseMaker {
// thread temp data // thread temp data
// used to hold temporary sketch // used to hold temporary sketch
std::vector<std::vector<SketchEntry> > thread_sketch; std::vector<std::vector<SketchEntry> > thread_sketch_;
// used to hold statistics // used to hold statistics
std::vector<std::vector<SKStats> > thread_stats; std::vector<std::vector<SKStats> > thread_stats_;
// node statistics // node statistics
std::vector<SKStats> node_stats; std::vector<SKStats> node_stats_;
// summary array // summary array
std::vector<WXQSketch::SummaryContainer> summary_array; std::vector<WXQSketch::SummaryContainer> summary_array_;
// reducer for statistics // reducer for statistics
rabit::Reducer<SKStats, SKStats::Reduce> stats_reducer; rabit::Reducer<SKStats, SKStats::Reduce> stats_reducer_;
// reducer for summary // reducer for summary
rabit::SerializeReducer<WXQSketch::SummaryContainer> sketch_reducer; rabit::SerializeReducer<WXQSketch::SummaryContainer> sketch_reducer_;
// per node, per feature sketch // per node, per feature sketch
std::vector<common::WXQuantileSketch<bst_float, bst_float> > sketchs; std::vector<common::WXQuantileSketch<bst_float, bst_float> > sketchs_;
}; };
XGBOOST_REGISTER_TREE_UPDATER(SketchMaker, "grow_skmaker") XGBOOST_REGISTER_TREE_UPDATER(SketchMaker, "grow_skmaker")
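Two more mechanical rewrites show up in SketchMaker: the empty SKStats(void) {} constructor becomes = default, and the WXQSketch typedef becomes a using alias. A compilable sketch of both, with a placeholder sketch type standing in for the real one:

template <typename DType, typename RType>
struct PlaceholderSketch {};  // not the real WXQuantileSketch

struct Stats {
  double pos_grad = 0.0;
  double neg_grad = 0.0;
  double sum_hess = 0.0;
  Stats() = default;  // was: Stats(void) {}, per modernize-use-equals-default
};

// was: typedef PlaceholderSketch<float, float> WXQSketch;
using WXQSketch = PlaceholderSketch<float, float>;  // modernize-use-using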
@ -23,7 +23,7 @@ class TreeSyncher: public TreeUpdater {
public: public:
void Init(const std::vector<std::pair<std::string, std::string> >& args) override {} void Init(const std::vector<std::pair<std::string, std::string> >& args) override {}
void Update(HostDeviceVector<bst_gpair> *gpair, void Update(HostDeviceVector<GradientPair> *gpair,
DMatrix* dmat, DMatrix* dmat,
const std::vector<RegTree*> &trees) override { const std::vector<RegTree*> &trees) override {
if (rabit::GetWorldSize() == 1) return; if (rabit::GetWorldSize() == 1) return;
@ -31,14 +31,14 @@ class TreeSyncher: public TreeUpdater {
common::MemoryBufferStream fs(&s_model); common::MemoryBufferStream fs(&s_model);
int rank = rabit::GetRank(); int rank = rabit::GetRank();
if (rank == 0) { if (rank == 0) {
for (size_t i = 0; i < trees.size(); ++i) { for (auto tree : trees) {
trees[i]->Save(&fs); tree->Save(&fs);
} }
} }
fs.Seek(0); fs.Seek(0);
rabit::Broadcast(&s_model, 0); rabit::Broadcast(&s_model, 0);
for (size_t i = 0; i < trees.size(); ++i) { for (auto tree : trees) {
trees[i]->Load(&fs); tree->Load(&fs);
} }
} }
}; };
@ -20,10 +20,10 @@ TEST(c_api, XGDMatrixCreateFromMat_omp) {
std::shared_ptr<xgboost::DMatrix> dmat = std::shared_ptr<xgboost::DMatrix> dmat =
*static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle); *static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);
xgboost::MetaInfo &info = dmat->info(); xgboost::MetaInfo &info = dmat->Info();
ASSERT_EQ(info.num_col, num_cols); ASSERT_EQ(info.num_col_, num_cols);
ASSERT_EQ(info.num_row, row); ASSERT_EQ(info.num_row_, row);
ASSERT_EQ(info.num_nonzero, num_cols * row - num_missing); ASSERT_EQ(info.num_nonzero_, num_cols * row - num_missing);
auto iter = dmat->RowIterator(); auto iter = dmat->RowIterator();
iter->BeforeFirst(); iter->BeforeFirst();
@ -38,7 +38,7 @@ void SpeedTest() {
xgboost::common::Timer t; xgboost::common::Timer t;
dh::TransformLbs( dh::TransformLbs(
0, &temp_memory, h_rows.size(), dh::raw(row_ptr), row_ptr.size() - 1, 0, &temp_memory, h_rows.size(), dh::Raw(row_ptr), row_ptr.size() - 1,
false, false,
[=] __device__(size_t idx, size_t ridx) { d_output_row[idx] = ridx; }); [=] __device__(size_t idx, size_t ridx) { d_output_row[idx] = ridx; });
@ -66,7 +66,7 @@ void TestLbs() {
thrust::device_vector<int> output_row(h_rows.size()); thrust::device_vector<int> output_row(h_rows.size());
auto d_output_row = output_row.data(); auto d_output_row = output_row.data();
dh::TransformLbs(0, &temp_memory, h_rows.size(), dh::raw(row_ptr), dh::TransformLbs(0, &temp_memory, h_rows.size(), dh::Raw(row_ptr),
row_ptr.size() - 1, false, row_ptr.size() - 1, false,
[=] __device__(size_t idx, size_t ridx) { [=] __device__(size_t idx, size_t ridx) {
d_output_row[idx] = ridx; d_output_row[idx] = ridx;
@ -83,6 +83,6 @@ TEST(cub_lbs, Test) { TestLbs(); }
TEST(sumReduce, Test) { TEST(sumReduce, Test) {
thrust::device_vector<float> data(100, 1.0f); thrust::device_vector<float> data(100, 1.0f);
dh::CubMemory temp; dh::CubMemory temp;
auto sum = dh::sumReduction(temp, dh::raw(data), data.size()); auto sum = dh::SumReduction(temp, dh::Raw(data), data.size());
ASSERT_NEAR(sum, 100.0f, 1e-5); ASSERT_NEAR(sum, 100.0f, 1e-5);
} }
@ -12,9 +12,9 @@ TEST(MetaInfo, GetSet) {
info.SetInfo("root_index", double2, xgboost::kDouble, 2); info.SetInfo("root_index", double2, xgboost::kDouble, 2);
EXPECT_EQ(info.GetRoot(1), 2.0f); EXPECT_EQ(info.GetRoot(1), 2.0f);
EXPECT_EQ(info.labels.size(), 0); EXPECT_EQ(info.labels_.size(), 0);
info.SetInfo("label", double2, xgboost::kFloat32, 2); info.SetInfo("label", double2, xgboost::kFloat32, 2);
EXPECT_EQ(info.labels.size(), 2); EXPECT_EQ(info.labels_.size(), 2);
float float2[2] = {1.0f, 2.0f}; float float2[2] = {1.0f, 2.0f};
EXPECT_EQ(info.GetWeight(1), 1.0f) EXPECT_EQ(info.GetWeight(1), 1.0f)
@ -23,26 +23,26 @@ TEST(MetaInfo, GetSet) {
EXPECT_EQ(info.GetWeight(1), 2.0f); EXPECT_EQ(info.GetWeight(1), 2.0f);
uint32_t uint32_t2[2] = {1U, 2U}; uint32_t uint32_t2[2] = {1U, 2U};
EXPECT_EQ(info.base_margin.size(), 0); EXPECT_EQ(info.base_margin_.size(), 0);
info.SetInfo("base_margin", uint32_t2, xgboost::kUInt32, 2); info.SetInfo("base_margin", uint32_t2, xgboost::kUInt32, 2);
EXPECT_EQ(info.base_margin.size(), 2); EXPECT_EQ(info.base_margin_.size(), 2);
uint64_t uint64_t2[2] = {1U, 2U}; uint64_t uint64_t2[2] = {1U, 2U};
EXPECT_EQ(info.group_ptr.size(), 0); EXPECT_EQ(info.group_ptr_.size(), 0);
info.SetInfo("group", uint64_t2, xgboost::kUInt64, 2); info.SetInfo("group", uint64_t2, xgboost::kUInt64, 2);
ASSERT_EQ(info.group_ptr.size(), 3); ASSERT_EQ(info.group_ptr_.size(), 3);
EXPECT_EQ(info.group_ptr[2], 3); EXPECT_EQ(info.group_ptr_[2], 3);
info.Clear(); info.Clear();
ASSERT_EQ(info.group_ptr.size(), 0); ASSERT_EQ(info.group_ptr_.size(), 0);
} }
TEST(MetaInfo, SaveLoadBinary) { TEST(MetaInfo, SaveLoadBinary) {
xgboost::MetaInfo info; xgboost::MetaInfo info;
double vals[2] = {1.0, 2.0}; double vals[2] = {1.0, 2.0};
info.SetInfo("label", vals, xgboost::kDouble, 2); info.SetInfo("label", vals, xgboost::kDouble, 2);
info.num_row = 2; info.num_row_ = 2;
info.num_col = 1; info.num_col_ = 1;
std::string tmp_file = TempFileName(); std::string tmp_file = TempFileName();
dmlc::Stream * fs = dmlc::Stream::Create(tmp_file.c_str(), "w"); dmlc::Stream * fs = dmlc::Stream::Create(tmp_file.c_str(), "w");
@ -55,9 +55,9 @@ TEST(MetaInfo, SaveLoadBinary) {
fs = dmlc::Stream::Create(tmp_file.c_str(), "r"); fs = dmlc::Stream::Create(tmp_file.c_str(), "r");
xgboost::MetaInfo inforead; xgboost::MetaInfo inforead;
inforead.LoadBinary(fs); inforead.LoadBinary(fs);
EXPECT_EQ(inforead.labels, info.labels); EXPECT_EQ(inforead.labels_, info.labels_);
EXPECT_EQ(inforead.num_col, info.num_col); EXPECT_EQ(inforead.num_col_, info.num_col_);
EXPECT_EQ(inforead.num_row, info.num_row); EXPECT_EQ(inforead.num_row_, info.num_row_);
std::remove(tmp_file.c_str()); std::remove(tmp_file.c_str());
} }
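These tests exercise the MetaInfo field renames (labels -> labels_, num_row -> num_row_, group_ptr -> group_ptr_). A trimmed-down stand-in, not the real MetaInfo, showing that the trailing-underscore rule is applied here even though the members are public:

#include <cstdint>
#include <vector>

// Hypothetical sketch of the renamed fields touched by the tests above.
struct MetaInfoSketch {
  std::uint64_t num_row_{0};   // was: num_row
  std::uint64_t num_col_{0};   // was: num_col
  std::vector<float> labels_;  // was: labels
};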
@ -14,9 +14,9 @@ TEST(SimpleCSRSource, SaveLoadBinary) {
xgboost::DMatrix * dmat_read = xgboost::DMatrix::Load(tmp_binfile, true, false); xgboost::DMatrix * dmat_read = xgboost::DMatrix::Load(tmp_binfile, true, false);
std::remove(tmp_binfile.c_str()); std::remove(tmp_binfile.c_str());
EXPECT_EQ(dmat->info().num_col, dmat_read->info().num_col); EXPECT_EQ(dmat->Info().num_col_, dmat_read->Info().num_col_);
EXPECT_EQ(dmat->info().num_row, dmat_read->info().num_row); EXPECT_EQ(dmat->Info().num_row_, dmat_read->Info().num_row_);
EXPECT_EQ(dmat->info().num_row, dmat_read->info().num_row); EXPECT_EQ(dmat->Info().num_row_, dmat_read->Info().num_row_);
dmlc::DataIter<xgboost::RowBatch> * row_iter = dmat->RowIterator(); dmlc::DataIter<xgboost::RowBatch> * row_iter = dmat->RowIterator();
dmlc::DataIter<xgboost::RowBatch> * row_iter_read = dmat_read->RowIterator(); dmlc::DataIter<xgboost::RowBatch> * row_iter_read = dmat_read->RowIterator();
@ -10,10 +10,10 @@ TEST(SimpleDMatrix, MetaInfo) {
std::remove(tmp_file.c_str()); std::remove(tmp_file.c_str());
// Test the metadata that was parsed // Test the metadata that was parsed
EXPECT_EQ(dmat->info().num_row, 2); EXPECT_EQ(dmat->Info().num_row_, 2);
EXPECT_EQ(dmat->info().num_col, 5); EXPECT_EQ(dmat->Info().num_col_, 5);
EXPECT_EQ(dmat->info().num_nonzero, 6); EXPECT_EQ(dmat->Info().num_nonzero_, 6);
EXPECT_EQ(dmat->info().labels.size(), dmat->info().num_row); EXPECT_EQ(dmat->Info().labels_.size(), dmat->Info().num_row_);
} }
TEST(SimpleDMatrix, RowAccess) { TEST(SimpleDMatrix, RowAccess) {
@ -26,7 +26,7 @@ TEST(SimpleDMatrix, RowAccess) {
long row_count = 0; long row_count = 0;
row_iter->BeforeFirst(); row_iter->BeforeFirst();
while (row_iter->Next()) row_count += row_iter->Value().size; while (row_iter->Next()) row_count += row_iter->Value().size;
EXPECT_EQ(row_count, dmat->info().num_row); EXPECT_EQ(row_count, dmat->Info().num_row_);
// Test the data read into the first row // Test the data read into the first row
row_iter->BeforeFirst(); row_iter->BeforeFirst();
row_iter->Next(); row_iter->Next();
@ -43,15 +43,15 @@ TEST(SimpleDMatrix, ColAccessWithoutBatches) {
std::remove(tmp_file.c_str()); std::remove(tmp_file.c_str());
// Unsorted column access // Unsorted column access
const std::vector<bool> enable(dmat->info().num_col, true); const std::vector<bool> enable(dmat->Info().num_col_, true);
EXPECT_EQ(dmat->HaveColAccess(false), false); EXPECT_EQ(dmat->HaveColAccess(false), false);
dmat->InitColAccess(enable, 1, dmat->info().num_row, false); dmat->InitColAccess(enable, 1, dmat->Info().num_row_, false);
dmat->InitColAccess(enable, 0, 0, false); // Calling it again should not change it dmat->InitColAccess(enable, 0, 0, false); // Calling it again should not change it
ASSERT_EQ(dmat->HaveColAccess(false), true); ASSERT_EQ(dmat->HaveColAccess(false), true);
// Sorted column access // Sorted column access
EXPECT_EQ(dmat->HaveColAccess(true), false); EXPECT_EQ(dmat->HaveColAccess(true), false);
dmat->InitColAccess(enable, 1, dmat->info().num_row, true); dmat->InitColAccess(enable, 1, dmat->Info().num_row_, true);
dmat->InitColAccess(enable, 0, 0, true); // Calling it again should not change it dmat->InitColAccess(enable, 0, 0, true); // Calling it again should not change it
ASSERT_EQ(dmat->HaveColAccess(true), true); ASSERT_EQ(dmat->HaveColAccess(true), true);
@ -67,7 +67,7 @@ TEST(SimpleDMatrix, ColAccessWithoutBatches) {
col_iter->BeforeFirst(); col_iter->BeforeFirst();
while (col_iter->Next()) { while (col_iter->Next()) {
num_col_batch += 1; num_col_batch += 1;
EXPECT_EQ(col_iter->Value().size, dmat->info().num_col) EXPECT_EQ(col_iter->Value().size, dmat->Info().num_col_)
<< "Expected batch size = number of cells as #batches is 1."; << "Expected batch size = number of cells as #batches is 1.";
for (int i = 0; i < static_cast<int>(col_iter->Value().size); ++i) { for (int i = 0; i < static_cast<int>(col_iter->Value().size); ++i) {
EXPECT_EQ(col_iter->Value()[i].length, dmat->GetColSize(i)) EXPECT_EQ(col_iter->Value()[i].length, dmat->GetColSize(i))
@ -94,7 +94,7 @@ TEST(SimpleDMatrix, ColAccessWithBatches) {
std::remove(tmp_file.c_str()); std::remove(tmp_file.c_str());
// Unsorted column access // Unsorted column access
const std::vector<bool> enable(dmat->info().num_col, true); const std::vector<bool> enable(dmat->Info().num_col_, true);
EXPECT_EQ(dmat->HaveColAccess(false), false); EXPECT_EQ(dmat->HaveColAccess(false), false);
dmat->InitColAccess(enable, 1, 1, false); dmat->InitColAccess(enable, 1, 1, false);
dmat->InitColAccess(enable, 0, 0, false); // Calling it again should not change it dmat->InitColAccess(enable, 0, 0, false); // Calling it again should not change it
@ -118,20 +118,20 @@ TEST(SimpleDMatrix, ColAccessWithBatches) {
col_iter->BeforeFirst(); col_iter->BeforeFirst();
while (col_iter->Next()) { while (col_iter->Next()) {
num_col_batch += 1; num_col_batch += 1;
EXPECT_EQ(col_iter->Value().size, dmat->info().num_col) EXPECT_EQ(col_iter->Value().size, dmat->Info().num_col_)
<< "Expected batch size = num_cols as max_row_perbatch is 1."; << "Expected batch size = num_cols as max_row_perbatch is 1.";
for (int i = 0; i < static_cast<int>(col_iter->Value().size); ++i) { for (int i = 0; i < static_cast<int>(col_iter->Value().size); ++i) {
EXPECT_LE(col_iter->Value()[i].length, 1) EXPECT_LE(col_iter->Value()[i].length, 1)
<< "Expected length of each colbatch <=1 as max_row_perbatch is 1."; << "Expected length of each colbatch <=1 as max_row_perbatch is 1.";
} }
} }
EXPECT_EQ(num_col_batch, dmat->info().num_row) EXPECT_EQ(num_col_batch, dmat->Info().num_row_)
<< "Expected num batches = num_rows as max_row_perbatch is 1"; << "Expected num batches = num_rows as max_row_perbatch is 1";
col_iter = nullptr; col_iter = nullptr;
// The iterator feats should ignore any numbers larger than the num_col // The iterator feats should ignore any numbers larger than the num_col
std::vector<xgboost::bst_uint> sub_feats = { std::vector<xgboost::bst_uint> sub_feats = {
4, 3, static_cast<unsigned int>(dmat->info().num_col + 1)}; 4, 3, static_cast<unsigned int>(dmat->Info().num_col_ + 1)};
dmlc::DataIter<xgboost::ColBatch> * sub_col_iter = dmat->ColIterator(sub_feats); dmlc::DataIter<xgboost::ColBatch> * sub_col_iter = dmat->ColIterator(sub_feats);
// Loop over the batches and assert the data is as expected // Loop over the batches and assert the data is as expected
sub_col_iter->BeforeFirst(); sub_col_iter->BeforeFirst();
@ -12,10 +12,10 @@ TEST(SparsePageDMatrix, MetaInfo) {
EXPECT_TRUE(FileExists(tmp_file + ".cache")); EXPECT_TRUE(FileExists(tmp_file + ".cache"));
// Test the metadata that was parsed // Test the metadata that was parsed
EXPECT_EQ(dmat->info().num_row, 2); EXPECT_EQ(dmat->Info().num_row_, 2);
EXPECT_EQ(dmat->info().num_col, 5); EXPECT_EQ(dmat->Info().num_col_, 5);
EXPECT_EQ(dmat->info().num_nonzero, 6); EXPECT_EQ(dmat->Info().num_nonzero_, 6);
EXPECT_EQ(dmat->info().labels.size(), dmat->info().num_row); EXPECT_EQ(dmat->Info().labels_.size(), dmat->Info().num_row_);
// Clean up of external memory files // Clean up of external memory files
std::remove((tmp_file + ".cache").c_str()); std::remove((tmp_file + ".cache").c_str());
@ -34,7 +34,7 @@ TEST(SparsePageDMatrix, RowAccess) {
long row_count = 0; long row_count = 0;
row_iter->BeforeFirst(); row_iter->BeforeFirst();
while (row_iter->Next()) row_count += row_iter->Value().size; while (row_iter->Next()) row_count += row_iter->Value().size;
EXPECT_EQ(row_count, dmat->info().num_row); EXPECT_EQ(row_count, dmat->Info().num_row_);
// Test the data read into the first row // Test the data read into the first row
row_iter->BeforeFirst(); row_iter->BeforeFirst();
row_iter->Next(); row_iter->Next();
@@ -57,7 +57,7 @@ TEST(SparsePageDMatrix, ColAcess) {
   EXPECT_FALSE(FileExists(tmp_file + ".cache.col.page"));
   EXPECT_EQ(dmat->HaveColAccess(true), false);
-  const std::vector<bool> enable(dmat->info().num_col, true);
+  const std::vector<bool> enable(dmat->Info().num_col_, true);
   dmat->InitColAccess(enable, 1, 1, true);  // Max 1 row per patch
   ASSERT_EQ(dmat->HaveColAccess(true), true);
   EXPECT_TRUE(FileExists(tmp_file + ".cache.col.page"));
@@ -73,10 +73,10 @@ TEST(SparsePageDMatrix, ColAcess) {
   col_iter->BeforeFirst();
   while (col_iter->Next()) {
     num_col_batch += 1;
-    EXPECT_EQ(col_iter->Value().size, dmat->info().num_col)
+    EXPECT_EQ(col_iter->Value().size, dmat->Info().num_col_)
         << "Expected batch size to be same as num_cols as max_row_perbatch is 1.";
   }
-  EXPECT_EQ(num_col_batch, dmat->info().num_row)
+  EXPECT_EQ(num_col_batch, dmat->Info().num_row_)
       << "Expected num batches to be same as num_rows as max_row_perbatch is 1";
   col_iter = nullptr;


@@ -34,17 +34,17 @@ void CheckObjFunction(xgboost::ObjFunction * obj,
                       std::vector<xgboost::bst_float> out_grad,
                       std::vector<xgboost::bst_float> out_hess) {
   xgboost::MetaInfo info;
-  info.num_row = labels.size();
-  info.labels = labels;
-  info.weights = weights;
+  info.num_row_ = labels.size();
+  info.labels_ = labels;
+  info.weights_ = weights;
   xgboost::HostDeviceVector<xgboost::bst_float> in_preds(preds);
-  xgboost::HostDeviceVector<xgboost::bst_gpair> out_gpair;
+  xgboost::HostDeviceVector<xgboost::GradientPair> out_gpair;
   obj->GetGradient(&in_preds, info, 1, &out_gpair);
-  std::vector<xgboost::bst_gpair>& gpair = out_gpair.data_h();
-  ASSERT_EQ(gpair.size(), in_preds.size());
+  std::vector<xgboost::GradientPair>& gpair = out_gpair.HostVector();
+  ASSERT_EQ(gpair.size(), in_preds.Size());
   for (int i = 0; i < static_cast<int>(gpair.size()); ++i) {
     EXPECT_NEAR(gpair[i].GetGrad(), out_grad[i], 0.01)
         << "Unexpected grad for pred=" << preds[i] << " label=" << labels[i]
@@ -60,9 +60,9 @@ xgboost::bst_float GetMetricEval(xgboost::Metric * metric,
                                  std::vector<xgboost::bst_float> labels,
                                  std::vector<xgboost::bst_float> weights) {
   xgboost::MetaInfo info;
-  info.num_row = labels.size();
-  info.labels = labels;
-  info.weights = weights;
+  info.num_row_ = labels.size();
+  info.labels_ = labels;
+  info.weights_ = weights;
   return metric->Eval(preds, info, false);
 }
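These two helpers make the objective and metric tests table-driven: fill a `MetaInfo`, run the objective, compare against hand-derived numbers. For squared error the expectations are easy to derive, since L = ½(p − y)² gives grad = p − y and hess = 1. A hedged usage sketch, assuming the `reg:linear` objective name, an empty parameter list for `Configure`, and the `CheckObjFunction` signature shown above:

// Hand-derived expectations for squared error: grad = p - y, hess = 1.
auto* obj = xgboost::ObjFunction::Create("reg:linear");
obj->Configure({});             // default parameters
CheckObjFunction(obj,
                 {0.0f, 1.0f},  // preds p
                 {1.0f, 1.0f},  // labels y
                 {1.0f, 1.0f},  // weights
                 {-1.0f, 0.0f}, // expected grad = p - y
                 {1.0f, 1.0f}); // expected hess = 1
delete obj;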


@@ -8,15 +8,15 @@ typedef std::pair<std::string, std::string> arg;
 TEST(Linear, shotgun) {
   typedef std::pair<std::string, std::string> arg;
   auto mat = CreateDMatrix(10, 10, 0);
-  std::vector<bool> enabled(mat->info().num_col, true);
+  std::vector<bool> enabled(mat->Info().num_col_, true);
   mat->InitColAccess(enabled, 1.0f, 1 << 16, false);
   auto updater = std::unique_ptr<xgboost::LinearUpdater>(
       xgboost::LinearUpdater::Create("shotgun"));
   updater->Init({{"eta", "1."}});
-  std::vector<xgboost::bst_gpair> gpair(mat->info().num_row,
-                                        xgboost::bst_gpair(-5, 1.0));
+  std::vector<xgboost::GradientPair> gpair(mat->Info().num_row_,
+                                           xgboost::GradientPair(-5, 1.0));
   xgboost::gbm::GBLinearModel model;
-  model.param.num_feature = mat->info().num_col;
+  model.param.num_feature = mat->Info().num_col_;
   model.param.num_output_group = 1;
   model.LazyInitModel();
   updater->Update(&gpair, mat.get(), &model, gpair.size());
@@ -27,15 +27,15 @@ TEST(Linear, shotgun) {
 TEST(Linear, coordinate) {
   typedef std::pair<std::string, std::string> arg;
   auto mat = CreateDMatrix(10, 10, 0);
-  std::vector<bool> enabled(mat->info().num_col, true);
+  std::vector<bool> enabled(mat->Info().num_col_, true);
   mat->InitColAccess(enabled, 1.0f, 1 << 16, false);
   auto updater = std::unique_ptr<xgboost::LinearUpdater>(
       xgboost::LinearUpdater::Create("coord_descent"));
   updater->Init({});
-  std::vector<xgboost::bst_gpair> gpair(mat->info().num_row,
-                                        xgboost::bst_gpair(-5, 1.0));
+  std::vector<xgboost::GradientPair> gpair(mat->Info().num_row_,
+                                           xgboost::GradientPair(-5, 1.0));
   xgboost::gbm::GBLinearModel model;
-  model.param.num_feature = mat->info().num_col;
+  model.param.num_feature = mat->Info().num_col_;
   model.param.num_output_group = 1;
   model.LazyInitModel();
   updater->Update(&gpair, mat.get(), &model, gpair.size());
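Both linear tests seed every row with the constant `GradientPair(-5, 1.0)`, which makes the expected update easy to reason about: an unregularised Newton step for the bias is −Σg/Σh = −(−5n)/n = 5 whatever the row count. A small sketch of that closed form (an assumption about the update rule for illustration, not code from the updaters themselves):

#include <utility>
#include <vector>

// Closed-form Newton step for the bias given per-row (grad, hess) pairs,
// assuming no regularisation. With grad = -5 and hess = 1 on every row,
// the step is 5 regardless of how many rows there are.
double BiasNewtonStep(const std::vector<std::pair<double, double>>& gpair) {
  double sum_grad = 0.0, sum_hess = 0.0;
  for (const auto& p : gpair) {
    sum_grad += p.first;   // gradient
    sum_hess += p.second;  // hessian
  }
  return -sum_grad / sum_hess;
}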


@@ -49,8 +49,8 @@ TEST(Objective, LogisticRegressionBasic) {
   xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
   std::vector<xgboost::bst_float> out_preds = {0.5f, 0.524f, 0.622f, 0.710f, 0.731f};
   obj->PredTransform(&io_preds);
-  auto& preds = io_preds.data_h();
-  for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
+  auto& preds = io_preds.HostVector();
+  for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
     EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
   }
 }
@@ -98,8 +98,8 @@ TEST(Objective, PoissonRegressionBasic) {
   xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
   std::vector<xgboost::bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
   obj->PredTransform(&io_preds);
-  auto& preds = io_preds.data_h();
-  for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
+  auto& preds = io_preds.HostVector();
+  for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
     EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
   }
 }
@@ -134,8 +134,8 @@ TEST(Objective, GammaRegressionBasic) {
   xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
   std::vector<xgboost::bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
   obj->PredTransform(&io_preds);
-  auto& preds = io_preds.data_h();
-  for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
+  auto& preds = io_preds.HostVector();
+  for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
     EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
   }
 }
@@ -171,8 +171,8 @@ TEST(Objective, TweedieRegressionBasic) {
   xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
   std::vector<xgboost::bst_float> out_preds = {1, 1.10f, 1.64f, 2.45f, 2.71f};
   obj->PredTransform(&io_preds);
-  auto& preds = io_preds.data_h();
-  for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
+  auto& preds = io_preds.HostVector();
+  for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
     EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
   }
 }
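The expected vectors in these PredTransform tests are the inverse link applied to the raw margins: logistic regression uses the sigmoid 1/(1 + e^(-x)) (0.5, 0.525, 0.622, 0.711, 0.731), while the Poisson, Gamma, and Tweedie tests all expose exp(x) (1, 1.105, 1.649, 2.459, 2.718), matching the asserted values within the 0.01 tolerance. A standalone check of those numbers:

#include <cmath>
#include <cstdio>

// Recomputes the expected PredTransform outputs asserted in the tests above:
// sigmoid for the logistic objective, exp for Poisson/Gamma/Tweedie.
int main() {
  const float margins[] = {0.0f, 0.1f, 0.5f, 0.9f, 1.0f};
  for (float x : margins) {
    std::printf("x=%.1f  sigmoid=%.3f  exp=%.3f\n",
                x, 1.0f / (1.0f + std::exp(-x)), std::exp(x));
  }
  return 0;
}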


@@ -51,8 +51,8 @@ TEST(Objective, GPULogisticRegressionBasic) {
   xgboost::HostDeviceVector<xgboost::bst_float> io_preds = {0, 0.1f, 0.5f, 0.9f, 1};
   std::vector<xgboost::bst_float> out_preds = {0.5f, 0.524f, 0.622f, 0.710f, 0.731f};
   obj->PredTransform(&io_preds);
-  auto& preds = io_preds.data_h();
-  for (int i = 0; i < static_cast<int>(io_preds.size()); ++i) {
+  auto& preds = io_preds.HostVector();
+  for (int i = 0; i < static_cast<int>(io_preds.Size()); ++i) {
     EXPECT_NEAR(preds[i], out_preds[i], 0.01f);
   }
 }


@@ -11,8 +11,8 @@ TEST(cpu_predictor, Test) {
   std::vector<std::unique_ptr<RegTree>> trees;
   trees.push_back(std::unique_ptr<RegTree>(new RegTree));
   trees.back()->InitModel();
-  (*trees.back())[0].set_leaf(1.5f);
-  (*trees.back()).stat(0).sum_hess = 1.0f;
+  (*trees.back())[0].SetLeaf(1.5f);
+  (*trees.back()).Stat(0).sum_hess = 1.0f;
   gbm::GBTreeModel model(0.5);
   model.CommitModel(std::move(trees), 0);
   model.param.num_output_group = 1;
@@ -26,8 +26,8 @@ TEST(cpu_predictor, Test) {
   // Test predict batch
   HostDeviceVector<float> out_predictions;
   cpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
-  std::vector<float>& out_predictions_h = out_predictions.data_h();
-  for (int i = 0; i < out_predictions.size(); i++) {
+  std::vector<float>& out_predictions_h = out_predictions.HostVector();
+  for (int i = 0; i < out_predictions.Size(); i++) {
     ASSERT_EQ(out_predictions_h[i], 1.5);
   }


@@ -21,8 +21,8 @@ TEST(gpu_predictor, Test) {
   std::vector<std::unique_ptr<RegTree>> trees;
   trees.push_back(std::unique_ptr<RegTree>(new RegTree()));
   trees.back()->InitModel();
-  (*trees.back())[0].set_leaf(1.5f);
-  (*trees.back()).stat(0).sum_hess = 1.0f;
+  (*trees.back())[0].SetLeaf(1.5f);
+  (*trees.back()).Stat(0).sum_hess = 1.0f;
   gbm::GBTreeModel model(0.5);
   model.CommitModel(std::move(trees), 0);
   model.param.num_output_group = 1;
@@ -37,10 +37,10 @@ TEST(gpu_predictor, Test) {
   HostDeviceVector<float> cpu_out_predictions;
   gpu_predictor->PredictBatch(dmat.get(), &gpu_out_predictions, model, 0);
   cpu_predictor->PredictBatch(dmat.get(), &cpu_out_predictions, model, 0);
-  std::vector<float>& gpu_out_predictions_h = gpu_out_predictions.data_h();
-  std::vector<float>& cpu_out_predictions_h = cpu_out_predictions.data_h();
+  std::vector<float>& gpu_out_predictions_h = gpu_out_predictions.HostVector();
+  std::vector<float>& cpu_out_predictions_h = cpu_out_predictions.HostVector();
   float abs_tolerance = 0.001;
-  for (int i = 0; i < gpu_out_predictions.size(); i++) {
+  for (int i = 0; i < gpu_out_predictions.Size(); i++) {
     ASSERT_LT(std::abs(gpu_out_predictions_h[i] - cpu_out_predictions_h[i]),
               abs_tolerance);
   }
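The CPU/GPU parity loop above is the usual element-wise tolerance comparison; factored out, it is just the following (a hedged helper sketch, not part of the test suite):

#include <cmath>
#include <cstddef>
#include <vector>

// Element-wise comparison with absolute tolerance, mirroring what the
// gpu_predictor test does inline with ASSERT_LT.
bool ApproxEqual(const std::vector<float>& a, const std::vector<float>& b,
                 float tol = 0.001f) {
  if (a.size() != b.size()) return false;
  for (std::size_t i = 0; i < a.size(); ++i) {
    if (std::abs(a[i] - b[i]) >= tol) return false;
  }
  return true;
}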


@@ -29,7 +29,7 @@ TEST(gpu_hist_experimental, TestSparseShard) {
   ASSERT_LT(shard.row_stride, columns);
-  auto host_gidx_buffer = shard.gidx_buffer.as_vector();
+  auto host_gidx_buffer = shard.gidx_buffer.AsVector();
   common::CompressedIterator<uint32_t> gidx(host_gidx_buffer.data(),
                                             hmat.row_ptr.back() + 1);
@@ -64,7 +64,7 @@ TEST(gpu_hist_experimental, TestDenseShard) {
   ASSERT_EQ(shard.row_stride, columns);
-  auto host_gidx_buffer = shard.gidx_buffer.as_vector();
+  auto host_gidx_buffer = shard.gidx_buffer.AsVector();
   common::CompressedIterator<uint32_t> gidx(host_gidx_buffer.data(),
                                             hmat.row_ptr.back() + 1);
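Here `gidx_buffer` stores quantised feature indices bit-packed to the minimum width needed for `hmat.row_ptr.back() + 1` symbols, and `CompressedIterator` decodes them on the fly. A hedged sketch of reading the buffer back out, assuming the iterator's `operator[]` returns decoded symbols and that `n_entries` (rows times row stride for the shard) is the number of stored values:

// Decode the bit-packed bin indices on the host. `n_entries` is an
// assumed count of stored values for the shard under test.
auto host_gidx_buffer = shard.gidx_buffer.AsVector();
common::CompressedIterator<uint32_t> gidx(host_gidx_buffer.data(),
                                          hmat.row_ptr.back() + 1);
for (size_t i = 0; i < n_entries; ++i) {
  uint32_t bin = gidx[i];  // decoded quantised feature index
  (void)bin;               // a real test would assert on this value
}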


@@ -89,8 +89,8 @@ TEST(Param, SplitEntry) {
   xgboost::tree::SplitEntry se3;
   se3.Update(2, 101, 0, false);
   xgboost::tree::SplitEntry::Reduce(se2, se3);
-  EXPECT_EQ(se2.split_index(), 101);
-  EXPECT_FALSE(se2.default_left());
+  EXPECT_EQ(se2.SplitIndex(), 101);
+  EXPECT_FALSE(se2.DefaultLeft());
   EXPECT_TRUE(se1.NeedReplace(3, 1));
 }
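`Reduce` merges two candidate splits in place, keeping the better one: since `se3` carried the larger loss change, `se2` ends up with its split index and default direction. A hedged sketch of that semantics with illustrative field names (the real `xgboost::tree::SplitEntry` carries more state):

// Keep whichever candidate split improves the loss more; this kind of
// reduction combines per-thread or per-worker best splits.
struct SplitCandidate {
  float loss_chg = 0.0f;     // gain from taking this split
  unsigned split_index = 0;  // feature index
  float split_value = 0.0f;  // threshold
  bool default_left = true;  // direction for missing values
};

void Reduce(SplitCandidate* dst, const SplitCandidate& src) {
  if (src.loss_chg > dst->loss_chg) *dst = src;  // adopt the better split
}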


@@ -10,6 +10,21 @@ if [ ${TASK} == "lint" ]; then
     echo "----------------------------"
     (cat logclean.txt|grep warning) && exit -1
     (cat logclean.txt|grep error) && exit -1
+    # Rename cuda files for static analysis
+    for file in $(find src -name '*.cu'); do
+        cp "$file" "${file/.cu/_tmp.cc}"
+    done
+    header_filter='(xgboost\/src|xgboost\/include)'
+    for filename in $(find src -name '*.cc'); do
+        clang-tidy $filename -header-filter=$header_filter -- -Iinclude -Idmlc-core/include -Irabit/include -std=c++11 >> logtidy.txt
+    done
+    echo "---------clang-tidy log----------"
+    cat logtidy.txt
+    echo "----------------------------"
+    # Fail only on warnings related to XGBoost source files
+    (cat logtidy.txt|grep -E 'dmlc/xgboost.*warning'|grep -v dmlc-core) && exit -1
     exit 0
 fi