Clang-tidy static analysis (#3222)

* Clang-tidy static analysis

* Modernise checks

* Google coding standard checks

* Identifier renaming according to Google style
This commit is contained in:
Rory Mitchell
2018-04-19 18:57:13 +12:00
committed by GitHub
parent 3242b0a378
commit ccf80703ef
97 changed files with 3407 additions and 3354 deletions

View File

@@ -81,20 +81,19 @@ namespace xgboost {
* \brief unsigned integer type used in boost,
* used for feature index and row index.
*/
typedef uint32_t bst_uint;
typedef int32_t bst_int;
using bst_uint = uint32_t; // NOLINT
using bst_int = int32_t; // NOLINT
/*! \brief long integers */
typedef uint64_t bst_ulong; // NOLINT(*)
/*! \brief float type, used for storing statistics */
typedef float bst_float;
using bst_float = float; // NOLINT
namespace detail {
/*! \brief Implementation of a gradient statistics pair. Template specialisation
* may be used to support different gradient types, e.g. low precision, high
* precision, integer, floating point. */
template <typename T>
class bst_gpair_internal {
class GradientPairInternal {
/*! \brief gradient statistics */
T grad_;
/*! \brief second order gradient statistics */
@@ -104,23 +103,23 @@ class bst_gpair_internal {
XGBOOST_DEVICE void SetHess(float h) { hess_ = h; }
public:
typedef T value_t;
using ValueT = T;
XGBOOST_DEVICE bst_gpair_internal() : grad_(0), hess_(0) {}
XGBOOST_DEVICE GradientPairInternal() : grad_(0), hess_(0) {}
XGBOOST_DEVICE bst_gpair_internal(float grad, float hess) {
XGBOOST_DEVICE GradientPairInternal(float grad, float hess) {
SetGrad(grad);
SetHess(hess);
}
// Copy constructor if of same value type
XGBOOST_DEVICE bst_gpair_internal(const bst_gpair_internal<T> &g)
: grad_(g.grad_), hess_(g.hess_) {}
XGBOOST_DEVICE GradientPairInternal(const GradientPairInternal<T> &g)
: grad_(g.grad_), hess_(g.hess_) {} // NOLINT
// Copy constructor if different value type - use getters and setters to
// perform conversion
template <typename T2>
XGBOOST_DEVICE bst_gpair_internal(const bst_gpair_internal<T2> &g) {
XGBOOST_DEVICE explicit GradientPairInternal(const GradientPairInternal<T2> &g) {
SetGrad(g.GetGrad());
SetHess(g.GetHess());
}
@@ -128,85 +127,85 @@ class bst_gpair_internal {
XGBOOST_DEVICE float GetGrad() const { return grad_; }
XGBOOST_DEVICE float GetHess() const { return hess_; }
XGBOOST_DEVICE bst_gpair_internal<T> &operator+=(
const bst_gpair_internal<T> &rhs) {
XGBOOST_DEVICE GradientPairInternal<T> &operator+=(
const GradientPairInternal<T> &rhs) {
grad_ += rhs.grad_;
hess_ += rhs.hess_;
return *this;
}
XGBOOST_DEVICE bst_gpair_internal<T> operator+(
const bst_gpair_internal<T> &rhs) const {
bst_gpair_internal<T> g;
XGBOOST_DEVICE GradientPairInternal<T> operator+(
const GradientPairInternal<T> &rhs) const {
GradientPairInternal<T> g;
g.grad_ = grad_ + rhs.grad_;
g.hess_ = hess_ + rhs.hess_;
return g;
}
XGBOOST_DEVICE bst_gpair_internal<T> &operator-=(
const bst_gpair_internal<T> &rhs) {
XGBOOST_DEVICE GradientPairInternal<T> &operator-=(
const GradientPairInternal<T> &rhs) {
grad_ -= rhs.grad_;
hess_ -= rhs.hess_;
return *this;
}
XGBOOST_DEVICE bst_gpair_internal<T> operator-(
const bst_gpair_internal<T> &rhs) const {
bst_gpair_internal<T> g;
XGBOOST_DEVICE GradientPairInternal<T> operator-(
const GradientPairInternal<T> &rhs) const {
GradientPairInternal<T> g;
g.grad_ = grad_ - rhs.grad_;
g.hess_ = hess_ - rhs.hess_;
return g;
}
XGBOOST_DEVICE bst_gpair_internal(int value) {
*this = bst_gpair_internal<T>(static_cast<float>(value),
XGBOOST_DEVICE explicit GradientPairInternal(int value) {
*this = GradientPairInternal<T>(static_cast<float>(value),
static_cast<float>(value));
}
friend std::ostream &operator<<(std::ostream &os,
const bst_gpair_internal<T> &g) {
const GradientPairInternal<T> &g) {
os << g.GetGrad() << "/" << g.GetHess();
return os;
}
};
template<>
inline XGBOOST_DEVICE float bst_gpair_internal<int64_t>::GetGrad() const {
inline XGBOOST_DEVICE float GradientPairInternal<int64_t>::GetGrad() const {
return grad_ * 1e-4f;
}
template<>
inline XGBOOST_DEVICE float bst_gpair_internal<int64_t>::GetHess() const {
inline XGBOOST_DEVICE float GradientPairInternal<int64_t>::GetHess() const {
return hess_ * 1e-4f;
}
template<>
inline XGBOOST_DEVICE void bst_gpair_internal<int64_t>::SetGrad(float g) {
inline XGBOOST_DEVICE void GradientPairInternal<int64_t>::SetGrad(float g) {
grad_ = static_cast<int64_t>(std::round(g * 1e4));
}
template<>
inline XGBOOST_DEVICE void bst_gpair_internal<int64_t>::SetHess(float h) {
inline XGBOOST_DEVICE void GradientPairInternal<int64_t>::SetHess(float h) {
hess_ = static_cast<int64_t>(std::round(h * 1e4));
}
} // namespace detail
/*! \brief gradient statistics pair usually needed in gradient boosting */
typedef detail::bst_gpair_internal<float> bst_gpair;
using GradientPair = detail::GradientPairInternal<float>;
/*! \brief High precision gradient statistics pair */
typedef detail::bst_gpair_internal<double> bst_gpair_precise;
using GradientPairPrecise = detail::GradientPairInternal<double>;
/*! \brief High precision gradient statistics pair with integer backed
* storage. Operators are associative where floating point versions are not
* associative. */
typedef detail::bst_gpair_internal<int64_t> bst_gpair_integer;
using GradientPairInteger = detail::GradientPairInternal<int64_t>;
/*! \brief small eps gap for minimum split decision. */
const bst_float rt_eps = 1e-6f;
const bst_float kRtEps = 1e-6f;
/*! \brief define unsigned long for openmp loop */
typedef dmlc::omp_ulong omp_ulong;
using omp_ulong = dmlc::omp_ulong; // NOLINT
/*! \brief define unsigned int for openmp loop */
typedef dmlc::omp_uint bst_omp_uint;
using bst_omp_uint = dmlc::omp_uint; // NOLINT
/*!
* \brief define compatible keywords in g++

View File

@@ -30,16 +30,16 @@ typedef uint64_t bst_ulong; // NOLINT(*)
/*! \brief handle to DMatrix */
typedef void *DMatrixHandle;
typedef void *DMatrixHandle; // NOLINT(*)
/*! \brief handle to Booster */
typedef void *BoosterHandle;
typedef void *BoosterHandle; // NOLINT(*)
/*! \brief handle to a data iterator */
typedef void *DataIterHandle;
typedef void *DataIterHandle; // NOLINT(*)
/*! \brief handle to an internal data holder. */
typedef void *DataHolderHandle;
typedef void *DataHolderHandle; // NOLINT(*)
/*! \brief Mini batch used in XGBoost Data Iteration */
typedef struct {
typedef struct { // NOLINT(*)
/*! \brief number of rows in the minibatch */
size_t size;
/*! \brief row pointer to the rows in the data */
@@ -66,7 +66,7 @@ typedef struct {
* \param handle The handle to the callback.
* \param batch The data content to be set.
*/
XGB_EXTERN_C typedef int XGBCallbackSetData(
XGB_EXTERN_C typedef int XGBCallbackSetData( // NOLINT(*)
DataHolderHandle handle, XGBoostBatchCSR batch);
/*!
@@ -80,9 +80,8 @@ XGB_EXTERN_C typedef int XGBCallbackSetData(
* \param set_function_handle The handle to be passed to set function.
* \return 0 if we are reaching the end and batch is not returned.
*/
XGB_EXTERN_C typedef int XGBCallbackDataIterNext(
DataIterHandle data_handle,
XGBCallbackSetData* set_function,
XGB_EXTERN_C typedef int XGBCallbackDataIterNext( // NOLINT(*)
DataIterHandle data_handle, XGBCallbackSetData *set_function,
DataHolderHandle set_function_handle);
/*!
@@ -216,11 +215,9 @@ XGB_DLL int XGDMatrixCreateFromMat(const float *data,
* \param nthread number of threads (up to maximum cores available, if <=0 use all cores)
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixCreateFromMat_omp(const float *data,
bst_ulong nrow,
bst_ulong ncol,
float missing,
DMatrixHandle *out,
XGB_DLL int XGDMatrixCreateFromMat_omp(const float *data, // NOLINT
bst_ulong nrow, bst_ulong ncol,
float missing, DMatrixHandle *out,
int nthread);
/*!
* \brief create a new dmatrix from sliced content of existing matrix

View File

@@ -30,44 +30,45 @@ enum DataType {
/*!
* \brief Meta information about the dataset; it always sits in memory.
*/
struct MetaInfo {
class MetaInfo {
public:
/*! \brief number of rows in the data */
uint64_t num_row;
uint64_t num_row_{0};
/*! \brief number of columns in the data */
uint64_t num_col;
uint64_t num_col_{0};
/*! \brief number of nonzero entries in the data */
uint64_t num_nonzero;
uint64_t num_nonzero_{0};
/*! \brief label of each instance */
std::vector<bst_float> labels;
std::vector<bst_float> labels_;
/*!
* \brief specified root index of each instance,
* can be used for multi task setting
*/
std::vector<bst_uint> root_index;
std::vector<bst_uint> root_index_;
/*!
* \brief the index of begin and end of a group
* needed when the learning task is ranking.
*/
std::vector<bst_uint> group_ptr;
std::vector<bst_uint> group_ptr_;
/*! \brief weights of each instance, optional */
std::vector<bst_float> weights;
std::vector<bst_float> weights_;
/*!
* \brief initialized margins,
* if specified, xgboost will start from this init margin
* can be used to specify initial prediction to boost from.
*/
std::vector<bst_float> base_margin;
std::vector<bst_float> base_margin_;
/*! \brief version flag, used to check version of this info */
static const int kVersion = 1;
/*! \brief default constructor */
MetaInfo() : num_row(0), num_col(0), num_nonzero(0) {}
MetaInfo() = default;
/*!
* \brief Get the weight of a single instance (1.0 when no weights are set).
* \param i Instance index.
* \return The weight.
*/
inline bst_float GetWeight(size_t i) const {
return weights.size() != 0 ? weights[i] : 1.0f;
return weights_.size() != 0 ? weights_[i] : 1.0f;
}
/*!
* \brief Get the root index of i-th instance.
@@ -75,20 +76,20 @@ struct MetaInfo {
* \return The pre-defined root index of i-th instance.
*/
inline unsigned GetRoot(size_t i) const {
return root_index.size() != 0 ? root_index[i] : 0U;
return root_index_.size() != 0 ? root_index_[i] : 0U;
}
/*! \brief get sorted indexes (argsort) of labels by absolute value (used by cox loss) */
inline const std::vector<size_t>& LabelAbsSort() const {
if (label_order_cache.size() == labels.size()) {
return label_order_cache;
if (label_order_cache_.size() == labels_.size()) {
return label_order_cache_;
}
label_order_cache.resize(labels.size());
std::iota(label_order_cache.begin(), label_order_cache.end(), 0);
const auto l = labels;
XGBOOST_PARALLEL_SORT(label_order_cache.begin(), label_order_cache.end(),
label_order_cache_.resize(labels_.size());
std::iota(label_order_cache_.begin(), label_order_cache_.end(), 0);
const auto l = labels_;
XGBOOST_PARALLEL_SORT(label_order_cache_.begin(), label_order_cache_.end(),
[&l](size_t i1, size_t i2) {return std::abs(l[i1]) < std::abs(l[i2]);});
return label_order_cache;
return label_order_cache_;
}
/*! \brief clear all the information */
void Clear();
@@ -113,7 +114,7 @@ struct MetaInfo {
private:
/*! \brief argsort of labels */
mutable std::vector<size_t> label_order_cache;
mutable std::vector<size_t> label_order_cache_;
};
/*! \brief read-only sparse instance batch in CSR format */
@@ -125,7 +126,7 @@ struct SparseBatch {
/*! \brief feature value */
bst_float fvalue;
/*! \brief default constructor */
Entry() {}
Entry() = default;
/*!
* \brief constructor with index and value
* \param index The feature or row index.
@@ -141,11 +142,11 @@ struct SparseBatch {
/*! \brief an instance of sparse vector in the batch */
struct Inst {
/*! \brief pointer to the elements*/
const Entry *data;
const Entry *data{nullptr};
/*! \brief length of the instance */
bst_uint length;
bst_uint length{0};
/*! \brief constructor */
Inst() : data(0), length(0) {}
Inst() = default;
Inst(const Entry *data, bst_uint length) : data(data), length(length) {}
/*! \brief get i-th pair in the sparse vector*/
inline const Entry& operator[](size_t i) const {
@@ -167,7 +168,7 @@ struct RowBatch : public SparseBatch {
const Entry *data_ptr;
/*! \brief get i-th row from the batch */
inline Inst operator[](size_t i) const {
return Inst(data_ptr + ind_ptr[i], static_cast<bst_uint>(ind_ptr[i + 1] - ind_ptr[i]));
return {data_ptr + ind_ptr[i], static_cast<bst_uint>(ind_ptr[i + 1] - ind_ptr[i])};
}
};
@@ -206,16 +207,16 @@ class DataSource : public dmlc::DataIter<RowBatch> {
* \brief A vector-like structure to represent set of rows.
* But saves the memory when all rows are in the set (common case in xgb)
*/
struct RowSet {
class RowSet {
public:
/*! \return i-th row index */
inline bst_uint operator[](size_t i) const;
/*! \return the size of the set. */
inline size_t size() const;
inline size_t Size() const;
/*! \brief push the index back to the set */
inline void push_back(bst_uint i);
inline void PushBack(bst_uint i);
/*! \brief clear the set */
inline void clear();
inline void Clear();
/*!
* \brief save rowset to file.
* \param fo The file to be saved.
@@ -228,11 +229,11 @@ struct RowSet {
*/
inline bool Load(dmlc::Stream* fi);
/*! \brief constructor */
RowSet() : size_(0) {}
RowSet() = default;
private:
/*! \brief The internal data structure of size */
uint64_t size_;
uint64_t size_{0};
/*! \brief Explicit row indices; left empty when the set is exactly rows [0, size_) */
std::vector<bst_uint> rows_;
};
@@ -250,11 +251,11 @@ struct RowSet {
class DMatrix {
public:
/*! \brief default constructor */
DMatrix() : cache_learner_ptr_(nullptr) {}
DMatrix() = default;
/*! \brief meta information of the dataset */
virtual MetaInfo& info() = 0;
virtual MetaInfo& Info() = 0;
/*! \brief meta information of the dataset */
virtual const MetaInfo& info() const = 0;
virtual const MetaInfo& Info() const = 0;
/*!
* \brief get the row iterator, reset to beginning position
* \note Only either RowIterator or column Iterator can be active.
@@ -291,9 +292,9 @@ class DMatrix {
/*! \brief get column density */
virtual float GetColDensity(size_t cidx) const = 0;
/*! \return reference of buffered rowset, in column access */
virtual const RowSet& buffered_rowset() const = 0;
virtual const RowSet& BufferedRowset() const = 0;
/*! \brief virtual destructor */
virtual ~DMatrix() {}
virtual ~DMatrix() = default;
/*!
* \brief Save DMatrix to local file.
* The saved file only works for non-sharded dataset(single machine training).
@@ -343,7 +344,7 @@ class DMatrix {
// allow learner class to access this field.
friend class LearnerImpl;
/*! \brief public field to back ref cached matrix. */
LearnerImpl* cache_learner_ptr_;
LearnerImpl* cache_learner_ptr_{nullptr};
};
// implementation of inline functions
@@ -351,15 +352,15 @@ inline bst_uint RowSet::operator[](size_t i) const {
return rows_.size() == 0 ? static_cast<bst_uint>(i) : rows_[i];
}
inline size_t RowSet::size() const {
inline size_t RowSet::Size() const {
return size_;
}
inline void RowSet::clear() {
inline void RowSet::Clear() {
rows_.clear(); size_ = 0;
}
inline void RowSet::push_back(bst_uint i) {
inline void RowSet::PushBack(bst_uint i) {
if (rows_.size() == 0) {
if (i == size_) {
++size_; return;

View File

@@ -45,7 +45,7 @@ class FeatureMap {
*/
inline void PushBack(int fid, const char *fname, const char *ftype) {
CHECK_EQ(fid, static_cast<int>(names_.size()));
names_.push_back(std::string(fname));
names_.emplace_back(fname);
types_.push_back(GetType(ftype));
}
/*! \brief clear the feature map */
@@ -54,11 +54,11 @@ class FeatureMap {
types_.clear();
}
/*! \return number of known features */
inline size_t size() const {
inline size_t Size() const {
return names_.size();
}
/*! \return name of specific feature */
inline const char* name(size_t idx) const {
inline const char* Name(size_t idx) const {
CHECK_LT(idx, names_.size()) << "FeatureMap feature index exceed bound";
return names_[idx].c_str();
}
@@ -75,7 +75,7 @@ class FeatureMap {
* \return The translated type.
*/
inline static Type GetType(const char* tname) {
using namespace std;
using std::strcmp;
if (!strcmp("i", tname)) return kIndicator;
if (!strcmp("q", tname)) return kQuantitive;
if (!strcmp("int", tname)) return kInteger;

View File

@@ -27,7 +27,7 @@ namespace xgboost {
class GradientBooster {
public:
/*! \brief virtual destructor */
virtual ~GradientBooster() {}
virtual ~GradientBooster() = default;
/*!
* \brief set configuration from pair iterators.
* \param begin The beginning iterator.
@@ -69,7 +69,7 @@ class GradientBooster {
* the booster may change content of gpair
*/
virtual void DoBoost(DMatrix* p_fmat,
HostDeviceVector<bst_gpair>* in_gpair,
HostDeviceVector<GradientPair>* in_gpair,
ObjFunction* obj = nullptr) = 0;
/*!

View File

@@ -37,7 +37,7 @@ namespace xgboost {
class Learner : public rabit::Serializable {
public:
/*! \brief virtual destructor */
virtual ~Learner() {}
~Learner() override = default;
/*!
* \brief set configuration from pair iterators.
* \param begin The beginning iterator.
@@ -62,12 +62,12 @@ class Learner : public rabit::Serializable {
* \brief load model from stream
* \param fi input stream.
*/
virtual void Load(dmlc::Stream* fi) = 0;
void Load(dmlc::Stream* fi) override = 0;
/*!
* \brief save model to stream.
* \param fo output stream
*/
virtual void Save(dmlc::Stream* fo) const = 0;
void Save(dmlc::Stream* fo) const override = 0;
/*!
* \brief update the model for one iteration
* With the specified objective function.
@@ -84,7 +84,7 @@ class Learner : public rabit::Serializable {
*/
virtual void BoostOneIter(int iter,
DMatrix* train,
HostDeviceVector<bst_gpair>* in_gpair) = 0;
HostDeviceVector<GradientPair>* in_gpair) = 0;
/*!
* \brief evaluate the model for specific iteration using the configured metrics.
* \param iter iteration number
@@ -194,7 +194,7 @@ inline void Learner::Predict(const SparseBatch::Inst& inst,
bool output_margin,
HostDeviceVector<bst_float>* out_preds,
unsigned ntree_limit) const {
gbm_->PredictInstance(inst, &out_preds->data_h(), ntree_limit);
gbm_->PredictInstance(inst, &out_preds->HostVector(), ntree_limit);
if (!output_margin) {
obj_->PredTransform(out_preds);
}

View File

@@ -19,7 +19,7 @@ namespace xgboost {
class LinearUpdater {
public:
/*! \brief virtual destructor */
virtual ~LinearUpdater() {}
virtual ~LinearUpdater() = default;
/*!
* \brief Initialize the updater with given arguments.
* \param args arguments to the objective function.
@@ -36,7 +36,7 @@ class LinearUpdater {
* \param sum_instance_weight The sum instance weights, used to normalise l1/l2 penalty.
*/
virtual void Update(std::vector<bst_gpair>* in_gpair, DMatrix* data,
virtual void Update(std::vector<GradientPair>* in_gpair, DMatrix* data,
gbm::GBLinearModel* model,
double sum_instance_weight) = 0;

View File

@@ -21,7 +21,7 @@ class BaseLogger {
log_stream_ << "[" << dmlc::DateLogger().HumanDate() << "] ";
#endif
}
std::ostream& stream() { return log_stream_; }
std::ostream& stream() { return log_stream_; } // NOLINT
protected:
std::ostringstream log_stream_;

View File

@@ -35,7 +35,7 @@ class Metric {
/*! \return name of metric */
virtual const char* Name() const = 0;
/*! \brief virtual destructor */
virtual ~Metric() {}
virtual ~Metric() = default;
/*!
* \brief create a metric according to name.
* \param name name of the metric.

View File

@@ -23,7 +23,7 @@ namespace xgboost {
class ObjFunction {
public:
/*! \brief virtual destructor */
virtual ~ObjFunction() {}
virtual ~ObjFunction() = default;
/*!
* \brief set configuration from pair iterators.
* \param begin The beginning iterator.
@@ -47,7 +47,7 @@ class ObjFunction {
virtual void GetGradient(HostDeviceVector<bst_float>* preds,
const MetaInfo& info,
int iteration,
HostDeviceVector<bst_gpair>* out_gpair) = 0;
HostDeviceVector<GradientPair>* out_gpair) = 0;
/*! \return the default evaluation metric for the objective */
virtual const char* DefaultEvalMetric() const = 0;

View File

@@ -36,7 +36,7 @@ namespace xgboost {
class Predictor {
public:
virtual ~Predictor() {}
virtual ~Predictor() = default;
/**
* \fn virtual void Predictor::Init(const std::vector<std::pair<std::string,

View File

@@ -71,70 +71,70 @@ template<typename TSplitCond, typename TNodeStat>
class TreeModel {
public:
/*! \brief per-node statistics type (auxiliary information to help tree building) */
typedef TNodeStat NodeStat;
using NodeStat = TNodeStat;
/*! \brief data type of the split condition */
typedef TSplitCond SplitCond;
using SplitCond = TSplitCond;
/*! \brief tree node */
class Node {
public:
Node() : sindex_(0) {
Node() {
// assert compact alignment
static_assert(sizeof(Node) == 4 * sizeof(int) + sizeof(Info),
"Node: 64 bit align");
}
/*! \brief index of left child */
inline int cleft() const {
inline int LeftChild() const {
return this->cleft_;
}
/*! \brief index of right child */
inline int cright() const {
inline int RightChild() const {
return this->cright_;
}
/*! \brief index of default child when feature is missing */
inline int cdefault() const {
return this->default_left() ? this->cleft() : this->cright();
inline int DefaultChild() const {
return this->DefaultLeft() ? this->LeftChild() : this->RightChild();
}
/*! \brief feature index of split condition */
inline unsigned split_index() const {
inline unsigned SplitIndex() const {
return sindex_ & ((1U << 31) - 1U);
}
/*! \brief whether to go to the left child when the feature value is missing */
inline bool default_left() const {
inline bool DefaultLeft() const {
return (sindex_ >> 31) != 0;
}
/*! \brief whether current node is leaf node */
inline bool is_leaf() const {
inline bool IsLeaf() const {
return cleft_ == -1;
}
/*! \return get leaf value of leaf node */
inline bst_float leaf_value() const {
inline bst_float LeafValue() const {
return (this->info_).leaf_value;
}
/*! \return get split condition of the node */
inline TSplitCond split_cond() const {
inline TSplitCond SplitCond() const {
return (this->info_).split_cond;
}
/*! \brief get parent of the node */
inline int parent() const {
inline int Parent() const {
return parent_ & ((1U << 31) - 1);
}
/*! \brief whether current node is left child */
inline bool is_left_child() const {
inline bool IsLeftChild() const {
return (parent_ & (1U << 31)) != 0;
}
/*! \brief whether this node is deleted */
inline bool is_deleted() const {
inline bool IsDeleted() const {
return sindex_ == std::numeric_limits<unsigned>::max();
}
/*! \brief whether current node is root */
inline bool is_root() const {
inline bool IsRoot() const {
return parent_ == -1;
}
/*!
* \brief set the right child
* \param nid node id to right child
*/
inline void set_right_child(int nid) {
inline void SetRightChild(int nid) {
this->cright_ = nid;
}
/*!
@@ -143,7 +143,7 @@ class TreeModel {
* \param split_cond split condition
* \param default_left the default direction when feature is unknown
*/
inline void set_split(unsigned split_index, TSplitCond split_cond,
inline void SetSplit(unsigned split_index, TSplitCond split_cond,
bool default_left = false) {
if (default_left) split_index |= (1U << 31);
this->sindex_ = split_index;
@@ -155,13 +155,13 @@ class TreeModel {
* \param right right index, could be used to store
* additional information
*/
inline void set_leaf(bst_float value, int right = -1) {
inline void SetLeaf(bst_float value, int right = -1) {
(this->info_).leaf_value = value;
this->cleft_ = -1;
this->cright_ = right;
}
/*! \brief mark that this node is deleted */
inline void mark_delete() {
inline void MarkDelete() {
this->sindex_ = std::numeric_limits<unsigned>::max();
}
@@ -181,11 +181,11 @@ class TreeModel {
// pointer to left, right
int cleft_, cright_;
// split feature index, left split or right split depends on the highest bit
unsigned sindex_;
unsigned sindex_{0};
// extra info
Info info_;
// set parent
inline void set_parent(int pidx, bool is_left_child = true) {
inline void SetParent(int pidx, bool is_left_child = true) {
if (is_left_child) pidx |= (1U << 31);
this->parent_ = pidx;
}
@@ -193,35 +193,35 @@ class TreeModel {
protected:
// vector of nodes
std::vector<Node> nodes;
std::vector<Node> nodes_;
// free node space, used during training process
std::vector<int> deleted_nodes;
std::vector<int> deleted_nodes_;
// stats of nodes
std::vector<TNodeStat> stats;
std::vector<TNodeStat> stats_;
// leaf vector, that is used to store additional information
std::vector<bst_float> leaf_vector;
std::vector<bst_float> leaf_vector_;
// allocate a new node
// NOTE: this may resize the node vectors, which can invalidate any
// references or pointers previously obtained into them
inline int AllocNode() {
if (param.num_deleted != 0) {
int nd = deleted_nodes.back();
deleted_nodes.pop_back();
int nd = deleted_nodes_.back();
deleted_nodes_.pop_back();
--param.num_deleted;
return nd;
}
int nd = param.num_nodes++;
CHECK_LT(param.num_nodes, std::numeric_limits<int>::max())
<< "number of nodes in the tree exceed 2^31";
nodes.resize(param.num_nodes);
stats.resize(param.num_nodes);
leaf_vector.resize(param.num_nodes * param.size_leaf_vector);
nodes_.resize(param.num_nodes);
stats_.resize(param.num_nodes);
leaf_vector_.resize(param.num_nodes * param.size_leaf_vector);
return nd;
}
// delete a tree node, keep the parent field to allow trace back
inline void DeleteNode(int nid) {
CHECK_GE(nid, param.num_roots);
deleted_nodes.push_back(nid);
nodes[nid].mark_delete();
deleted_nodes_.push_back(nid);
nodes_[nid].MarkDelete();
++param.num_deleted;
}
@@ -232,11 +232,11 @@ class TreeModel {
* \param value new leaf value
*/
inline void ChangeToLeaf(int rid, bst_float value) {
CHECK(nodes[nodes[rid].cleft() ].is_leaf());
CHECK(nodes[nodes[rid].cright()].is_leaf());
this->DeleteNode(nodes[rid].cleft());
this->DeleteNode(nodes[rid].cright());
nodes[rid].set_leaf(value);
CHECK(nodes_[nodes_[rid].LeftChild() ].IsLeaf());
CHECK(nodes_[nodes_[rid].RightChild()].IsLeaf());
this->DeleteNode(nodes_[rid].LeftChild());
this->DeleteNode(nodes_[rid].RightChild());
nodes_[rid].SetLeaf(value);
}
/*!
* \brief collapse a non leaf node to a leaf node, delete its children
@@ -244,12 +244,12 @@ class TreeModel {
* \param value new leaf value
*/
inline void CollapseToLeaf(int rid, bst_float value) {
if (nodes[rid].is_leaf()) return;
if (!nodes[nodes[rid].cleft() ].is_leaf()) {
CollapseToLeaf(nodes[rid].cleft(), 0.0f);
if (nodes_[rid].IsLeaf()) return;
if (!nodes_[nodes_[rid].LeftChild() ].IsLeaf()) {
CollapseToLeaf(nodes_[rid].LeftChild(), 0.0f);
}
if (!nodes[nodes[rid].cright() ].is_leaf()) {
CollapseToLeaf(nodes[rid].cright(), 0.0f);
if (!nodes_[nodes_[rid].RightChild() ].IsLeaf()) {
CollapseToLeaf(nodes_[rid].RightChild(), 0.0f);
}
this->ChangeToLeaf(rid, value);
}
@@ -262,47 +262,47 @@ class TreeModel {
param.num_nodes = 1;
param.num_roots = 1;
param.num_deleted = 0;
nodes.resize(1);
nodes_.resize(1);
}
/*! \brief get node given nid */
inline Node& operator[](int nid) {
return nodes[nid];
return nodes_[nid];
}
/*! \brief get node given nid */
inline const Node& operator[](int nid) const {
return nodes[nid];
return nodes_[nid];
}
/*! \brief get const reference to nodes */
inline const std::vector<Node>& GetNodes() const { return nodes; }
inline const std::vector<Node>& GetNodes() const { return nodes_; }
/*! \brief get node statistics given nid */
inline NodeStat& stat(int nid) {
return stats[nid];
inline NodeStat& Stat(int nid) {
return stats_[nid];
}
/*! \brief get node statistics given nid */
inline const NodeStat& stat(int nid) const {
return stats[nid];
inline const NodeStat& Stat(int nid) const {
return stats_[nid];
}
/*! \brief get leaf vector given nid */
inline bst_float* leafvec(int nid) {
if (leaf_vector.size() == 0) return nullptr;
return& leaf_vector[nid * param.size_leaf_vector];
inline bst_float* Leafvec(int nid) {
if (leaf_vector_.size() == 0) return nullptr;
return& leaf_vector_[nid * param.size_leaf_vector];
}
/*! \brief get leaf vector given nid */
inline const bst_float* leafvec(int nid) const {
if (leaf_vector.size() == 0) return nullptr;
return& leaf_vector[nid * param.size_leaf_vector];
inline const bst_float* Leafvec(int nid) const {
if (leaf_vector_.size() == 0) return nullptr;
return& leaf_vector_[nid * param.size_leaf_vector];
}
/*! \brief initialize the model */
inline void InitModel() {
param.num_nodes = param.num_roots;
nodes.resize(param.num_nodes);
stats.resize(param.num_nodes);
leaf_vector.resize(param.num_nodes * param.size_leaf_vector, 0.0f);
nodes_.resize(param.num_nodes);
stats_.resize(param.num_nodes);
leaf_vector_.resize(param.num_nodes * param.size_leaf_vector, 0.0f);
for (int i = 0; i < param.num_nodes; i ++) {
nodes[i].set_leaf(0.0f);
nodes[i].set_parent(-1);
nodes_[i].SetLeaf(0.0f);
nodes_[i].SetParent(-1);
}
}
/*!
@@ -311,35 +311,35 @@ class TreeModel {
*/
inline void Load(dmlc::Stream* fi) {
CHECK_EQ(fi->Read(&param, sizeof(TreeParam)), sizeof(TreeParam));
nodes.resize(param.num_nodes);
stats.resize(param.num_nodes);
nodes_.resize(param.num_nodes);
stats_.resize(param.num_nodes);
CHECK_NE(param.num_nodes, 0);
CHECK_EQ(fi->Read(dmlc::BeginPtr(nodes), sizeof(Node) * nodes.size()),
sizeof(Node) * nodes.size());
CHECK_EQ(fi->Read(dmlc::BeginPtr(stats), sizeof(NodeStat) * stats.size()),
sizeof(NodeStat) * stats.size());
CHECK_EQ(fi->Read(dmlc::BeginPtr(nodes_), sizeof(Node) * nodes_.size()),
sizeof(Node) * nodes_.size());
CHECK_EQ(fi->Read(dmlc::BeginPtr(stats_), sizeof(NodeStat) * stats_.size()),
sizeof(NodeStat) * stats_.size());
if (param.size_leaf_vector != 0) {
CHECK(fi->Read(&leaf_vector));
CHECK(fi->Read(&leaf_vector_));
}
// rebuild the list of deleted node ids from the loaded nodes
deleted_nodes.resize(0);
deleted_nodes_.resize(0);
for (int i = param.num_roots; i < param.num_nodes; ++i) {
if (nodes[i].is_deleted()) deleted_nodes.push_back(i);
if (nodes_[i].IsDeleted()) deleted_nodes_.push_back(i);
}
CHECK_EQ(static_cast<int>(deleted_nodes.size()), param.num_deleted);
CHECK_EQ(static_cast<int>(deleted_nodes_.size()), param.num_deleted);
}
/*!
* \brief save model to stream
* \param fo output stream
*/
inline void Save(dmlc::Stream* fo) const {
CHECK_EQ(param.num_nodes, static_cast<int>(nodes.size()));
CHECK_EQ(param.num_nodes, static_cast<int>(stats.size()));
CHECK_EQ(param.num_nodes, static_cast<int>(nodes_.size()));
CHECK_EQ(param.num_nodes, static_cast<int>(stats_.size()));
fo->Write(&param, sizeof(TreeParam));
CHECK_NE(param.num_nodes, 0);
fo->Write(dmlc::BeginPtr(nodes), sizeof(Node) * nodes.size());
fo->Write(dmlc::BeginPtr(stats), sizeof(NodeStat) * nodes.size());
if (param.size_leaf_vector != 0) fo->Write(leaf_vector);
fo->Write(dmlc::BeginPtr(nodes_), sizeof(Node) * nodes_.size());
fo->Write(dmlc::BeginPtr(stats_), sizeof(NodeStat) * nodes_.size());
if (param.size_leaf_vector != 0) fo->Write(leaf_vector_);
}
/*!
* \brief add child nodes to node
@@ -348,10 +348,10 @@ class TreeModel {
inline void AddChilds(int nid) {
int pleft = this->AllocNode();
int pright = this->AllocNode();
nodes[nid].cleft_ = pleft;
nodes[nid].cright_ = pright;
nodes[nodes[nid].cleft() ].set_parent(nid, true);
nodes[nodes[nid].cright()].set_parent(nid, false);
nodes_[nid].cleft_ = pleft;
nodes_[nid].cright_ = pright;
nodes_[nodes_[nid].LeftChild() ].SetParent(nid, true);
nodes_[nodes_[nid].RightChild()].SetParent(nid, false);
}
/*!
* \brief only add a right child to a leaf node
@@ -359,8 +359,8 @@ class TreeModel {
*/
inline void AddRightChild(int nid) {
int pright = this->AllocNode();
nodes[nid].right = pright;
nodes[nodes[nid].right].set_parent(nid, false);
nodes_[nid].right = pright;
nodes_[nodes_[nid].right].SetParent(nid, false);
}
/*!
* \brief get current depth
@@ -369,9 +369,9 @@ class TreeModel {
*/
inline int GetDepth(int nid, bool pass_rchild = false) const {
int depth = 0;
while (!nodes[nid].is_root()) {
if (!pass_rchild || nodes[nid].is_left_child()) ++depth;
nid = nodes[nid].parent();
while (!nodes_[nid].IsRoot()) {
if (!pass_rchild || nodes_[nid].IsLeftChild()) ++depth;
nid = nodes_[nid].Parent();
}
return depth;
}
@@ -380,9 +380,9 @@ class TreeModel {
* \param nid node id
*/
inline int MaxDepth(int nid) const {
if (nodes[nid].is_leaf()) return 0;
return std::max(MaxDepth(nodes[nid].cleft())+1,
MaxDepth(nodes[nid].cright())+1);
if (nodes_[nid].IsLeaf()) return 0;
return std::max(MaxDepth(nodes_[nid].LeftChild())+1,
MaxDepth(nodes_[nid].RightChild())+1);
}
/*!
* \brief get maximum depth
@@ -395,7 +395,7 @@ class TreeModel {
return maxd;
}
/*! \brief number of extra nodes besides the root */
inline int num_extra_nodes() const {
inline int NumExtraNodes() const {
return param.num_nodes - param.num_roots - param.num_deleted;
}
};
@@ -421,7 +421,7 @@ struct PathElement {
bst_float zero_fraction;
bst_float one_fraction;
bst_float pweight;
// Default constructor: no member initializers are given, so the fields are
// left default-initialized. Shown in both its old (`{}`) and new
// (`= default`) form.
PathElement() {}
PathElement() = default;
// Member-wise constructor: i -> feature_index, z -> zero_fraction,
// o -> one_fraction, w -> pweight.
PathElement(int i, bst_float z, bst_float o, bst_float w) :
feature_index(i), zero_fraction(z), one_fraction(o), pweight(w) {}
};
@@ -457,19 +457,19 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat> {
* \brief returns the size of the feature vector
* \return the size of the feature vector
*/
inline size_t size() const;
inline size_t Size() const;
/*!
* \brief get ith value
* \param i feature index.
* \return the i-th feature value
*/
inline bst_float fvalue(size_t i) const;
inline bst_float Fvalue(size_t i) const;
/*!
* \brief check whether i-th entry is missing
* \param i feature index.
* \return whether i-th value is missing.
*/
inline bool is_missing(size_t i) const;
inline bool IsMissing(size_t i) const;
private:
/*!
@@ -480,7 +480,7 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat> {
bst_float fvalue;
int flag;
};
std::vector<Entry> data;
std::vector<Entry> data_;
};
/*!
* \brief get the leaf index
@@ -562,63 +562,63 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat> {
private:
inline bst_float FillNodeMeanValue(int nid);
std::vector<bst_float> node_mean_values;
std::vector<bst_float> node_mean_values_;
};
// Implementations of the inline functions.
// You do not need to read these if you only use the model.
/*!
 * \brief resize the feature vector to `size` entries and mark every entry
 *  with flag == -1, the value IsMissing() tests for.
 * \param size number of feature slots to hold
 */
inline void RegTree::FVec::Init(size_t size) {
// Template entry whose flag carries the "missing" marker.
Entry e; e.flag = -1;
// NOTE(review): the resize/fill pair appears in both the old (`data`) and
// renamed (`data_`) spelling.
data.resize(size);
std::fill(data.begin(), data.end(), e);
data_.resize(size);
std::fill(data_.begin(), data_.end(), e);
}
/*!
 * \brief copy the feature values of one sparse row into the vector.
 * \param inst row whose (index, fvalue) entries are written into the slots
 */
inline void RegTree::FVec::Fill(const RowBatch::Inst& inst) {
for (bst_uint i = 0; i < inst.length; ++i) {
// Entries whose feature index lies beyond the allocated size are ignored.
// NOTE(review): guard and store are shown in both the old (`data`) and
// renamed (`data_`) spelling.
if (inst[i].index >= data.size()) continue;
data[inst[i].index].fvalue = inst[i].fvalue;
if (inst[i].index >= data_.size()) continue;
// Only `fvalue` is written here; whether that also clears the missing flag
// depends on how Entry lays out fvalue/flag -- TODO confirm.
data_[inst[i].index].fvalue = inst[i].fvalue;
}
}
/*!
 * \brief reset the slots touched by a row back to "missing" (flag == -1).
 * \param inst row whose feature indices select the slots to reset
 */
inline void RegTree::FVec::Drop(const RowBatch::Inst& inst) {
for (bst_uint i = 0; i < inst.length; ++i) {
// Same out-of-range guard as Fill: indices past the end are skipped.
// NOTE(review): guard and reset are shown in both the old (`data`) and
// renamed (`data_`) spelling.
if (inst[i].index >= data.size()) continue;
data[inst[i].index].flag = -1;
if (inst[i].index >= data_.size()) continue;
data_[inst[i].index].flag = -1;
}
}
// Returns the number of feature slots currently held by the vector.
// NOTE(review): signature and return are shown in both the old
// (size()/`data`) and renamed (Size()/`data_`) spelling.
inline size_t RegTree::FVec::size() const {
return data.size();
inline size_t RegTree::FVec::Size() const {
return data_.size();
}
// Returns the stored value of feature `i`. The index is used directly with
// operator[], so no bounds check is performed here.
// NOTE(review): signature and return are shown in both the old
// (fvalue()/`data`) and renamed (Fvalue()/`data_`) spelling.
inline bst_float RegTree::FVec::fvalue(size_t i) const {
return data[i].fvalue;
inline bst_float RegTree::FVec::Fvalue(size_t i) const {
return data_[i].fvalue;
}
// Returns true when slot `i` still carries the marker flag == -1 that Init()
// and Drop() write. The index is not bounds-checked.
// NOTE(review): signature and return are shown in both the old
// (is_missing()/`data`) and renamed (IsMissing()/`data_`) spelling.
inline bool RegTree::FVec::is_missing(size_t i) const {
return data[i].flag == -1;
inline bool RegTree::FVec::IsMissing(size_t i) const {
return data_[i].flag == -1;
}
/*!
 * \brief walk from a root down to a leaf following the split decisions
 *  for the given feature vector.
 * \param feat feature vector supplying values and missing flags
 * \param root_id id of the node to start from
 * \return id of the leaf node reached
 */
inline int RegTree::GetLeafIndex(const RegTree::FVec& feat, unsigned root_id) const {
// NOTE(review): the traversal is shown in both the old (is_leaf/split_index)
// and renamed (IsLeaf/SplitIndex) spelling.
int pid = static_cast<int>(root_id);
while (!(*this)[pid].is_leaf()) {
unsigned split_index = (*this)[pid].split_index();
pid = this->GetNext(pid, feat.fvalue(split_index), feat.is_missing(split_index));
auto pid = static_cast<int>(root_id);
while (!(*this)[pid].IsLeaf()) {
unsigned split_index = (*this)[pid].SplitIndex();
// GetNext picks the default, left, or right child from the feature's
// value and its missing flag.
pid = this->GetNext(pid, feat.Fvalue(split_index), feat.IsMissing(split_index));
}
return pid;
}
/*!
 * \brief return the leaf value of the leaf that `feat` is routed to.
 * \param feat feature vector used for the traversal
 * \param root_id id of the node the traversal starts from
 * \return leaf value of the node returned by GetLeafIndex
 */
inline bst_float RegTree::Predict(const RegTree::FVec& feat, unsigned root_id) const {
int pid = this->GetLeafIndex(feat, root_id);
// NOTE(review): the return is shown in both the old (leaf_value) and renamed
// (LeafValue) spelling.
return (*this)[pid].leaf_value();
return (*this)[pid].LeafValue();
}
inline void RegTree::FillNodeMeanValues() {
size_t num_nodes = this->param.num_nodes;
if (this->node_mean_values.size() == num_nodes) {
if (this->node_mean_values_.size() == num_nodes) {
return;
}
this->node_mean_values.resize(num_nodes);
this->node_mean_values_.resize(num_nodes);
for (int root_id = 0; root_id < param.num_roots; ++root_id) {
this->FillNodeMeanValue(root_id);
}
@@ -627,40 +627,39 @@ inline void RegTree::FillNodeMeanValues() {
/*!
 * \brief recursively compute the mean value of the subtree rooted at `nid`
 *  and store it in the per-node mean-value array.
 * \param nid id of the node to process
 * \return the value stored for `nid`
 */
inline bst_float RegTree::FillNodeMeanValue(int nid) {
bst_float result;
auto& node = (*this)[nid];
// NOTE(review): each statement below is shown in both the old
// (is_leaf/cleft/stat/node_mean_values) and renamed
// (IsLeaf/LeftChild/Stat/node_mean_values_) spelling.
if (node.is_leaf()) {
result = node.leaf_value();
if (node.IsLeaf()) {
// A leaf's mean is simply its own leaf value.
result = node.LeafValue();
} else {
result = this->FillNodeMeanValue(node.cleft()) * this->stat(node.cleft()).sum_hess;
result += this->FillNodeMeanValue(node.cright()) * this->stat(node.cright()).sum_hess;
result /= this->stat(nid).sum_hess;
// Internal node: average of the two children's means, each weighted by
// the child's sum_hess and normalised by this node's sum_hess.
// NOTE(review): no guard against sum_hess == 0 is visible here -- confirm
// that the statistics are always positive for internal nodes.
result = this->FillNodeMeanValue(node.LeftChild()) * this->Stat(node.LeftChild()).sum_hess;
result += this->FillNodeMeanValue(node.RightChild()) * this->Stat(node.RightChild()).sum_hess;
result /= this->Stat(nid).sum_hess;
}
this->node_mean_values[nid] = result;
this->node_mean_values_[nid] = result;
return result;
}
/*!
 * \brief approximate per-feature contributions for one prediction by
 *  attributing, along the decision path, the change in node mean value at
 *  each split to the feature that was split on.
 * \param feat feature vector used to follow the decision path
 * \param root_id id of the node the path starts from
 * \param out_contribs accumulator indexed by feature; the slot at index
 *  feat.Size() receives the bias term, so it must hold at least
 *  feat.Size() + 1 values. Values are added to, not overwritten.
 */
inline void RegTree::CalculateContributionsApprox(const RegTree::FVec& feat, unsigned root_id,
bst_float *out_contribs) const {
// The node mean values must have been filled in before this is called.
// NOTE(review): throughout this body each statement is shown in both the old
// and the renamed spelling (node_mean_values vs node_mean_values_, etc.).
CHECK_GT(this->node_mean_values.size(), 0U);
CHECK_GT(this->node_mean_values_.size(), 0U);
// this follows the idea of http://blog.datadive.net/interpreting-random-forests/
bst_float node_value;
unsigned split_index;
int pid = static_cast<int>(root_id);
unsigned split_index = 0;
auto pid = static_cast<int>(root_id);
// update bias value
node_value = this->node_mean_values[pid];
out_contribs[feat.size()] += node_value;
if ((*this)[pid].is_leaf()) {
bst_float node_value = this->node_mean_values_[pid];
out_contribs[feat.Size()] += node_value;
if ((*this)[pid].IsLeaf()) {
// nothing to do anymore
return;
}
while (!(*this)[pid].is_leaf()) {
split_index = (*this)[pid].split_index();
pid = this->GetNext(pid, feat.fvalue(split_index), feat.is_missing(split_index));
bst_float new_value = this->node_mean_values[pid];
while (!(*this)[pid].IsLeaf()) {
split_index = (*this)[pid].SplitIndex();
pid = this->GetNext(pid, feat.Fvalue(split_index), feat.IsMissing(split_index));
bst_float new_value = this->node_mean_values_[pid];
// update feature weight
out_contribs[split_index] += new_value - node_value;
node_value = new_value;
}
bst_float leaf_value = (*this)[pid].leaf_value();
bst_float leaf_value = (*this)[pid].LeafValue();
// update leaf feature weight
// (credited to the last split feature; this difference is zero whenever the
// stored mean of the leaf equals its leaf value, which is what
// FillNodeMeanValue stores for leaves)
out_contribs[split_index] += leaf_value - node_value;
}
@@ -749,33 +748,33 @@ inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
ExtendPath(unique_path, unique_depth, parent_zero_fraction,
parent_one_fraction, parent_feature_index);
}
const unsigned split_index = node.split_index();
const unsigned split_index = node.SplitIndex();
// leaf node
if (node.is_leaf()) {
if (node.IsLeaf()) {
for (unsigned i = 1; i <= unique_depth; ++i) {
const bst_float w = UnwoundPathSum(unique_path, unique_depth, i);
const PathElement &el = unique_path[i];
phi[el.feature_index] += w * (el.one_fraction - el.zero_fraction)
* node.leaf_value() * condition_fraction;
* node.LeafValue() * condition_fraction;
}
// internal node
} else {
// find which branch is "hot" (meaning x would follow it)
unsigned hot_index = 0;
if (feat.is_missing(split_index)) {
hot_index = node.cdefault();
} else if (feat.fvalue(split_index) < node.split_cond()) {
hot_index = node.cleft();
if (feat.IsMissing(split_index)) {
hot_index = node.DefaultChild();
} else if (feat.Fvalue(split_index) < node.SplitCond()) {
hot_index = node.LeftChild();
} else {
hot_index = node.cright();
hot_index = node.RightChild();
}
const unsigned cold_index = (static_cast<int>(hot_index) == node.cleft() ?
node.cright() : node.cleft());
const bst_float w = this->stat(node_index).sum_hess;
const bst_float hot_zero_fraction = this->stat(hot_index).sum_hess / w;
const bst_float cold_zero_fraction = this->stat(cold_index).sum_hess / w;
const unsigned cold_index = (static_cast<int>(hot_index) == node.LeftChild() ?
node.RightChild() : node.LeftChild());
const bst_float w = this->Stat(node_index).sum_hess;
const bst_float hot_zero_fraction = this->Stat(hot_index).sum_hess / w;
const bst_float cold_zero_fraction = this->Stat(cold_index).sum_hess / w;
bst_float incoming_zero_fraction = 1;
bst_float incoming_one_fraction = 1;
@@ -820,13 +819,13 @@ inline void RegTree::CalculateContributions(const RegTree::FVec& feat, unsigned
unsigned condition_feature) const {
// find the expected value of the tree's predictions
if (condition == 0) {
bst_float node_value = this->node_mean_values[static_cast<int>(root_id)];
out_contribs[feat.size()] += node_value;
bst_float node_value = this->node_mean_values_[static_cast<int>(root_id)];
out_contribs[feat.Size()] += node_value;
}
// Preallocate space for the unique path data
const int maxd = this->MaxDepth(root_id) + 2;
PathElement *unique_path_data = new PathElement[(maxd * (maxd + 1)) / 2];
auto *unique_path_data = new PathElement[(maxd * (maxd + 1)) / 2];
TreeShap(feat, out_contribs, root_id, 0, unique_path_data,
1, 1, -1, condition, condition_feature, 1);
@@ -835,14 +834,14 @@ inline void RegTree::CalculateContributions(const RegTree::FVec& feat, unsigned
/*! \brief get next position of the tree given current pid */
inline int RegTree::GetNext(int pid, bst_float fvalue, bool is_unknown) const {
// Decision rule for a single split node `pid`:
//   - feature value unknown      -> default child
//   - fvalue <  split condition  -> left child
//   - otherwise                  -> right child
// NOTE(review): each accessor call is shown in both the old
// (split_cond/cdefault/cleft/cright) and renamed
// (SplitCond/DefaultChild/LeftChild/RightChild) spelling.
bst_float split_value = (*this)[pid].split_cond();
bst_float split_value = (*this)[pid].SplitCond();
if (is_unknown) {
return (*this)[pid].cdefault();
return (*this)[pid].DefaultChild();
} else {
if (fvalue < split_value) {
return (*this)[pid].cleft();
return (*this)[pid].LeftChild();
} else {
return (*this)[pid].cright();
return (*this)[pid].RightChild();
}
}
}

View File

@@ -25,7 +25,7 @@ namespace xgboost {
class TreeUpdater {
public:
/*! \brief virtual destructor */
virtual ~TreeUpdater() {}
virtual ~TreeUpdater() = default;
/*!
* \brief Initialize the updater with given arguments.
* \param args arguments to the objective function.
@@ -40,7 +40,7 @@ class TreeUpdater {
* but maybe different random seeds, usually one tree is passed in at a time,
* there can be multiple trees when we train random forest style model
*/
virtual void Update(HostDeviceVector<bst_gpair>* gpair,
virtual void Update(HostDeviceVector<GradientPair>* gpair,
DMatrix* data,
const std::vector<RegTree*>& trees) = 0;