Clang-tidy static analysis (#3222)
* Clang-tidy static analysis * Modernise checks * Google coding standard checks * Identifier renaming according to Google style
This commit is contained in:
@@ -81,20 +81,19 @@ namespace xgboost {
|
||||
* \brief unsigned integer type used in boost,
|
||||
* used for feature index and row index.
|
||||
*/
|
||||
typedef uint32_t bst_uint;
|
||||
typedef int32_t bst_int;
|
||||
using bst_uint = uint32_t; // NOLINT
|
||||
using bst_int = int32_t; // NOLINT
|
||||
/*! \brief long integers */
|
||||
typedef uint64_t bst_ulong; // NOLINT(*)
|
||||
/*! \brief float type, used for storing statistics */
|
||||
typedef float bst_float;
|
||||
|
||||
using bst_float = float; // NOLINT
|
||||
|
||||
namespace detail {
|
||||
/*! \brief Implementation of gradient statistics pair. Template specialisation
|
||||
* may be used to overload different gradients types e.g. low precision, high
|
||||
* precision, integer, floating point. */
|
||||
template <typename T>
|
||||
class bst_gpair_internal {
|
||||
class GradientPairInternal {
|
||||
/*! \brief gradient statistics */
|
||||
T grad_;
|
||||
/*! \brief second order gradient statistics */
|
||||
@@ -104,23 +103,23 @@ class bst_gpair_internal {
|
||||
XGBOOST_DEVICE void SetHess(float h) { hess_ = h; }
|
||||
|
||||
public:
|
||||
typedef T value_t;
|
||||
using ValueT = T;
|
||||
|
||||
XGBOOST_DEVICE bst_gpair_internal() : grad_(0), hess_(0) {}
|
||||
XGBOOST_DEVICE GradientPairInternal() : grad_(0), hess_(0) {}
|
||||
|
||||
XGBOOST_DEVICE bst_gpair_internal(float grad, float hess) {
|
||||
XGBOOST_DEVICE GradientPairInternal(float grad, float hess) {
|
||||
SetGrad(grad);
|
||||
SetHess(hess);
|
||||
}
|
||||
|
||||
// Copy constructor for the same value type
|
||||
XGBOOST_DEVICE bst_gpair_internal(const bst_gpair_internal<T> &g)
|
||||
: grad_(g.grad_), hess_(g.hess_) {}
|
||||
XGBOOST_DEVICE GradientPairInternal(const GradientPairInternal<T> &g)
|
||||
: grad_(g.grad_), hess_(g.hess_) {} // NOLINT
|
||||
|
||||
// Copy constructor if different value type - use getters and setters to
|
||||
// perform conversion
|
||||
template <typename T2>
|
||||
XGBOOST_DEVICE bst_gpair_internal(const bst_gpair_internal<T2> &g) {
|
||||
XGBOOST_DEVICE explicit GradientPairInternal(const GradientPairInternal<T2> &g) {
|
||||
SetGrad(g.GetGrad());
|
||||
SetHess(g.GetHess());
|
||||
}
|
||||
@@ -128,85 +127,85 @@ class bst_gpair_internal {
|
||||
XGBOOST_DEVICE float GetGrad() const { return grad_; }
|
||||
XGBOOST_DEVICE float GetHess() const { return hess_; }
|
||||
|
||||
XGBOOST_DEVICE bst_gpair_internal<T> &operator+=(
|
||||
const bst_gpair_internal<T> &rhs) {
|
||||
XGBOOST_DEVICE GradientPairInternal<T> &operator+=(
|
||||
const GradientPairInternal<T> &rhs) {
|
||||
grad_ += rhs.grad_;
|
||||
hess_ += rhs.hess_;
|
||||
return *this;
|
||||
}
|
||||
|
||||
XGBOOST_DEVICE bst_gpair_internal<T> operator+(
|
||||
const bst_gpair_internal<T> &rhs) const {
|
||||
bst_gpair_internal<T> g;
|
||||
XGBOOST_DEVICE GradientPairInternal<T> operator+(
|
||||
const GradientPairInternal<T> &rhs) const {
|
||||
GradientPairInternal<T> g;
|
||||
g.grad_ = grad_ + rhs.grad_;
|
||||
g.hess_ = hess_ + rhs.hess_;
|
||||
return g;
|
||||
}
|
||||
|
||||
XGBOOST_DEVICE bst_gpair_internal<T> &operator-=(
|
||||
const bst_gpair_internal<T> &rhs) {
|
||||
XGBOOST_DEVICE GradientPairInternal<T> &operator-=(
|
||||
const GradientPairInternal<T> &rhs) {
|
||||
grad_ -= rhs.grad_;
|
||||
hess_ -= rhs.hess_;
|
||||
return *this;
|
||||
}
|
||||
|
||||
XGBOOST_DEVICE bst_gpair_internal<T> operator-(
|
||||
const bst_gpair_internal<T> &rhs) const {
|
||||
bst_gpair_internal<T> g;
|
||||
XGBOOST_DEVICE GradientPairInternal<T> operator-(
|
||||
const GradientPairInternal<T> &rhs) const {
|
||||
GradientPairInternal<T> g;
|
||||
g.grad_ = grad_ - rhs.grad_;
|
||||
g.hess_ = hess_ - rhs.hess_;
|
||||
return g;
|
||||
}
|
||||
|
||||
XGBOOST_DEVICE bst_gpair_internal(int value) {
|
||||
*this = bst_gpair_internal<T>(static_cast<float>(value),
|
||||
XGBOOST_DEVICE explicit GradientPairInternal(int value) {
|
||||
*this = GradientPairInternal<T>(static_cast<float>(value),
|
||||
static_cast<float>(value));
|
||||
}
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &os,
|
||||
const bst_gpair_internal<T> &g) {
|
||||
const GradientPairInternal<T> &g) {
|
||||
os << g.GetGrad() << "/" << g.GetHess();
|
||||
return os;
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
inline XGBOOST_DEVICE float bst_gpair_internal<int64_t>::GetGrad() const {
|
||||
inline XGBOOST_DEVICE float GradientPairInternal<int64_t>::GetGrad() const {
|
||||
return grad_ * 1e-4f;
|
||||
}
|
||||
template<>
|
||||
inline XGBOOST_DEVICE float bst_gpair_internal<int64_t>::GetHess() const {
|
||||
inline XGBOOST_DEVICE float GradientPairInternal<int64_t>::GetHess() const {
|
||||
return hess_ * 1e-4f;
|
||||
}
|
||||
template<>
|
||||
inline XGBOOST_DEVICE void bst_gpair_internal<int64_t>::SetGrad(float g) {
|
||||
inline XGBOOST_DEVICE void GradientPairInternal<int64_t>::SetGrad(float g) {
|
||||
grad_ = static_cast<int64_t>(std::round(g * 1e4));
|
||||
}
|
||||
template<>
|
||||
inline XGBOOST_DEVICE void bst_gpair_internal<int64_t>::SetHess(float h) {
|
||||
inline XGBOOST_DEVICE void GradientPairInternal<int64_t>::SetHess(float h) {
|
||||
hess_ = static_cast<int64_t>(std::round(h * 1e4));
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
/*! \brief gradient statistics pair usually needed in gradient boosting */
|
||||
typedef detail::bst_gpair_internal<float> bst_gpair;
|
||||
using GradientPair = detail::GradientPairInternal<float>;
|
||||
|
||||
/*! \brief High precision gradient statistics pair */
|
||||
typedef detail::bst_gpair_internal<double> bst_gpair_precise;
|
||||
using GradientPairPrecise = detail::GradientPairInternal<double>;
|
||||
|
||||
/*! \brief High precision gradient statistics pair with integer backed
|
||||
* storage. Operators are associative where floating point versions are not
|
||||
* associative. */
|
||||
typedef detail::bst_gpair_internal<int64_t> bst_gpair_integer;
|
||||
using GradientPairInteger = detail::GradientPairInternal<int64_t>;
|
||||
|
||||
/*! \brief small eps gap for minimum split decision. */
|
||||
const bst_float rt_eps = 1e-6f;
|
||||
const bst_float kRtEps = 1e-6f;
|
||||
|
||||
/*! \brief define unsigned long for openmp loop */
|
||||
typedef dmlc::omp_ulong omp_ulong;
|
||||
using omp_ulong = dmlc::omp_ulong; // NOLINT
|
||||
/*! \brief define unsigned int for openmp loop */
|
||||
typedef dmlc::omp_uint bst_omp_uint;
|
||||
using bst_omp_uint = dmlc::omp_uint; // NOLINT
|
||||
|
||||
/*!
|
||||
* \brief define compatible keywords in g++
|
||||
|
||||
@@ -30,16 +30,16 @@ typedef uint64_t bst_ulong; // NOLINT(*)
|
||||
|
||||
|
||||
/*! \brief handle to DMatrix */
|
||||
typedef void *DMatrixHandle;
|
||||
typedef void *DMatrixHandle; // NOLINT(*)
|
||||
/*! \brief handle to Booster */
|
||||
typedef void *BoosterHandle;
|
||||
typedef void *BoosterHandle; // NOLINT(*)
|
||||
/*! \brief handle to a data iterator */
|
||||
typedef void *DataIterHandle;
|
||||
typedef void *DataIterHandle; // NOLINT(*)
|
||||
/*! \brief handle to an internal data holder. */
|
||||
typedef void *DataHolderHandle;
|
||||
typedef void *DataHolderHandle; // NOLINT(*)
|
||||
|
||||
/*! \brief Mini batch used in XGBoost Data Iteration */
|
||||
typedef struct {
|
||||
typedef struct { // NOLINT(*)
|
||||
/*! \brief number of rows in the minibatch */
|
||||
size_t size;
|
||||
/*! \brief row pointer to the rows in the data */
|
||||
@@ -66,7 +66,7 @@ typedef struct {
|
||||
* \param handle The handle to the callback.
|
||||
* \param batch The data content to be set.
|
||||
*/
|
||||
XGB_EXTERN_C typedef int XGBCallbackSetData(
|
||||
XGB_EXTERN_C typedef int XGBCallbackSetData( // NOLINT(*)
|
||||
DataHolderHandle handle, XGBoostBatchCSR batch);
|
||||
|
||||
/*!
|
||||
@@ -80,9 +80,8 @@ XGB_EXTERN_C typedef int XGBCallbackSetData(
|
||||
* \param set_function_handle The handle to be passed to set function.
|
||||
* \return 0 if we are reaching the end and batch is not returned.
|
||||
*/
|
||||
XGB_EXTERN_C typedef int XGBCallbackDataIterNext(
|
||||
DataIterHandle data_handle,
|
||||
XGBCallbackSetData* set_function,
|
||||
XGB_EXTERN_C typedef int XGBCallbackDataIterNext( // NOLINT(*)
|
||||
DataIterHandle data_handle, XGBCallbackSetData *set_function,
|
||||
DataHolderHandle set_function_handle);
|
||||
|
||||
/*!
|
||||
@@ -216,11 +215,9 @@ XGB_DLL int XGDMatrixCreateFromMat(const float *data,
|
||||
* \param nthread number of threads (up to maximum cores available, if <=0 use all cores)
|
||||
* \return 0 when success, -1 when failure happens
|
||||
*/
|
||||
XGB_DLL int XGDMatrixCreateFromMat_omp(const float *data,
|
||||
bst_ulong nrow,
|
||||
bst_ulong ncol,
|
||||
float missing,
|
||||
DMatrixHandle *out,
|
||||
XGB_DLL int XGDMatrixCreateFromMat_omp(const float *data, // NOLINT
|
||||
bst_ulong nrow, bst_ulong ncol,
|
||||
float missing, DMatrixHandle *out,
|
||||
int nthread);
|
||||
/*!
|
||||
* \brief create a new dmatrix from sliced content of existing matrix
|
||||
|
||||
@@ -30,44 +30,45 @@ enum DataType {
|
||||
/*!
|
||||
* \brief Meta information about the dataset; always sits in memory.
|
||||
*/
|
||||
struct MetaInfo {
|
||||
class MetaInfo {
|
||||
public:
|
||||
/*! \brief number of rows in the data */
|
||||
uint64_t num_row;
|
||||
uint64_t num_row_{0};
|
||||
/*! \brief number of columns in the data */
|
||||
uint64_t num_col;
|
||||
uint64_t num_col_{0};
|
||||
/*! \brief number of nonzero entries in the data */
|
||||
uint64_t num_nonzero;
|
||||
uint64_t num_nonzero_{0};
|
||||
/*! \brief label of each instance */
|
||||
std::vector<bst_float> labels;
|
||||
std::vector<bst_float> labels_;
|
||||
/*!
|
||||
* \brief specified root index of each instance,
|
||||
* can be used for multi task setting
|
||||
*/
|
||||
std::vector<bst_uint> root_index;
|
||||
std::vector<bst_uint> root_index_;
|
||||
/*!
|
||||
* \brief the index of begin and end of a group
|
||||
* needed when the learning task is ranking.
|
||||
*/
|
||||
std::vector<bst_uint> group_ptr;
|
||||
std::vector<bst_uint> group_ptr_;
|
||||
/*! \brief weights of each instance, optional */
|
||||
std::vector<bst_float> weights;
|
||||
std::vector<bst_float> weights_;
|
||||
/*!
|
||||
* \brief initialized margins,
|
||||
* if specified, xgboost will start from this init margin
|
||||
* can be used to specify initial prediction to boost from.
|
||||
*/
|
||||
std::vector<bst_float> base_margin;
|
||||
std::vector<bst_float> base_margin_;
|
||||
/*! \brief version flag, used to check version of this info */
|
||||
static const int kVersion = 1;
|
||||
/*! \brief default constructor */
|
||||
MetaInfo() : num_row(0), num_col(0), num_nonzero(0) {}
|
||||
MetaInfo() = default;
|
||||
/*!
|
||||
* \brief Get the weight of the i-th instance.
|
||||
* \param i Instance index.
|
||||
* \return The weight.
|
||||
*/
|
||||
inline bst_float GetWeight(size_t i) const {
|
||||
return weights.size() != 0 ? weights[i] : 1.0f;
|
||||
return weights_.size() != 0 ? weights_[i] : 1.0f;
|
||||
}
|
||||
/*!
|
||||
* \brief Get the root index of i-th instance.
|
||||
@@ -75,20 +76,20 @@ struct MetaInfo {
|
||||
* \return The pre-defined root index of i-th instance.
|
||||
*/
|
||||
inline unsigned GetRoot(size_t i) const {
|
||||
return root_index.size() != 0 ? root_index[i] : 0U;
|
||||
return root_index_.size() != 0 ? root_index_[i] : 0U;
|
||||
}
|
||||
/*! \brief get sorted indexes (argsort) of labels by absolute value (used by cox loss) */
|
||||
inline const std::vector<size_t>& LabelAbsSort() const {
|
||||
if (label_order_cache.size() == labels.size()) {
|
||||
return label_order_cache;
|
||||
if (label_order_cache_.size() == labels_.size()) {
|
||||
return label_order_cache_;
|
||||
}
|
||||
label_order_cache.resize(labels.size());
|
||||
std::iota(label_order_cache.begin(), label_order_cache.end(), 0);
|
||||
const auto l = labels;
|
||||
XGBOOST_PARALLEL_SORT(label_order_cache.begin(), label_order_cache.end(),
|
||||
label_order_cache_.resize(labels_.size());
|
||||
std::iota(label_order_cache_.begin(), label_order_cache_.end(), 0);
|
||||
const auto l = labels_;
|
||||
XGBOOST_PARALLEL_SORT(label_order_cache_.begin(), label_order_cache_.end(),
|
||||
[&l](size_t i1, size_t i2) {return std::abs(l[i1]) < std::abs(l[i2]);});
|
||||
|
||||
return label_order_cache;
|
||||
return label_order_cache_;
|
||||
}
|
||||
/*! \brief clear all the information */
|
||||
void Clear();
|
||||
@@ -113,7 +114,7 @@ struct MetaInfo {
|
||||
|
||||
private:
|
||||
/*! \brief argsort of labels */
|
||||
mutable std::vector<size_t> label_order_cache;
|
||||
mutable std::vector<size_t> label_order_cache_;
|
||||
};
|
||||
|
||||
/*! \brief read-only sparse instance batch in CSR format */
|
||||
@@ -125,7 +126,7 @@ struct SparseBatch {
|
||||
/*! \brief feature value */
|
||||
bst_float fvalue;
|
||||
/*! \brief default constructor */
|
||||
Entry() {}
|
||||
Entry() = default;
|
||||
/*!
|
||||
* \brief constructor with index and value
|
||||
* \param index The feature or row index.
|
||||
@@ -141,11 +142,11 @@ struct SparseBatch {
|
||||
/*! \brief an instance of sparse vector in the batch */
|
||||
struct Inst {
|
||||
/*! \brief pointer to the elements*/
|
||||
const Entry *data;
|
||||
const Entry *data{nullptr};
|
||||
/*! \brief length of the instance */
|
||||
bst_uint length;
|
||||
bst_uint length{0};
|
||||
/*! \brief constructor */
|
||||
Inst() : data(0), length(0) {}
|
||||
Inst() = default;
|
||||
Inst(const Entry *data, bst_uint length) : data(data), length(length) {}
|
||||
/*! \brief get i-th pair in the sparse vector*/
|
||||
inline const Entry& operator[](size_t i) const {
|
||||
@@ -167,7 +168,7 @@ struct RowBatch : public SparseBatch {
|
||||
const Entry *data_ptr;
|
||||
/*! \brief get i-th row from the batch */
|
||||
inline Inst operator[](size_t i) const {
|
||||
return Inst(data_ptr + ind_ptr[i], static_cast<bst_uint>(ind_ptr[i + 1] - ind_ptr[i]));
|
||||
return {data_ptr + ind_ptr[i], static_cast<bst_uint>(ind_ptr[i + 1] - ind_ptr[i])};
|
||||
}
|
||||
};
|
||||
|
||||
@@ -206,16 +207,16 @@ class DataSource : public dmlc::DataIter<RowBatch> {
|
||||
* \brief A vector-like structure to represent set of rows.
|
||||
* But saves the memory when all rows are in the set (common case in xgb)
|
||||
*/
|
||||
struct RowSet {
|
||||
class RowSet {
|
||||
public:
|
||||
/*! \return i-th row index */
|
||||
inline bst_uint operator[](size_t i) const;
|
||||
/*! \return the size of the set. */
|
||||
inline size_t size() const;
|
||||
inline size_t Size() const;
|
||||
/*! \brief push the index back to the set */
|
||||
inline void push_back(bst_uint i);
|
||||
inline void PushBack(bst_uint i);
|
||||
/*! \brief clear the set */
|
||||
inline void clear();
|
||||
inline void Clear();
|
||||
/*!
|
||||
* \brief save rowset to file.
|
||||
* \param fo The file to be saved.
|
||||
@@ -228,11 +229,11 @@ struct RowSet {
|
||||
*/
|
||||
inline bool Load(dmlc::Stream* fi);
|
||||
/*! \brief constructor */
|
||||
RowSet() : size_(0) {}
|
||||
RowSet() = default;
|
||||
|
||||
private:
|
||||
/*! \brief The internal data structure of size */
|
||||
uint64_t size_;
|
||||
uint64_t size_{0};
|
||||
/*! \brief The internal data structure of row set if not all*/
|
||||
std::vector<bst_uint> rows_;
|
||||
};
|
||||
@@ -250,11 +251,11 @@ struct RowSet {
|
||||
class DMatrix {
|
||||
public:
|
||||
/*! \brief default constructor */
|
||||
DMatrix() : cache_learner_ptr_(nullptr) {}
|
||||
DMatrix() = default;
|
||||
/*! \brief meta information of the dataset */
|
||||
virtual MetaInfo& info() = 0;
|
||||
virtual MetaInfo& Info() = 0;
|
||||
/*! \brief meta information of the dataset */
|
||||
virtual const MetaInfo& info() const = 0;
|
||||
virtual const MetaInfo& Info() const = 0;
|
||||
/*!
|
||||
* \brief get the row iterator, reset to beginning position
|
||||
* \note Only either RowIterator or column Iterator can be active.
|
||||
@@ -291,9 +292,9 @@ class DMatrix {
|
||||
/*! \brief get column density */
|
||||
virtual float GetColDensity(size_t cidx) const = 0;
|
||||
/*! \return reference of buffered rowset, in column access */
|
||||
virtual const RowSet& buffered_rowset() const = 0;
|
||||
virtual const RowSet& BufferedRowset() const = 0;
|
||||
/*! \brief virtual destructor */
|
||||
virtual ~DMatrix() {}
|
||||
virtual ~DMatrix() = default;
|
||||
/*!
|
||||
* \brief Save DMatrix to local file.
|
||||
* The saved file only works for non-sharded dataset(single machine training).
|
||||
@@ -343,7 +344,7 @@ class DMatrix {
|
||||
// allow learner class to access this field.
|
||||
friend class LearnerImpl;
|
||||
/*! \brief public field to back ref cached matrix. */
|
||||
LearnerImpl* cache_learner_ptr_;
|
||||
LearnerImpl* cache_learner_ptr_{nullptr};
|
||||
};
|
||||
|
||||
// implementation of inline functions
|
||||
@@ -351,15 +352,15 @@ inline bst_uint RowSet::operator[](size_t i) const {
|
||||
return rows_.size() == 0 ? static_cast<bst_uint>(i) : rows_[i];
|
||||
}
|
||||
|
||||
inline size_t RowSet::size() const {
|
||||
inline size_t RowSet::Size() const {
|
||||
return size_;
|
||||
}
|
||||
|
||||
inline void RowSet::clear() {
|
||||
inline void RowSet::Clear() {
|
||||
rows_.clear(); size_ = 0;
|
||||
}
|
||||
|
||||
inline void RowSet::push_back(bst_uint i) {
|
||||
inline void RowSet::PushBack(bst_uint i) {
|
||||
if (rows_.size() == 0) {
|
||||
if (i == size_) {
|
||||
++size_; return;
|
||||
|
||||
@@ -45,7 +45,7 @@ class FeatureMap {
|
||||
*/
|
||||
inline void PushBack(int fid, const char *fname, const char *ftype) {
|
||||
CHECK_EQ(fid, static_cast<int>(names_.size()));
|
||||
names_.push_back(std::string(fname));
|
||||
names_.emplace_back(fname);
|
||||
types_.push_back(GetType(ftype));
|
||||
}
|
||||
/*! \brief clear the feature map */
|
||||
@@ -54,11 +54,11 @@ class FeatureMap {
|
||||
types_.clear();
|
||||
}
|
||||
/*! \return number of known features */
|
||||
inline size_t size() const {
|
||||
inline size_t Size() const {
|
||||
return names_.size();
|
||||
}
|
||||
/*! \return name of specific feature */
|
||||
inline const char* name(size_t idx) const {
|
||||
inline const char* Name(size_t idx) const {
|
||||
CHECK_LT(idx, names_.size()) << "FeatureMap feature index exceed bound";
|
||||
return names_[idx].c_str();
|
||||
}
|
||||
@@ -75,7 +75,7 @@ class FeatureMap {
|
||||
* \return The translated type.
|
||||
*/
|
||||
inline static Type GetType(const char* tname) {
|
||||
using namespace std;
|
||||
using std::strcmp;
|
||||
if (!strcmp("i", tname)) return kIndicator;
|
||||
if (!strcmp("q", tname)) return kQuantitive;
|
||||
if (!strcmp("int", tname)) return kInteger;
|
||||
|
||||
@@ -27,7 +27,7 @@ namespace xgboost {
|
||||
class GradientBooster {
|
||||
public:
|
||||
/*! \brief virtual destructor */
|
||||
virtual ~GradientBooster() {}
|
||||
virtual ~GradientBooster() = default;
|
||||
/*!
|
||||
* \brief set configuration from pair iterators.
|
||||
* \param begin The beginning iterator.
|
||||
@@ -69,7 +69,7 @@ class GradientBooster {
|
||||
* the booster may change content of gpair
|
||||
*/
|
||||
virtual void DoBoost(DMatrix* p_fmat,
|
||||
HostDeviceVector<bst_gpair>* in_gpair,
|
||||
HostDeviceVector<GradientPair>* in_gpair,
|
||||
ObjFunction* obj = nullptr) = 0;
|
||||
|
||||
/*!
|
||||
|
||||
@@ -37,7 +37,7 @@ namespace xgboost {
|
||||
class Learner : public rabit::Serializable {
|
||||
public:
|
||||
/*! \brief virtual destructor */
|
||||
virtual ~Learner() {}
|
||||
~Learner() override = default;
|
||||
/*!
|
||||
* \brief set configuration from pair iterators.
|
||||
* \param begin The beginning iterator.
|
||||
@@ -62,12 +62,12 @@ class Learner : public rabit::Serializable {
|
||||
* \brief load model from stream
|
||||
* \param fi input stream.
|
||||
*/
|
||||
virtual void Load(dmlc::Stream* fi) = 0;
|
||||
void Load(dmlc::Stream* fi) override = 0;
|
||||
/*!
|
||||
* \brief save model to stream.
|
||||
* \param fo output stream
|
||||
*/
|
||||
virtual void Save(dmlc::Stream* fo) const = 0;
|
||||
void Save(dmlc::Stream* fo) const override = 0;
|
||||
/*!
|
||||
* \brief update the model for one iteration
|
||||
* With the specified objective function.
|
||||
@@ -84,7 +84,7 @@ class Learner : public rabit::Serializable {
|
||||
*/
|
||||
virtual void BoostOneIter(int iter,
|
||||
DMatrix* train,
|
||||
HostDeviceVector<bst_gpair>* in_gpair) = 0;
|
||||
HostDeviceVector<GradientPair>* in_gpair) = 0;
|
||||
/*!
|
||||
* \brief evaluate the model for specific iteration using the configured metrics.
|
||||
* \param iter iteration number
|
||||
@@ -194,7 +194,7 @@ inline void Learner::Predict(const SparseBatch::Inst& inst,
|
||||
bool output_margin,
|
||||
HostDeviceVector<bst_float>* out_preds,
|
||||
unsigned ntree_limit) const {
|
||||
gbm_->PredictInstance(inst, &out_preds->data_h(), ntree_limit);
|
||||
gbm_->PredictInstance(inst, &out_preds->HostVector(), ntree_limit);
|
||||
if (!output_margin) {
|
||||
obj_->PredTransform(out_preds);
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ namespace xgboost {
|
||||
class LinearUpdater {
|
||||
public:
|
||||
/*! \brief virtual destructor */
|
||||
virtual ~LinearUpdater() {}
|
||||
virtual ~LinearUpdater() = default;
|
||||
/*!
|
||||
* \brief Initialize the updater with given arguments.
|
||||
* \param args arguments to the objective function.
|
||||
@@ -36,7 +36,7 @@ class LinearUpdater {
|
||||
* \param sum_instance_weight The sum instance weights, used to normalise l1/l2 penalty.
|
||||
*/
|
||||
|
||||
virtual void Update(std::vector<bst_gpair>* in_gpair, DMatrix* data,
|
||||
virtual void Update(std::vector<GradientPair>* in_gpair, DMatrix* data,
|
||||
gbm::GBLinearModel* model,
|
||||
double sum_instance_weight) = 0;
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ class BaseLogger {
|
||||
log_stream_ << "[" << dmlc::DateLogger().HumanDate() << "] ";
|
||||
#endif
|
||||
}
|
||||
std::ostream& stream() { return log_stream_; }
|
||||
std::ostream& stream() { return log_stream_; } // NOLINT
|
||||
|
||||
protected:
|
||||
std::ostringstream log_stream_;
|
||||
|
||||
@@ -35,7 +35,7 @@ class Metric {
|
||||
/*! \return name of metric */
|
||||
virtual const char* Name() const = 0;
|
||||
/*! \brief virtual destructor */
|
||||
virtual ~Metric() {}
|
||||
virtual ~Metric() = default;
|
||||
/*!
|
||||
* \brief create a metric according to name.
|
||||
* \param name name of the metric.
|
||||
|
||||
@@ -23,7 +23,7 @@ namespace xgboost {
|
||||
class ObjFunction {
|
||||
public:
|
||||
/*! \brief virtual destructor */
|
||||
virtual ~ObjFunction() {}
|
||||
virtual ~ObjFunction() = default;
|
||||
/*!
|
||||
* \brief set configuration from pair iterators.
|
||||
* \param begin The beginning iterator.
|
||||
@@ -47,7 +47,7 @@ class ObjFunction {
|
||||
virtual void GetGradient(HostDeviceVector<bst_float>* preds,
|
||||
const MetaInfo& info,
|
||||
int iteration,
|
||||
HostDeviceVector<bst_gpair>* out_gpair) = 0;
|
||||
HostDeviceVector<GradientPair>* out_gpair) = 0;
|
||||
|
||||
/*! \return the default evaluation metric for the objective */
|
||||
virtual const char* DefaultEvalMetric() const = 0;
|
||||
|
||||
@@ -36,7 +36,7 @@ namespace xgboost {
|
||||
|
||||
class Predictor {
|
||||
public:
|
||||
virtual ~Predictor() {}
|
||||
virtual ~Predictor() = default;
|
||||
|
||||
/**
|
||||
* \fn virtual void Predictor::Init(const std::vector<std::pair<std::string,
|
||||
|
||||
@@ -71,70 +71,70 @@ template<typename TSplitCond, typename TNodeStat>
|
||||
class TreeModel {
|
||||
public:
|
||||
/*! \brief auxiliary statistics of node to help tree building */
|
||||
typedef TNodeStat NodeStat;
|
||||
using NodeStat = TNodeStat;
|
||||
/*! \brief data type to indicate split condition */
|
||||
typedef TSplitCond SplitCond;
|
||||
using SplitCond = TSplitCond;
|
||||
/*! \brief tree node */
|
||||
class Node {
|
||||
public:
|
||||
Node() : sindex_(0) {
|
||||
Node() {
|
||||
// assert compact alignment
|
||||
static_assert(sizeof(Node) == 4 * sizeof(int) + sizeof(Info),
|
||||
"Node: 64 bit align");
|
||||
}
|
||||
/*! \brief index of left child */
|
||||
inline int cleft() const {
|
||||
inline int LeftChild() const {
|
||||
return this->cleft_;
|
||||
}
|
||||
/*! \brief index of right child */
|
||||
inline int cright() const {
|
||||
inline int RightChild() const {
|
||||
return this->cright_;
|
||||
}
|
||||
/*! \brief index of default child when feature is missing */
|
||||
inline int cdefault() const {
|
||||
return this->default_left() ? this->cleft() : this->cright();
|
||||
inline int DefaultChild() const {
|
||||
return this->DefaultLeft() ? this->LeftChild() : this->RightChild();
|
||||
}
|
||||
/*! \brief feature index of split condition */
|
||||
inline unsigned split_index() const {
|
||||
inline unsigned SplitIndex() const {
|
||||
return sindex_ & ((1U << 31) - 1U);
|
||||
}
|
||||
/*! \brief whether to go to the left child when the feature is unknown */
|
||||
inline bool default_left() const {
|
||||
inline bool DefaultLeft() const {
|
||||
return (sindex_ >> 31) != 0;
|
||||
}
|
||||
/*! \brief whether current node is leaf node */
|
||||
inline bool is_leaf() const {
|
||||
inline bool IsLeaf() const {
|
||||
return cleft_ == -1;
|
||||
}
|
||||
/*! \return get leaf value of leaf node */
|
||||
inline bst_float leaf_value() const {
|
||||
inline bst_float LeafValue() const {
|
||||
return (this->info_).leaf_value;
|
||||
}
|
||||
/*! \return get split condition of the node */
|
||||
inline TSplitCond split_cond() const {
|
||||
inline TSplitCond SplitCond() const {
|
||||
return (this->info_).split_cond;
|
||||
}
|
||||
/*! \brief get parent of the node */
|
||||
inline int parent() const {
|
||||
inline int Parent() const {
|
||||
return parent_ & ((1U << 31) - 1);
|
||||
}
|
||||
/*! \brief whether current node is left child */
|
||||
inline bool is_left_child() const {
|
||||
inline bool IsLeftChild() const {
|
||||
return (parent_ & (1U << 31)) != 0;
|
||||
}
|
||||
/*! \brief whether this node is deleted */
|
||||
inline bool is_deleted() const {
|
||||
inline bool IsDeleted() const {
|
||||
return sindex_ == std::numeric_limits<unsigned>::max();
|
||||
}
|
||||
/*! \brief whether current node is root */
|
||||
inline bool is_root() const {
|
||||
inline bool IsRoot() const {
|
||||
return parent_ == -1;
|
||||
}
|
||||
/*!
|
||||
* \brief set the right child
|
||||
* \param nid node id to right child
|
||||
*/
|
||||
inline void set_right_child(int nid) {
|
||||
inline void SetRightChild(int nid) {
|
||||
this->cright_ = nid;
|
||||
}
|
||||
/*!
|
||||
@@ -143,7 +143,7 @@ class TreeModel {
|
||||
* \param split_cond split condition
|
||||
* \param default_left the default direction when feature is unknown
|
||||
*/
|
||||
inline void set_split(unsigned split_index, TSplitCond split_cond,
|
||||
inline void SetSplit(unsigned split_index, TSplitCond split_cond,
|
||||
bool default_left = false) {
|
||||
if (default_left) split_index |= (1U << 31);
|
||||
this->sindex_ = split_index;
|
||||
@@ -155,13 +155,13 @@ class TreeModel {
|
||||
* \param right right index, could be used to store
|
||||
* additional information
|
||||
*/
|
||||
inline void set_leaf(bst_float value, int right = -1) {
|
||||
inline void SetLeaf(bst_float value, int right = -1) {
|
||||
(this->info_).leaf_value = value;
|
||||
this->cleft_ = -1;
|
||||
this->cright_ = right;
|
||||
}
|
||||
/*! \brief mark that this node is deleted */
|
||||
inline void mark_delete() {
|
||||
inline void MarkDelete() {
|
||||
this->sindex_ = std::numeric_limits<unsigned>::max();
|
||||
}
|
||||
|
||||
@@ -181,11 +181,11 @@ class TreeModel {
|
||||
// pointer to left, right
|
||||
int cleft_, cright_;
|
||||
// split feature index, left split or right split depends on the highest bit
|
||||
unsigned sindex_;
|
||||
unsigned sindex_{0};
|
||||
// extra info
|
||||
Info info_;
|
||||
// set parent
|
||||
inline void set_parent(int pidx, bool is_left_child = true) {
|
||||
inline void SetParent(int pidx, bool is_left_child = true) {
|
||||
if (is_left_child) pidx |= (1U << 31);
|
||||
this->parent_ = pidx;
|
||||
}
|
||||
@@ -193,35 +193,35 @@ class TreeModel {
|
||||
|
||||
protected:
|
||||
// vector of nodes
|
||||
std::vector<Node> nodes;
|
||||
std::vector<Node> nodes_;
|
||||
// free node space, used during training process
|
||||
std::vector<int> deleted_nodes;
|
||||
std::vector<int> deleted_nodes_;
|
||||
// stats of nodes
|
||||
std::vector<TNodeStat> stats;
|
||||
std::vector<TNodeStat> stats_;
|
||||
// leaf vector, that is used to store additional information
|
||||
std::vector<bst_float> leaf_vector;
|
||||
std::vector<bst_float> leaf_vector_;
|
||||
// allocate a new node,
|
||||
// !!!!!! NOTE: may cause BUG here, nodes.resize
|
||||
inline int AllocNode() {
|
||||
if (param.num_deleted != 0) {
|
||||
int nd = deleted_nodes.back();
|
||||
deleted_nodes.pop_back();
|
||||
int nd = deleted_nodes_.back();
|
||||
deleted_nodes_.pop_back();
|
||||
--param.num_deleted;
|
||||
return nd;
|
||||
}
|
||||
int nd = param.num_nodes++;
|
||||
CHECK_LT(param.num_nodes, std::numeric_limits<int>::max())
|
||||
<< "number of nodes in the tree exceed 2^31";
|
||||
nodes.resize(param.num_nodes);
|
||||
stats.resize(param.num_nodes);
|
||||
leaf_vector.resize(param.num_nodes * param.size_leaf_vector);
|
||||
nodes_.resize(param.num_nodes);
|
||||
stats_.resize(param.num_nodes);
|
||||
leaf_vector_.resize(param.num_nodes * param.size_leaf_vector);
|
||||
return nd;
|
||||
}
|
||||
// delete a tree node, keep the parent field to allow trace back
|
||||
inline void DeleteNode(int nid) {
|
||||
CHECK_GE(nid, param.num_roots);
|
||||
deleted_nodes.push_back(nid);
|
||||
nodes[nid].mark_delete();
|
||||
deleted_nodes_.push_back(nid);
|
||||
nodes_[nid].MarkDelete();
|
||||
++param.num_deleted;
|
||||
}
|
||||
|
||||
@@ -232,11 +232,11 @@ class TreeModel {
|
||||
* \param value new leaf value
|
||||
*/
|
||||
inline void ChangeToLeaf(int rid, bst_float value) {
|
||||
CHECK(nodes[nodes[rid].cleft() ].is_leaf());
|
||||
CHECK(nodes[nodes[rid].cright()].is_leaf());
|
||||
this->DeleteNode(nodes[rid].cleft());
|
||||
this->DeleteNode(nodes[rid].cright());
|
||||
nodes[rid].set_leaf(value);
|
||||
CHECK(nodes_[nodes_[rid].LeftChild() ].IsLeaf());
|
||||
CHECK(nodes_[nodes_[rid].RightChild()].IsLeaf());
|
||||
this->DeleteNode(nodes_[rid].LeftChild());
|
||||
this->DeleteNode(nodes_[rid].RightChild());
|
||||
nodes_[rid].SetLeaf(value);
|
||||
}
|
||||
/*!
|
||||
* \brief collapse a non leaf node to a leaf node, delete its children
|
||||
@@ -244,12 +244,12 @@ class TreeModel {
|
||||
* \param value new leaf value
|
||||
*/
|
||||
inline void CollapseToLeaf(int rid, bst_float value) {
|
||||
if (nodes[rid].is_leaf()) return;
|
||||
if (!nodes[nodes[rid].cleft() ].is_leaf()) {
|
||||
CollapseToLeaf(nodes[rid].cleft(), 0.0f);
|
||||
if (nodes_[rid].IsLeaf()) return;
|
||||
if (!nodes_[nodes_[rid].LeftChild() ].IsLeaf()) {
|
||||
CollapseToLeaf(nodes_[rid].LeftChild(), 0.0f);
|
||||
}
|
||||
if (!nodes[nodes[rid].cright() ].is_leaf()) {
|
||||
CollapseToLeaf(nodes[rid].cright(), 0.0f);
|
||||
if (!nodes_[nodes_[rid].RightChild() ].IsLeaf()) {
|
||||
CollapseToLeaf(nodes_[rid].RightChild(), 0.0f);
|
||||
}
|
||||
this->ChangeToLeaf(rid, value);
|
||||
}
|
||||
@@ -262,47 +262,47 @@ class TreeModel {
|
||||
param.num_nodes = 1;
|
||||
param.num_roots = 1;
|
||||
param.num_deleted = 0;
|
||||
nodes.resize(1);
|
||||
nodes_.resize(1);
|
||||
}
|
||||
/*! \brief get node given nid */
|
||||
inline Node& operator[](int nid) {
|
||||
return nodes[nid];
|
||||
return nodes_[nid];
|
||||
}
|
||||
/*! \brief get node given nid */
|
||||
inline const Node& operator[](int nid) const {
|
||||
return nodes[nid];
|
||||
return nodes_[nid];
|
||||
}
|
||||
|
||||
/*! \brief get const reference to nodes */
|
||||
inline const std::vector<Node>& GetNodes() const { return nodes; }
|
||||
inline const std::vector<Node>& GetNodes() const { return nodes_; }
|
||||
|
||||
/*! \brief get node statistics given nid */
|
||||
inline NodeStat& stat(int nid) {
|
||||
return stats[nid];
|
||||
inline NodeStat& Stat(int nid) {
|
||||
return stats_[nid];
|
||||
}
|
||||
/*! \brief get node statistics given nid */
|
||||
inline const NodeStat& stat(int nid) const {
|
||||
return stats[nid];
|
||||
inline const NodeStat& Stat(int nid) const {
|
||||
return stats_[nid];
|
||||
}
|
||||
/*! \brief get leaf vector given nid */
|
||||
inline bst_float* leafvec(int nid) {
|
||||
if (leaf_vector.size() == 0) return nullptr;
|
||||
return& leaf_vector[nid * param.size_leaf_vector];
|
||||
inline bst_float* Leafvec(int nid) {
|
||||
if (leaf_vector_.size() == 0) return nullptr;
|
||||
return& leaf_vector_[nid * param.size_leaf_vector];
|
||||
}
|
||||
/*! \brief get leaf vector given nid */
|
||||
inline const bst_float* leafvec(int nid) const {
|
||||
if (leaf_vector.size() == 0) return nullptr;
|
||||
return& leaf_vector[nid * param.size_leaf_vector];
|
||||
inline const bst_float* Leafvec(int nid) const {
|
||||
if (leaf_vector_.size() == 0) return nullptr;
|
||||
return& leaf_vector_[nid * param.size_leaf_vector];
|
||||
}
|
||||
/*! \brief initialize the model */
|
||||
inline void InitModel() {
|
||||
param.num_nodes = param.num_roots;
|
||||
nodes.resize(param.num_nodes);
|
||||
stats.resize(param.num_nodes);
|
||||
leaf_vector.resize(param.num_nodes * param.size_leaf_vector, 0.0f);
|
||||
nodes_.resize(param.num_nodes);
|
||||
stats_.resize(param.num_nodes);
|
||||
leaf_vector_.resize(param.num_nodes * param.size_leaf_vector, 0.0f);
|
||||
for (int i = 0; i < param.num_nodes; i ++) {
|
||||
nodes[i].set_leaf(0.0f);
|
||||
nodes[i].set_parent(-1);
|
||||
nodes_[i].SetLeaf(0.0f);
|
||||
nodes_[i].SetParent(-1);
|
||||
}
|
||||
}
|
||||
/*!
|
||||
@@ -311,35 +311,35 @@ class TreeModel {
|
||||
*/
|
||||
inline void Load(dmlc::Stream* fi) {
|
||||
CHECK_EQ(fi->Read(&param, sizeof(TreeParam)), sizeof(TreeParam));
|
||||
nodes.resize(param.num_nodes);
|
||||
stats.resize(param.num_nodes);
|
||||
nodes_.resize(param.num_nodes);
|
||||
stats_.resize(param.num_nodes);
|
||||
CHECK_NE(param.num_nodes, 0);
|
||||
CHECK_EQ(fi->Read(dmlc::BeginPtr(nodes), sizeof(Node) * nodes.size()),
|
||||
sizeof(Node) * nodes.size());
|
||||
CHECK_EQ(fi->Read(dmlc::BeginPtr(stats), sizeof(NodeStat) * stats.size()),
|
||||
sizeof(NodeStat) * stats.size());
|
||||
CHECK_EQ(fi->Read(dmlc::BeginPtr(nodes_), sizeof(Node) * nodes_.size()),
|
||||
sizeof(Node) * nodes_.size());
|
||||
CHECK_EQ(fi->Read(dmlc::BeginPtr(stats_), sizeof(NodeStat) * stats_.size()),
|
||||
sizeof(NodeStat) * stats_.size());
|
||||
if (param.size_leaf_vector != 0) {
|
||||
CHECK(fi->Read(&leaf_vector));
|
||||
CHECK(fi->Read(&leaf_vector_));
|
||||
}
|
||||
// chg deleted nodes
|
||||
deleted_nodes.resize(0);
|
||||
deleted_nodes_.resize(0);
|
||||
for (int i = param.num_roots; i < param.num_nodes; ++i) {
|
||||
if (nodes[i].is_deleted()) deleted_nodes.push_back(i);
|
||||
if (nodes_[i].IsDeleted()) deleted_nodes_.push_back(i);
|
||||
}
|
||||
CHECK_EQ(static_cast<int>(deleted_nodes.size()), param.num_deleted);
|
||||
CHECK_EQ(static_cast<int>(deleted_nodes_.size()), param.num_deleted);
|
||||
}
|
||||
/*!
|
||||
* \brief save model to stream
|
||||
* \param fo output stream
|
||||
*/
|
||||
inline void Save(dmlc::Stream* fo) const {
|
||||
CHECK_EQ(param.num_nodes, static_cast<int>(nodes.size()));
|
||||
CHECK_EQ(param.num_nodes, static_cast<int>(stats.size()));
|
||||
CHECK_EQ(param.num_nodes, static_cast<int>(nodes_.size()));
|
||||
CHECK_EQ(param.num_nodes, static_cast<int>(stats_.size()));
|
||||
fo->Write(&param, sizeof(TreeParam));
|
||||
CHECK_NE(param.num_nodes, 0);
|
||||
fo->Write(dmlc::BeginPtr(nodes), sizeof(Node) * nodes.size());
|
||||
fo->Write(dmlc::BeginPtr(stats), sizeof(NodeStat) * nodes.size());
|
||||
if (param.size_leaf_vector != 0) fo->Write(leaf_vector);
|
||||
fo->Write(dmlc::BeginPtr(nodes_), sizeof(Node) * nodes_.size());
|
||||
fo->Write(dmlc::BeginPtr(stats_), sizeof(NodeStat) * nodes_.size());
|
||||
if (param.size_leaf_vector != 0) fo->Write(leaf_vector_);
|
||||
}
|
||||
/*!
|
||||
* \brief add child nodes to node
|
||||
@@ -348,10 +348,10 @@ class TreeModel {
|
||||
inline void AddChilds(int nid) {
|
||||
int pleft = this->AllocNode();
|
||||
int pright = this->AllocNode();
|
||||
nodes[nid].cleft_ = pleft;
|
||||
nodes[nid].cright_ = pright;
|
||||
nodes[nodes[nid].cleft() ].set_parent(nid, true);
|
||||
nodes[nodes[nid].cright()].set_parent(nid, false);
|
||||
nodes_[nid].cleft_ = pleft;
|
||||
nodes_[nid].cright_ = pright;
|
||||
nodes_[nodes_[nid].LeftChild() ].SetParent(nid, true);
|
||||
nodes_[nodes_[nid].RightChild()].SetParent(nid, false);
|
||||
}
|
||||
/*!
|
||||
* \brief only add a right child to a leaf node
|
||||
@@ -359,8 +359,8 @@ class TreeModel {
|
||||
*/
|
||||
inline void AddRightChild(int nid) {
|
||||
int pright = this->AllocNode();
|
||||
nodes[nid].right = pright;
|
||||
nodes[nodes[nid].right].set_parent(nid, false);
|
||||
nodes_[nid].right = pright;
|
||||
nodes_[nodes_[nid].right].SetParent(nid, false);
|
||||
}
|
||||
/*!
|
||||
* \brief get current depth
|
||||
@@ -369,9 +369,9 @@ class TreeModel {
|
||||
*/
|
||||
inline int GetDepth(int nid, bool pass_rchild = false) const {
|
||||
int depth = 0;
|
||||
while (!nodes[nid].is_root()) {
|
||||
if (!pass_rchild || nodes[nid].is_left_child()) ++depth;
|
||||
nid = nodes[nid].parent();
|
||||
while (!nodes_[nid].IsRoot()) {
|
||||
if (!pass_rchild || nodes_[nid].IsLeftChild()) ++depth;
|
||||
nid = nodes_[nid].Parent();
|
||||
}
|
||||
return depth;
|
||||
}
|
||||
@@ -380,9 +380,9 @@ class TreeModel {
|
||||
* \param nid node id
|
||||
*/
|
||||
inline int MaxDepth(int nid) const {
|
||||
if (nodes[nid].is_leaf()) return 0;
|
||||
return std::max(MaxDepth(nodes[nid].cleft())+1,
|
||||
MaxDepth(nodes[nid].cright())+1);
|
||||
if (nodes_[nid].IsLeaf()) return 0;
|
||||
return std::max(MaxDepth(nodes_[nid].LeftChild())+1,
|
||||
MaxDepth(nodes_[nid].RightChild())+1);
|
||||
}
|
||||
/*!
|
||||
* \brief get maximum depth
|
||||
@@ -395,7 +395,7 @@ class TreeModel {
|
||||
return maxd;
|
||||
}
|
||||
/*! \brief number of extra nodes besides the root */
|
||||
inline int num_extra_nodes() const {
|
||||
inline int NumExtraNodes() const {
|
||||
return param.num_nodes - param.num_roots - param.num_deleted;
|
||||
}
|
||||
};
|
||||
@@ -421,7 +421,7 @@ struct PathElement {
|
||||
bst_float zero_fraction;
|
||||
bst_float one_fraction;
|
||||
bst_float pweight;
|
||||
PathElement() {}
|
||||
PathElement() = default;
|
||||
PathElement(int i, bst_float z, bst_float o, bst_float w) :
|
||||
feature_index(i), zero_fraction(z), one_fraction(o), pweight(w) {}
|
||||
};
|
||||
@@ -457,19 +457,19 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat> {
|
||||
* \brief returns the size of the feature vector
|
||||
* \return the size of the feature vector
|
||||
*/
|
||||
inline size_t size() const;
|
||||
inline size_t Size() const;
|
||||
/*!
|
||||
* \brief get ith value
|
||||
* \param i feature index.
|
||||
* \return the i-th feature value
|
||||
*/
|
||||
inline bst_float fvalue(size_t i) const;
|
||||
inline bst_float Fvalue(size_t i) const;
|
||||
/*!
|
||||
* \brief check whether i-th entry is missing
|
||||
* \param i feature index.
|
||||
* \return whether i-th value is missing.
|
||||
*/
|
||||
inline bool is_missing(size_t i) const;
|
||||
inline bool IsMissing(size_t i) const;
|
||||
|
||||
private:
|
||||
/*!
|
||||
@@ -480,7 +480,7 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat> {
|
||||
bst_float fvalue;
|
||||
int flag;
|
||||
};
|
||||
std::vector<Entry> data;
|
||||
std::vector<Entry> data_;
|
||||
};
|
||||
/*!
|
||||
* \brief get the leaf index
|
||||
@@ -562,63 +562,63 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat> {
|
||||
private:
|
||||
inline bst_float FillNodeMeanValue(int nid);
|
||||
|
||||
std::vector<bst_float> node_mean_values;
|
||||
std::vector<bst_float> node_mean_values_;
|
||||
};
|
||||
|
||||
// implementations of inline functions
|
||||
// do not need to read if only use the model
|
||||
inline void RegTree::FVec::Init(size_t size) {
|
||||
Entry e; e.flag = -1;
|
||||
data.resize(size);
|
||||
std::fill(data.begin(), data.end(), e);
|
||||
data_.resize(size);
|
||||
std::fill(data_.begin(), data_.end(), e);
|
||||
}
|
||||
|
||||
inline void RegTree::FVec::Fill(const RowBatch::Inst& inst) {
|
||||
for (bst_uint i = 0; i < inst.length; ++i) {
|
||||
if (inst[i].index >= data.size()) continue;
|
||||
data[inst[i].index].fvalue = inst[i].fvalue;
|
||||
if (inst[i].index >= data_.size()) continue;
|
||||
data_[inst[i].index].fvalue = inst[i].fvalue;
|
||||
}
|
||||
}
|
||||
|
||||
inline void RegTree::FVec::Drop(const RowBatch::Inst& inst) {
|
||||
for (bst_uint i = 0; i < inst.length; ++i) {
|
||||
if (inst[i].index >= data.size()) continue;
|
||||
data[inst[i].index].flag = -1;
|
||||
if (inst[i].index >= data_.size()) continue;
|
||||
data_[inst[i].index].flag = -1;
|
||||
}
|
||||
}
|
||||
|
||||
inline size_t RegTree::FVec::size() const {
|
||||
return data.size();
|
||||
inline size_t RegTree::FVec::Size() const {
|
||||
return data_.size();
|
||||
}
|
||||
|
||||
inline bst_float RegTree::FVec::fvalue(size_t i) const {
|
||||
return data[i].fvalue;
|
||||
inline bst_float RegTree::FVec::Fvalue(size_t i) const {
|
||||
return data_[i].fvalue;
|
||||
}
|
||||
|
||||
inline bool RegTree::FVec::is_missing(size_t i) const {
|
||||
return data[i].flag == -1;
|
||||
inline bool RegTree::FVec::IsMissing(size_t i) const {
|
||||
return data_[i].flag == -1;
|
||||
}
|
||||
|
||||
inline int RegTree::GetLeafIndex(const RegTree::FVec& feat, unsigned root_id) const {
|
||||
int pid = static_cast<int>(root_id);
|
||||
while (!(*this)[pid].is_leaf()) {
|
||||
unsigned split_index = (*this)[pid].split_index();
|
||||
pid = this->GetNext(pid, feat.fvalue(split_index), feat.is_missing(split_index));
|
||||
auto pid = static_cast<int>(root_id);
|
||||
while (!(*this)[pid].IsLeaf()) {
|
||||
unsigned split_index = (*this)[pid].SplitIndex();
|
||||
pid = this->GetNext(pid, feat.Fvalue(split_index), feat.IsMissing(split_index));
|
||||
}
|
||||
return pid;
|
||||
}
|
||||
|
||||
inline bst_float RegTree::Predict(const RegTree::FVec& feat, unsigned root_id) const {
|
||||
int pid = this->GetLeafIndex(feat, root_id);
|
||||
return (*this)[pid].leaf_value();
|
||||
return (*this)[pid].LeafValue();
|
||||
}
|
||||
|
||||
inline void RegTree::FillNodeMeanValues() {
|
||||
size_t num_nodes = this->param.num_nodes;
|
||||
if (this->node_mean_values.size() == num_nodes) {
|
||||
if (this->node_mean_values_.size() == num_nodes) {
|
||||
return;
|
||||
}
|
||||
this->node_mean_values.resize(num_nodes);
|
||||
this->node_mean_values_.resize(num_nodes);
|
||||
for (int root_id = 0; root_id < param.num_roots; ++root_id) {
|
||||
this->FillNodeMeanValue(root_id);
|
||||
}
|
||||
@@ -627,40 +627,39 @@ inline void RegTree::FillNodeMeanValues() {
|
||||
inline bst_float RegTree::FillNodeMeanValue(int nid) {
|
||||
bst_float result;
|
||||
auto& node = (*this)[nid];
|
||||
if (node.is_leaf()) {
|
||||
result = node.leaf_value();
|
||||
if (node.IsLeaf()) {
|
||||
result = node.LeafValue();
|
||||
} else {
|
||||
result = this->FillNodeMeanValue(node.cleft()) * this->stat(node.cleft()).sum_hess;
|
||||
result += this->FillNodeMeanValue(node.cright()) * this->stat(node.cright()).sum_hess;
|
||||
result /= this->stat(nid).sum_hess;
|
||||
result = this->FillNodeMeanValue(node.LeftChild()) * this->Stat(node.LeftChild()).sum_hess;
|
||||
result += this->FillNodeMeanValue(node.RightChild()) * this->Stat(node.RightChild()).sum_hess;
|
||||
result /= this->Stat(nid).sum_hess;
|
||||
}
|
||||
this->node_mean_values[nid] = result;
|
||||
this->node_mean_values_[nid] = result;
|
||||
return result;
|
||||
}
|
||||
|
||||
inline void RegTree::CalculateContributionsApprox(const RegTree::FVec& feat, unsigned root_id,
|
||||
bst_float *out_contribs) const {
|
||||
CHECK_GT(this->node_mean_values.size(), 0U);
|
||||
CHECK_GT(this->node_mean_values_.size(), 0U);
|
||||
// this follows the idea of http://blog.datadive.net/interpreting-random-forests/
|
||||
bst_float node_value;
|
||||
unsigned split_index;
|
||||
int pid = static_cast<int>(root_id);
|
||||
unsigned split_index = 0;
|
||||
auto pid = static_cast<int>(root_id);
|
||||
// update bias value
|
||||
node_value = this->node_mean_values[pid];
|
||||
out_contribs[feat.size()] += node_value;
|
||||
if ((*this)[pid].is_leaf()) {
|
||||
bst_float node_value = this->node_mean_values_[pid];
|
||||
out_contribs[feat.Size()] += node_value;
|
||||
if ((*this)[pid].IsLeaf()) {
|
||||
// nothing to do anymore
|
||||
return;
|
||||
}
|
||||
while (!(*this)[pid].is_leaf()) {
|
||||
split_index = (*this)[pid].split_index();
|
||||
pid = this->GetNext(pid, feat.fvalue(split_index), feat.is_missing(split_index));
|
||||
bst_float new_value = this->node_mean_values[pid];
|
||||
while (!(*this)[pid].IsLeaf()) {
|
||||
split_index = (*this)[pid].SplitIndex();
|
||||
pid = this->GetNext(pid, feat.Fvalue(split_index), feat.IsMissing(split_index));
|
||||
bst_float new_value = this->node_mean_values_[pid];
|
||||
// update feature weight
|
||||
out_contribs[split_index] += new_value - node_value;
|
||||
node_value = new_value;
|
||||
}
|
||||
bst_float leaf_value = (*this)[pid].leaf_value();
|
||||
bst_float leaf_value = (*this)[pid].LeafValue();
|
||||
// update leaf feature weight
|
||||
out_contribs[split_index] += leaf_value - node_value;
|
||||
}
|
||||
@@ -749,33 +748,33 @@ inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
|
||||
ExtendPath(unique_path, unique_depth, parent_zero_fraction,
|
||||
parent_one_fraction, parent_feature_index);
|
||||
}
|
||||
const unsigned split_index = node.split_index();
|
||||
const unsigned split_index = node.SplitIndex();
|
||||
|
||||
// leaf node
|
||||
if (node.is_leaf()) {
|
||||
if (node.IsLeaf()) {
|
||||
for (unsigned i = 1; i <= unique_depth; ++i) {
|
||||
const bst_float w = UnwoundPathSum(unique_path, unique_depth, i);
|
||||
const PathElement &el = unique_path[i];
|
||||
phi[el.feature_index] += w * (el.one_fraction - el.zero_fraction)
|
||||
* node.leaf_value() * condition_fraction;
|
||||
* node.LeafValue() * condition_fraction;
|
||||
}
|
||||
|
||||
// internal node
|
||||
} else {
|
||||
// find which branch is "hot" (meaning x would follow it)
|
||||
unsigned hot_index = 0;
|
||||
if (feat.is_missing(split_index)) {
|
||||
hot_index = node.cdefault();
|
||||
} else if (feat.fvalue(split_index) < node.split_cond()) {
|
||||
hot_index = node.cleft();
|
||||
if (feat.IsMissing(split_index)) {
|
||||
hot_index = node.DefaultChild();
|
||||
} else if (feat.Fvalue(split_index) < node.SplitCond()) {
|
||||
hot_index = node.LeftChild();
|
||||
} else {
|
||||
hot_index = node.cright();
|
||||
hot_index = node.RightChild();
|
||||
}
|
||||
const unsigned cold_index = (static_cast<int>(hot_index) == node.cleft() ?
|
||||
node.cright() : node.cleft());
|
||||
const bst_float w = this->stat(node_index).sum_hess;
|
||||
const bst_float hot_zero_fraction = this->stat(hot_index).sum_hess / w;
|
||||
const bst_float cold_zero_fraction = this->stat(cold_index).sum_hess / w;
|
||||
const unsigned cold_index = (static_cast<int>(hot_index) == node.LeftChild() ?
|
||||
node.RightChild() : node.LeftChild());
|
||||
const bst_float w = this->Stat(node_index).sum_hess;
|
||||
const bst_float hot_zero_fraction = this->Stat(hot_index).sum_hess / w;
|
||||
const bst_float cold_zero_fraction = this->Stat(cold_index).sum_hess / w;
|
||||
bst_float incoming_zero_fraction = 1;
|
||||
bst_float incoming_one_fraction = 1;
|
||||
|
||||
@@ -820,13 +819,13 @@ inline void RegTree::CalculateContributions(const RegTree::FVec& feat, unsigned
|
||||
unsigned condition_feature) const {
|
||||
// find the expected value of the tree's predictions
|
||||
if (condition == 0) {
|
||||
bst_float node_value = this->node_mean_values[static_cast<int>(root_id)];
|
||||
out_contribs[feat.size()] += node_value;
|
||||
bst_float node_value = this->node_mean_values_[static_cast<int>(root_id)];
|
||||
out_contribs[feat.Size()] += node_value;
|
||||
}
|
||||
|
||||
// Preallocate space for the unique path data
|
||||
const int maxd = this->MaxDepth(root_id) + 2;
|
||||
PathElement *unique_path_data = new PathElement[(maxd * (maxd + 1)) / 2];
|
||||
auto *unique_path_data = new PathElement[(maxd * (maxd + 1)) / 2];
|
||||
|
||||
TreeShap(feat, out_contribs, root_id, 0, unique_path_data,
|
||||
1, 1, -1, condition, condition_feature, 1);
|
||||
@@ -835,14 +834,14 @@ inline void RegTree::CalculateContributions(const RegTree::FVec& feat, unsigned
|
||||
|
||||
/*! \brief get next position of the tree given current pid */
|
||||
inline int RegTree::GetNext(int pid, bst_float fvalue, bool is_unknown) const {
|
||||
bst_float split_value = (*this)[pid].split_cond();
|
||||
bst_float split_value = (*this)[pid].SplitCond();
|
||||
if (is_unknown) {
|
||||
return (*this)[pid].cdefault();
|
||||
return (*this)[pid].DefaultChild();
|
||||
} else {
|
||||
if (fvalue < split_value) {
|
||||
return (*this)[pid].cleft();
|
||||
return (*this)[pid].LeftChild();
|
||||
} else {
|
||||
return (*this)[pid].cright();
|
||||
return (*this)[pid].RightChild();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,7 +25,7 @@ namespace xgboost {
|
||||
class TreeUpdater {
|
||||
public:
|
||||
/*! \brief virtual destructor */
|
||||
virtual ~TreeUpdater() {}
|
||||
virtual ~TreeUpdater() = default;
|
||||
/*!
|
||||
* \brief Initialize the updater with given arguments.
|
||||
* \param args arguments to the objective function.
|
||||
@@ -40,7 +40,7 @@ class TreeUpdater {
|
||||
* but maybe different random seeds, usually one tree is passed in at a time,
|
||||
* there can be multiple trees when we train random forest style model
|
||||
*/
|
||||
virtual void Update(HostDeviceVector<bst_gpair>* gpair,
|
||||
virtual void Update(HostDeviceVector<GradientPair>* gpair,
|
||||
DMatrix* data,
|
||||
const std::vector<RegTree*>& trees) = 0;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user