Use bst_float consistently throughout (#1824)
* Fix various typos * Add override to functions that are overridden: gcc gives warnings about functions that override a base-class function without being marked as override. This fixes it. * Use bst_float consistently: use bst_float for all the variables that involve weight, leaf value, gradient, hessian, gain, loss_chg, predictions, base_margin, and feature values. In some cases, where additions and similar operations can make the value larger, double is used. This ensures that type conversions are minimal and reduces loss of precision.
This commit is contained in:
@@ -42,7 +42,7 @@
|
||||
/*! \brief namespace of xgboost */
|
||||
namespace xgboost {
|
||||
/*!
|
||||
* \brief unsigned interger type used in boost,
|
||||
* \brief unsigned integer type used in boost,
|
||||
* used for feature index and row index.
|
||||
*/
|
||||
typedef uint32_t bst_uint;
|
||||
@@ -62,7 +62,7 @@ struct bst_gpair {
|
||||
};
|
||||
|
||||
/*! \brief small eps gap for minimum split decision. */
|
||||
const float rt_eps = 1e-6f;
|
||||
const bst_float rt_eps = 1e-6f;
|
||||
|
||||
/*! \brief define unsigned long for openmp loop */
|
||||
typedef dmlc::omp_ulong omp_ulong;
|
||||
|
||||
@@ -23,9 +23,10 @@ XGB_EXTERN_C {
|
||||
#define XGB_DLL XGB_EXTERN_C
|
||||
#endif
|
||||
|
||||
// manually define unsign long
|
||||
// manually define unsigned long
|
||||
typedef uint64_t bst_ulong; // NOLINT(*)
|
||||
|
||||
|
||||
/*! \brief handle to DMatrix */
|
||||
typedef void *DMatrixHandle;
|
||||
/*! \brief handle to Booster */
|
||||
@@ -86,11 +87,11 @@ XGB_EXTERN_C typedef int XGBCallbackDataIterNext(
|
||||
* \brief get string message of the last error
|
||||
*
|
||||
* all function in this file will return 0 when success
|
||||
* and -1 when an error occured,
|
||||
* and -1 when an error occurred,
|
||||
* XGBGetLastError can be called to retrieve the error
|
||||
*
|
||||
* this function is threadsafe and can be called by different thread
|
||||
* \return const char* error inforomation
|
||||
* this function is thread safe and can be called by different thread
|
||||
* \return const char* error information
|
||||
*/
|
||||
XGB_DLL const char *XGBGetLastError();
|
||||
|
||||
@@ -124,7 +125,7 @@ XGB_DLL int XGDMatrixCreateFromDataIter(
|
||||
* \param indptr pointer to row headers
|
||||
* \param indices findex
|
||||
* \param data fvalue
|
||||
* \param nindptr number of rows in the matix + 1
|
||||
* \param nindptr number of rows in the matrix + 1
|
||||
* \param nelem number of nonzero elements in the matrix
|
||||
* \param num_col number of columns; when it's set to 0, then guess from data
|
||||
* \param out created dmatrix
|
||||
@@ -143,7 +144,7 @@ XGB_DLL int XGDMatrixCreateFromCSREx(const size_t* indptr,
|
||||
* \param indptr pointer to row headers
|
||||
* \param indices findex
|
||||
* \param data fvalue
|
||||
* \param nindptr number of rows in the matix + 1
|
||||
* \param nindptr number of rows in the matrix + 1
|
||||
* \param nelem number of nonzero elements in the matrix
|
||||
* \param out created dmatrix
|
||||
* \return 0 when success, -1 when failure happens
|
||||
@@ -159,7 +160,7 @@ XGB_DLL int XGDMatrixCreateFromCSR(const bst_ulong *indptr,
|
||||
* \param col_ptr pointer to col headers
|
||||
* \param indices findex
|
||||
* \param data fvalue
|
||||
* \param nindptr number of rows in the matix + 1
|
||||
* \param nindptr number of rows in the matrix + 1
|
||||
* \param nelem number of nonzero elements in the matrix
|
||||
* \param num_row number of rows; when it's set to 0, then guess from data
|
||||
* \param out created dmatrix
|
||||
@@ -178,7 +179,7 @@ XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t* col_ptr,
|
||||
* \param col_ptr pointer to col headers
|
||||
* \param indices findex
|
||||
* \param data fvalue
|
||||
* \param nindptr number of rows in the matix + 1
|
||||
* \param nindptr number of rows in the matrix + 1
|
||||
* \param nelem number of nonzero elements in the matrix
|
||||
* \param out created dmatrix
|
||||
* \return 0 when success, -1 when failure happens
|
||||
@@ -201,7 +202,7 @@ XGB_DLL int XGDMatrixCreateFromCSC(const bst_ulong *col_ptr,
|
||||
XGB_DLL int XGDMatrixCreateFromMat(const float *data,
|
||||
bst_ulong nrow,
|
||||
bst_ulong ncol,
|
||||
float missing,
|
||||
float missing,
|
||||
DMatrixHandle *out);
|
||||
/*!
|
||||
* \brief create a new dmatrix from sliced content of existing matrix
|
||||
|
||||
@@ -65,7 +65,7 @@ struct MetaInfo {
|
||||
* \param i Instance index.
|
||||
* \return The weight.
|
||||
*/
|
||||
inline float GetWeight(size_t i) const {
|
||||
inline bst_float GetWeight(size_t i) const {
|
||||
return weights.size() != 0 ? weights[i] : 1.0f;
|
||||
}
|
||||
/*!
|
||||
@@ -253,7 +253,7 @@ class DMatrix {
|
||||
* \brief check if column access is supported, if not, initialize column access.
|
||||
* \param enabled whether certain feature should be included in column access.
|
||||
* \param subsample subsample ratio when generating column access.
|
||||
* \param max_row_perbatch auxilary information, maximum row used in each column batch.
|
||||
* \param max_row_perbatch auxiliary information, maximum row used in each column batch.
|
||||
* this is a hint information that can be ignored by the implementation.
|
||||
* \return Number of column blocks in the column access.
|
||||
*/
|
||||
@@ -304,7 +304,7 @@ class DMatrix {
|
||||
static DMatrix* Create(std::unique_ptr<DataSource>&& source,
|
||||
const std::string& cache_prefix = "");
|
||||
/*!
|
||||
* \brief Create a DMatrix by loaidng data from parser.
|
||||
* \brief Create a DMatrix by loading data from parser.
|
||||
* Parser can later be deleted after the DMatrix is created.
|
||||
* \param parser The input data parser
|
||||
* \param cache_prefix The path to prefix of temporary cache file of the DMatrix when used in external memory mode.
|
||||
|
||||
@@ -78,7 +78,7 @@ class GradientBooster {
|
||||
* we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear
|
||||
*/
|
||||
virtual void Predict(DMatrix* dmat,
|
||||
std::vector<float>* out_preds,
|
||||
std::vector<bst_float>* out_preds,
|
||||
unsigned ntree_limit = 0) = 0;
|
||||
/*!
|
||||
* \brief online prediction function, predict score for one instance at a time
|
||||
@@ -93,7 +93,7 @@ class GradientBooster {
|
||||
* \sa Predict
|
||||
*/
|
||||
virtual void Predict(const SparseBatch::Inst& inst,
|
||||
std::vector<float>* out_preds,
|
||||
std::vector<bst_float>* out_preds,
|
||||
unsigned ntree_limit = 0,
|
||||
unsigned root_index = 0) = 0;
|
||||
/*!
|
||||
@@ -105,7 +105,7 @@ class GradientBooster {
|
||||
* we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear
|
||||
*/
|
||||
virtual void PredictLeaf(DMatrix* dmat,
|
||||
std::vector<float>* out_preds,
|
||||
std::vector<bst_float>* out_preds,
|
||||
unsigned ntree_limit = 0) = 0;
|
||||
/*!
|
||||
* \brief dump the model in the requested format
|
||||
@@ -127,7 +127,7 @@ class GradientBooster {
|
||||
static GradientBooster* Create(
|
||||
const std::string& name,
|
||||
const std::vector<std::shared_ptr<DMatrix> >& cache_mats,
|
||||
float base_margin);
|
||||
bst_float base_margin);
|
||||
};
|
||||
|
||||
// implementing configure.
|
||||
@@ -144,7 +144,7 @@ struct GradientBoosterReg
|
||||
: public dmlc::FunctionRegEntryBase<
|
||||
GradientBoosterReg,
|
||||
std::function<GradientBooster* (const std::vector<std::shared_ptr<DMatrix> > &cached_mats,
|
||||
float base_margin)> > {
|
||||
bst_float base_margin)> > {
|
||||
};
|
||||
|
||||
/*!
|
||||
|
||||
@@ -106,7 +106,7 @@ class Learner : public rabit::Serializable {
|
||||
*/
|
||||
virtual void Predict(DMatrix* data,
|
||||
bool output_margin,
|
||||
std::vector<float> *out_preds,
|
||||
std::vector<bst_float> *out_preds,
|
||||
unsigned ntree_limit = 0,
|
||||
bool pred_leaf = false) const = 0;
|
||||
/*!
|
||||
@@ -162,7 +162,7 @@ class Learner : public rabit::Serializable {
|
||||
*/
|
||||
inline void Predict(const SparseBatch::Inst &inst,
|
||||
bool output_margin,
|
||||
std::vector<float> *out_preds,
|
||||
std::vector<bst_float> *out_preds,
|
||||
unsigned ntree_limit = 0) const;
|
||||
/*!
|
||||
* \brief Create a new instance of learner.
|
||||
@@ -185,7 +185,7 @@ class Learner : public rabit::Serializable {
|
||||
// implementation of inline functions.
|
||||
inline void Learner::Predict(const SparseBatch::Inst& inst,
|
||||
bool output_margin,
|
||||
std::vector<float>* out_preds,
|
||||
std::vector<bst_float>* out_preds,
|
||||
unsigned ntree_limit) const {
|
||||
gbm_->Predict(inst, out_preds, ntree_limit);
|
||||
if (out_preds->size() == 1) {
|
||||
|
||||
@@ -29,9 +29,9 @@ class Metric {
|
||||
* the average statistics across all the node,
|
||||
* this is only supported by some metrics
|
||||
*/
|
||||
virtual float Eval(const std::vector<float>& preds,
|
||||
const MetaInfo& info,
|
||||
bool distributed) const = 0;
|
||||
virtual bst_float Eval(const std::vector<bst_float>& preds,
|
||||
const MetaInfo& info,
|
||||
bool distributed) const = 0;
|
||||
/*! \return name of metric */
|
||||
virtual const char* Name() const = 0;
|
||||
/*! \brief virtual destructor */
|
||||
|
||||
@@ -41,7 +41,7 @@ class ObjFunction {
|
||||
* \param iteration current iteration number.
|
||||
* \param out_gpair output of get gradient, saves gradient and second order gradient in
|
||||
*/
|
||||
virtual void GetGradient(const std::vector<float>& preds,
|
||||
virtual void GetGradient(const std::vector<bst_float>& preds,
|
||||
const MetaInfo& info,
|
||||
int iteration,
|
||||
std::vector<bst_gpair>* out_gpair) = 0;
|
||||
@@ -52,13 +52,13 @@ class ObjFunction {
|
||||
* \brief transform prediction values, this is only called when Prediction is called
|
||||
* \param io_preds prediction values, saves to this vector as well
|
||||
*/
|
||||
virtual void PredTransform(std::vector<float> *io_preds) {}
|
||||
virtual void PredTransform(std::vector<bst_float> *io_preds) {}
|
||||
/*!
|
||||
* \brief transform prediction values, this is only called when Eval is called,
|
||||
* usually it redirects to PredTransform
|
||||
* \param io_preds prediction values, saves to this vector as well
|
||||
*/
|
||||
virtual void EvalTransform(std::vector<float> *io_preds) {
|
||||
virtual void EvalTransform(std::vector<bst_float> *io_preds) {
|
||||
this->PredTransform(io_preds);
|
||||
}
|
||||
/*!
|
||||
@@ -67,7 +67,7 @@ class ObjFunction {
|
||||
* used by gradient boosting
|
||||
* \return transformed value
|
||||
*/
|
||||
virtual float ProbToMargin(float base_score) const {
|
||||
virtual bst_float ProbToMargin(bst_float base_score) const {
|
||||
return base_score;
|
||||
}
|
||||
/*!
|
||||
|
||||
@@ -106,7 +106,7 @@ class TreeModel {
|
||||
return cleft_ == -1;
|
||||
}
|
||||
/*! \return get leaf value of leaf node */
|
||||
inline float leaf_value() const {
|
||||
inline bst_float leaf_value() const {
|
||||
return (this->info_).leaf_value;
|
||||
}
|
||||
/*! \return get split condition of the node */
|
||||
@@ -154,7 +154,7 @@ class TreeModel {
|
||||
* \param right right index, could be used to store
|
||||
* additional information
|
||||
*/
|
||||
inline void set_leaf(float value, int right = -1) {
|
||||
inline void set_leaf(bst_float value, int right = -1) {
|
||||
(this->info_).leaf_value = value;
|
||||
this->cleft_ = -1;
|
||||
this->cright_ = right;
|
||||
@@ -171,7 +171,7 @@ class TreeModel {
|
||||
* we have split condition
|
||||
*/
|
||||
union Info{
|
||||
float leaf_value;
|
||||
bst_float leaf_value;
|
||||
TSplitCond split_cond;
|
||||
};
|
||||
// pointer to parent, highest bit is used to
|
||||
@@ -230,7 +230,7 @@ class TreeModel {
|
||||
* \param rid node id of the node
|
||||
* \param value new leaf value
|
||||
*/
|
||||
inline void ChangeToLeaf(int rid, float value) {
|
||||
inline void ChangeToLeaf(int rid, bst_float value) {
|
||||
CHECK(nodes[nodes[rid].cleft() ].is_leaf());
|
||||
CHECK(nodes[nodes[rid].cright()].is_leaf());
|
||||
this->DeleteNode(nodes[rid].cleft());
|
||||
@@ -242,7 +242,7 @@ class TreeModel {
|
||||
* \param rid node id of the node
|
||||
* \param value new leaf value
|
||||
*/
|
||||
inline void CollapseToLeaf(int rid, float value) {
|
||||
inline void CollapseToLeaf(int rid, bst_float value) {
|
||||
if (nodes[rid].is_leaf()) return;
|
||||
if (!nodes[nodes[rid].cleft() ].is_leaf()) {
|
||||
CollapseToLeaf(nodes[rid].cleft(), 0.0f);
|
||||
@@ -338,7 +338,7 @@ class TreeModel {
|
||||
}
|
||||
/*!
|
||||
* \brief add child nodes to node
|
||||
* \param nid node id to add childs
|
||||
* \param nid node id to add children to
|
||||
*/
|
||||
inline void AddChilds(int nid) {
|
||||
int pleft = this->AllocNode();
|
||||
@@ -398,11 +398,11 @@ class TreeModel {
|
||||
/*! \brief node statistics used in regression tree */
|
||||
struct RTreeNodeStat {
|
||||
/*! \brief loss change caused by current split */
|
||||
float loss_chg;
|
||||
bst_float loss_chg;
|
||||
/*! \brief sum of hessian values, used to measure coverage of data */
|
||||
float sum_hess;
|
||||
bst_float sum_hess;
|
||||
/*! \brief weight of current node */
|
||||
float base_weight;
|
||||
bst_float base_weight;
|
||||
/*! \brief number of child that is leaf node known up to now */
|
||||
int leaf_child_cnt;
|
||||
};
|
||||
@@ -426,12 +426,12 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat> {
|
||||
inline void Init(size_t size);
|
||||
/*!
|
||||
* \brief fill the vector with sparse vector
|
||||
* \param inst The sparse instance to fil.
|
||||
* \param inst The sparse instance to fill.
|
||||
*/
|
||||
inline void Fill(const RowBatch::Inst& inst);
|
||||
/*!
|
||||
* \brief drop the trace after fill, must be called after fill.
|
||||
* \param inst The sparse instanc to drop.
|
||||
* \param inst The sparse instance to drop.
|
||||
*/
|
||||
inline void Drop(const RowBatch::Inst& inst);
|
||||
/*!
|
||||
@@ -439,7 +439,7 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat> {
|
||||
* \param i feature index.
|
||||
* \return the i-th feature value
|
||||
*/
|
||||
inline float fvalue(size_t i) const;
|
||||
inline bst_float fvalue(size_t i) const;
|
||||
/*!
|
||||
* \brief check whether i-th entry is missing
|
||||
* \param i feature index.
|
||||
@@ -453,7 +453,7 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat> {
|
||||
* when flag == -1, this indicates the value is missing
|
||||
*/
|
||||
union Entry {
|
||||
float fvalue;
|
||||
bst_float fvalue;
|
||||
int flag;
|
||||
};
|
||||
std::vector<Entry> data;
|
||||
@@ -471,14 +471,14 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat> {
|
||||
* \param root_id starting root index of the instance
|
||||
* \return the leaf index of the given feature
|
||||
*/
|
||||
inline float Predict(const FVec& feat, unsigned root_id = 0) const;
|
||||
inline bst_float Predict(const FVec& feat, unsigned root_id = 0) const;
|
||||
/*!
|
||||
* \brief get next position of the tree given current pid
|
||||
* \param pid Current node id.
|
||||
* \param fvalue feature value if not missing.
|
||||
* \param is_unknown Whether current required feature is missing.
|
||||
*/
|
||||
inline int GetNext(int pid, float fvalue, bool is_unknown) const;
|
||||
inline int GetNext(int pid, bst_float fvalue, bool is_unknown) const;
|
||||
/*!
|
||||
* \brief dump the model in the requested format as a text string
|
||||
* \param fmap feature map that may help give interpretations of feature
|
||||
@@ -513,7 +513,7 @@ inline void RegTree::FVec::Drop(const RowBatch::Inst& inst) {
|
||||
}
|
||||
}
|
||||
|
||||
inline float RegTree::FVec::fvalue(size_t i) const {
|
||||
inline bst_float RegTree::FVec::fvalue(size_t i) const {
|
||||
return data[i].fvalue;
|
||||
}
|
||||
|
||||
@@ -530,14 +530,14 @@ inline int RegTree::GetLeafIndex(const RegTree::FVec& feat, unsigned root_id) co
|
||||
return pid;
|
||||
}
|
||||
|
||||
inline float RegTree::Predict(const RegTree::FVec& feat, unsigned root_id) const {
|
||||
inline bst_float RegTree::Predict(const RegTree::FVec& feat, unsigned root_id) const {
|
||||
int pid = this->GetLeafIndex(feat, root_id);
|
||||
return (*this)[pid].leaf_value();
|
||||
}
|
||||
|
||||
/*! \brief get next position of the tree given current pid */
|
||||
inline int RegTree::GetNext(int pid, float fvalue, bool is_unknown) const {
|
||||
float split_value = (*this)[pid].split_cond();
|
||||
inline int RegTree::GetNext(int pid, bst_float fvalue, bool is_unknown) const {
|
||||
bst_float split_value = (*this)[pid].split_cond();
|
||||
if (is_unknown) {
|
||||
return (*this)[pid].cdefault();
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user