Use bst_float consistently throughout (#1824)

* Fix various typos

* Add override to functions that are overridden

gcc gives warnings about functions that override a base-class function
but are not marked as override. This fixes it.

* Use bst_float consistently

Use bst_float for all the variables that involve weight,
leaf value, gradient, hessian, gain, loss_chg, predictions,
base_margin, feature values.

In some cases, where the value can grow larger (for example, when it is
accumulated through repeated additions), double is used.

This ensures that type conversions are minimal and reduces loss of
precision.
This commit is contained in:
AbdealiJK
2016-11-30 23:32:10 +05:30
committed by Tianqi Chen
parent da2556f58a
commit 6f16f0ef58
50 changed files with 392 additions and 389 deletions

View File

@@ -408,7 +408,7 @@ struct SplitEntry {
/*! \brief split index */
unsigned sindex;
/*! \brief split value */
float split_value;
bst_float split_value;
/*! \brief constructor */
SplitEntry() : loss_chg(0.0f), sindex(0), split_value(0.0f) {}
/*!
@@ -452,7 +452,7 @@ struct SplitEntry {
* \return whether the proposed split is better and can replace current split
*/
inline bool Update(bst_float new_loss_chg, unsigned split_index,
float new_split_value, bool default_left) {
bst_float new_split_value, bool default_left) {
if (this->NeedReplace(new_loss_chg, split_index)) {
this->loss_chg = new_loss_chg;
if (default_left)

View File

@@ -68,13 +68,13 @@ void DumpRegTree(std::stringstream& fo, // NOLINT(*)
fo << "{ \"nodeid\": " << nid
<< ", \"depth\": " << depth
<< ", \"split\": \"" << fmap.name(split_index) << "\""
<< ", \"split_condition\": " << int(float(cond) + 1.0f)
<< ", \"split_condition\": " << int(cond + 1.0)
<< ", \"yes\": " << tree[nid].cleft()
<< ", \"no\": " << tree[nid].cright()
<< ", \"missing\": " << tree[nid].cdefault();
} else {
fo << nid << ":[" << fmap.name(split_index) << "<"
<< int(float(cond)+1.0f)
<< int(cond + 1.0)
<< "] yes=" << tree[nid].cleft()
<< ",no=" << tree[nid].cright()
<< ",missing=" << tree[nid].cdefault();
@@ -87,12 +87,12 @@ void DumpRegTree(std::stringstream& fo, // NOLINT(*)
fo << "{ \"nodeid\": " << nid
<< ", \"depth\": " << depth
<< ", \"split\": \"" << fmap.name(split_index) << "\""
<< ", \"split_condition\": " << float(cond)
<< ", \"split_condition\": " << cond
<< ", \"yes\": " << tree[nid].cleft()
<< ", \"no\": " << tree[nid].cright()
<< ", \"missing\": " << tree[nid].cdefault();
} else {
fo << nid << ":[" << fmap.name(split_index) << "<" << float(cond)
fo << nid << ":[" << fmap.name(split_index) << "<" << cond
<< "] yes=" << tree[nid].cleft()
<< ",no=" << tree[nid].cright()
<< ",missing=" << tree[nid].cdefault();
@@ -106,12 +106,12 @@ void DumpRegTree(std::stringstream& fo, // NOLINT(*)
fo << "{ \"nodeid\": " << nid
<< ", \"depth\": " << depth
<< ", \"split\": " << split_index
<< ", \"split_condition\": " << float(cond)
<< ", \"split_condition\": " << cond
<< ", \"yes\": " << tree[nid].cleft()
<< ", \"no\": " << tree[nid].cright()
<< ", \"missing\": " << tree[nid].cdefault();
} else {
fo << nid << ":[f" << split_index << "<"<< float(cond)
fo << nid << ":[f" << split_index << "<"<< cond
<< "] yes=" << tree[nid].cleft()
<< ",no=" << tree[nid].cright()
<< ",missing=" << tree[nid].cdefault();

View File

@@ -267,7 +267,7 @@ class BaseMaker: public TreeUpdater {
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) {
const bst_uint ridx = col[j].index;
const float fvalue = col[j].fvalue;
const bst_float fvalue = col[j].fvalue;
const int nid = this->DecodePosition(ridx);
CHECK(tree[nid].is_leaf());
int pid = tree[nid].parent();
@@ -327,7 +327,7 @@ class BaseMaker: public TreeUpdater {
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) {
const bst_uint ridx = col[j].index;
const float fvalue = col[j].fvalue;
const bst_float fvalue = col[j].fvalue;
const int nid = this->DecodePosition(ridx);
// go back to parent, correct those who are not default
if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) {

View File

@@ -53,9 +53,9 @@ class ColMaker: public TreeUpdater {
/*! \brief extra statistics of data */
TStats stats_extra;
/*! \brief last feature value scanned */
float last_fvalue;
bst_float last_fvalue;
/*! \brief first feature value scanned */
float first_fvalue;
bst_float first_fvalue;
/*! \brief current best solution */
SplitEntry best;
// constructor
@@ -69,7 +69,7 @@ class ColMaker: public TreeUpdater {
/*! \brief loss of this node, without split */
bst_float root_gain;
/*! \brief weight calculated related to current data */
float weight;
bst_float weight;
/*! \brief current best solution */
SplitEntry best;
// constructor
@@ -284,7 +284,7 @@ class ColMaker: public TreeUpdater {
const bst_uint ridx = col[i].index;
const int nid = position[ridx];
if (nid < 0) continue;
const float fvalue = col[i].fvalue;
const bst_float fvalue = col[i].fvalue;
if (temp[nid].stats.Empty()) {
temp[nid].first_fvalue = fvalue;
}
@@ -309,7 +309,7 @@ class ColMaker: public TreeUpdater {
for (int tid = 0; tid < nthread; ++tid) {
stemp[tid][nid].stats_extra = sum;
ThreadEntry &e = stemp[tid][nid];
float fsplit;
bst_float fsplit;
if (tid != 0) {
if (stemp[tid - 1][nid].last_fvalue != e.first_fvalue) {
fsplit = (stemp[tid - 1][nid].last_fvalue + e.first_fvalue) * 0.5f;
@@ -364,7 +364,7 @@ class ColMaker: public TreeUpdater {
const bst_uint ridx = col[i].index;
const int nid = position[ridx];
if (nid < 0) continue;
const float fvalue = col[i].fvalue;
const bst_float fvalue = col[i].fvalue;
// get the statistics of nid
ThreadEntry &e = temp[nid];
if (e.stats.Empty()) {
@@ -403,7 +403,7 @@ class ColMaker: public TreeUpdater {
}
// update enumeration solution
inline void UpdateEnumeration(int nid, bst_gpair gstats,
float fvalue, int d_step, bst_uint fid,
bst_float fvalue, int d_step, bst_uint fid,
TStats &c, std::vector<ThreadEntry> &temp) { // NOLINT(*)
// get the statistics of nid
ThreadEntry &e = temp[nid];
@@ -503,8 +503,8 @@ class ColMaker: public TreeUpdater {
loss_chg = static_cast<bst_float>(
constraints_[nid].CalcSplitGain(param, fid, e.stats, c) - snode[nid].root_gain);
}
const float gap = std::abs(e.last_fvalue) + rt_eps;
const float delta = d_step == +1 ? gap: -gap;
const bst_float gap = std::abs(e.last_fvalue) + rt_eps;
const bst_float delta = d_step == +1 ? gap: -gap;
e.best.Update(loss_chg, fid, e.last_fvalue + delta, d_step == -1);
}
}
@@ -535,7 +535,7 @@ class ColMaker: public TreeUpdater {
const int nid = position[ridx];
if (nid < 0) continue;
// start working
const float fvalue = it->fvalue;
const bst_float fvalue = it->fvalue;
// get the statistics of nid
ThreadEntry &e = temp[nid];
// test if first hit, this is fine, because we set 0 during init
@@ -580,8 +580,8 @@ class ColMaker: public TreeUpdater {
loss_chg = static_cast<bst_float>(
constraints_[nid].CalcSplitGain(param, fid, e.stats, c) - snode[nid].root_gain);
}
const float gap = std::abs(e.last_fvalue) + rt_eps;
const float delta = d_step == +1 ? gap: -gap;
const bst_float gap = std::abs(e.last_fvalue) + rt_eps;
const bst_float delta = d_step == +1 ? gap: -gap;
e.best.Update(loss_chg, fid, e.last_fvalue + delta, d_step == -1);
}
}
@@ -730,7 +730,7 @@ class ColMaker: public TreeUpdater {
for (bst_omp_uint j = 0; j < ndata; ++j) {
const bst_uint ridx = col[j].index;
const int nid = this->DecodePosition(ridx);
const float fvalue = col[j].fvalue;
const bst_float fvalue = col[j].fvalue;
// go back to parent, correct those who are not default
if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) {
if (fvalue < tree[nid].split_cond()) {
@@ -864,7 +864,7 @@ class DistColMaker : public ColMaker<TStats, TConstraint> {
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) {
const bst_uint ridx = col[j].index;
const float fvalue = col[j].fvalue;
const bst_float fvalue = col[j].fvalue;
const int nid = this->DecodePosition(ridx);
if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) {
if (fvalue < tree[nid].split_cond()) {
@@ -898,7 +898,7 @@ class DistColMaker : public ColMaker<TStats, TConstraint> {
}
}
// synchronize the best solution of each node
virtual void SyncBestSolution(const std::vector<int> &qexpand) {
void SyncBestSolution(const std::vector<int> &qexpand) override {
std::vector<SplitEntry> vec;
for (size_t i = 0; i < qexpand.size(); ++i) {
const int nid = qexpand[i];

View File

@@ -191,7 +191,7 @@ class HistMaker: public BaseMaker {
c.SetSubstract(node_sum, s);
if (c.sum_hess >= param.min_child_weight) {
double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
if (best->Update(static_cast<float>(loss_chg), fid, hist.cut[i], false)) {
if (best->Update(static_cast<bst_float>(loss_chg), fid, hist.cut[i], false)) {
*left_sum = s;
}
}
@@ -204,7 +204,7 @@ class HistMaker: public BaseMaker {
c.SetSubstract(node_sum, s);
if (c.sum_hess >= param.min_child_weight) {
double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
if (best->Update(static_cast<float>(loss_chg), fid, hist.cut[i-1], true)) {
if (best->Update(static_cast<bst_float>(loss_chg), fid, hist.cut[i-1], true)) {
*left_sum = c;
}
}
@@ -260,8 +260,8 @@ class HistMaker: public BaseMaker {
}
inline void SetStats(RegTree *p_tree, int nid, const TStats &node_sum) {
p_tree->stat(nid).base_weight = static_cast<float>(node_sum.CalcWeight(param));
p_tree->stat(nid).sum_hess = static_cast<float>(node_sum.sum_hess);
p_tree->stat(nid).base_weight = static_cast<bst_float>(node_sum.CalcWeight(param));
p_tree->stat(nid).sum_hess = static_cast<bst_float>(node_sum.sum_hess);
node_sum.SetLeafVec(param, p_tree->leafvec(nid));
}
};

View File

@@ -27,7 +27,7 @@ class TreeRefresher: public TreeUpdater {
// update the tree, do pruning
void Update(const std::vector<bst_gpair> &gpair,
DMatrix *p_fmat,
const std::vector<RegTree*> &trees) {
const std::vector<RegTree*> &trees) override {
if (trees.size() == 0) return;
// number of threads
// thread temporal space
@@ -130,13 +130,13 @@ class TreeRefresher: public TreeUpdater {
inline void Refresh(const TStats *gstats,
int nid, RegTree *p_tree) {
RegTree &tree = *p_tree;
tree.stat(nid).base_weight = static_cast<float>(gstats[nid].CalcWeight(param));
tree.stat(nid).sum_hess = static_cast<float>(gstats[nid].sum_hess);
tree.stat(nid).base_weight = static_cast<bst_float>(gstats[nid].CalcWeight(param));
tree.stat(nid).sum_hess = static_cast<bst_float>(gstats[nid].sum_hess);
gstats[nid].SetLeafVec(param, tree.leafvec(nid));
if (tree[nid].is_leaf()) {
tree[nid].set_leaf(tree.stat(nid).base_weight * param.learning_rate);
} else {
tree.stat(nid).loss_chg = static_cast<float>(
tree.stat(nid).loss_chg = static_cast<bst_float>(
gstats[tree[nid].cleft()].CalcGain(param) +
gstats[tree[nid].cright()].CalcGain(param) -
gstats[nid].CalcGain(param));

View File

@@ -60,7 +60,7 @@ class SketchMaker: public BaseMaker {
for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) {
this->SetStats(nid, node_stats[nid], p_tree);
if (!(*p_tree)[nid].is_leaf()) {
p_tree->stat(nid).loss_chg = static_cast<float>(
p_tree->stat(nid).loss_chg = static_cast<bst_float>(
node_stats[(*p_tree)[nid].cleft()].CalcGain(param) +
node_stats[(*p_tree)[nid].cright()].CalcGain(param) -
node_stats[nid].CalcGain(param));
@@ -310,8 +310,8 @@ class SketchMaker: public BaseMaker {
}
// set statistics on ptree
inline void SetStats(int nid, const SKStats &node_sum, RegTree *p_tree) {
p_tree->stat(nid).base_weight = static_cast<float>(node_sum.CalcWeight(param));
p_tree->stat(nid).sum_hess = static_cast<float>(node_sum.sum_hess);
p_tree->stat(nid).base_weight = static_cast<bst_float>(node_sum.CalcWeight(param));
p_tree->stat(nid).sum_hess = static_cast<bst_float>(node_sum.sum_hess);
node_sum.SetLeafVec(param, p_tree->leafvec(nid));
}
inline void EnumerateSplit(const WXQSketch::Summary &pos_grad,
@@ -372,7 +372,8 @@ class SketchMaker: public BaseMaker {
c.sum_hess >= param.min_child_weight) {
bst_float cpt = fsplits.back();
double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain;
best->Update(static_cast<bst_float>(loss_chg), fid, cpt + fabsf(cpt) + 1.0f, false);
best->Update(static_cast<bst_float>(loss_chg),
fid, cpt + std::abs(cpt) + 1.0f, false);
}
}
}