Use bst_float consistently throughout (#1824)

* Fix various typos

* Add override to functions that are overridden

gcc gives warnings about functions that override a virtual function but
are not marked as overridden. This fixes it.

* Use bst_float consistently

Use bst_float for all the variables that involve weight,
leaf value, gradient, hessian, gain, loss_chg, predictions,
base_margin, feature values.

In some cases, where accumulation (e.g. repeated additions) can produce
a larger value, double is used instead.

This ensures that type conversions are minimal and reduces loss of
precision.
This commit is contained in:
AbdealiJK
2016-11-30 23:32:10 +05:30
committed by Tianqi Chen
parent da2556f58a
commit 6f16f0ef58
50 changed files with 392 additions and 389 deletions

View File

@@ -35,7 +35,7 @@ class SoftmaxMultiClassObj : public ObjFunction {
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
param_.InitAllowUnknown(args);
}
void GetGradient(const std::vector<float>& preds,
void GetGradient(const std::vector<bst_float>& preds,
const MetaInfo& info,
int iter,
std::vector<bst_gpair>* out_gpair) override {
@@ -49,7 +49,7 @@ class SoftmaxMultiClassObj : public ObjFunction {
int label_error = 0;
#pragma omp parallel
{
std::vector<float> rec(nclass);
std::vector<bst_float> rec(nclass);
#pragma omp for schedule(static)
for (omp_ulong i = 0; i < ndata; ++i) {
for (int k = 0; k < nclass; ++k) {
@@ -60,10 +60,10 @@ class SoftmaxMultiClassObj : public ObjFunction {
if (label < 0 || label >= nclass) {
label_error = label; label = 0;
}
const float wt = info.GetWeight(i);
const bst_float wt = info.GetWeight(i);
for (int k = 0; k < nclass; ++k) {
float p = rec[k];
const float h = 2.0f * p * (1.0f - p) * wt;
bst_float p = rec[k];
const bst_float h = 2.0f * p * (1.0f - p) * wt;
if (label == k) {
out_gpair->at(i * nclass + k) = bst_gpair((p - 1.0f) * wt, h);
} else {
@@ -77,10 +77,10 @@ class SoftmaxMultiClassObj : public ObjFunction {
<< " num_class=" << nclass
<< " but found " << label_error << " in label.";
}
void PredTransform(std::vector<float>* io_preds) override {
void PredTransform(std::vector<bst_float>* io_preds) override {
this->Transform(io_preds, output_prob_);
}
void EvalTransform(std::vector<float>* io_preds) override {
void EvalTransform(std::vector<bst_float>* io_preds) override {
this->Transform(io_preds, true);
}
const char* DefaultEvalMetric() const override {
@@ -88,23 +88,23 @@ class SoftmaxMultiClassObj : public ObjFunction {
}
private:
inline void Transform(std::vector<float> *io_preds, bool prob) {
std::vector<float> &preds = *io_preds;
std::vector<float> tmp;
inline void Transform(std::vector<bst_float> *io_preds, bool prob) {
std::vector<bst_float> &preds = *io_preds;
std::vector<bst_float> tmp;
const int nclass = param_.num_class;
const omp_ulong ndata = static_cast<omp_ulong>(preds.size() / nclass);
if (!prob) tmp.resize(ndata);
#pragma omp parallel
{
std::vector<float> rec(nclass);
std::vector<bst_float> rec(nclass);
#pragma omp for schedule(static)
for (omp_ulong j = 0; j < ndata; ++j) {
for (int k = 0; k < nclass; ++k) {
rec[k] = preds[j * nclass + k];
}
if (!prob) {
tmp[j] = static_cast<float>(
tmp[j] = static_cast<bst_float>(
common::FindMaxIndex(rec.begin(), rec.end()) - rec.begin());
} else {
common::Softmax(&rec);
@@ -122,7 +122,7 @@ class SoftmaxMultiClassObj : public ObjFunction {
SoftmaxMultiClassParam param_;
};
// register the ojective functions
// register the objective functions
DMLC_REGISTER_PARAMETER(SoftmaxMultiClassParam);
XGBOOST_REGISTER_OBJECTIVE(SoftmaxMultiClass, "multi:softmax")

View File

@@ -37,7 +37,7 @@ class LambdaRankObj : public ObjFunction {
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
param_.InitAllowUnknown(args);
}
void GetGradient(const std::vector<float>& preds,
void GetGradient(const std::vector<bst_float>& preds,
const MetaInfo& info,
int iter,
std::vector<bst_gpair>* out_gpair) override {
@@ -58,7 +58,7 @@ class LambdaRankObj : public ObjFunction {
std::vector<LambdaPair> pairs;
std::vector<ListEntry> lst;
std::vector< std::pair<float, unsigned> > rec;
std::vector< std::pair<bst_float, unsigned> > rec;
#pragma omp for schedule(static)
for (bst_omp_uint k = 0; k < ngroup; ++k) {
lst.clear(); pairs.clear();
@@ -103,11 +103,11 @@ class LambdaRankObj : public ObjFunction {
for (size_t i = 0; i < pairs.size(); ++i) {
const ListEntry &pos = lst[pairs[i].pos_index];
const ListEntry &neg = lst[pairs[i].neg_index];
const float w = pairs[i].weight * scale;
const bst_float w = pairs[i].weight * scale;
const float eps = 1e-16f;
float p = common::Sigmoid(pos.pred - neg.pred);
float g = p - 1.0f;
float h = std::max(p * (1.0f - p), eps);
bst_float p = common::Sigmoid(pos.pred - neg.pred);
bst_float g = p - 1.0f;
bst_float h = std::max(p * (1.0f - p), eps);
// accumulate gradient and hessian in both pid, and nid
gpair[pos.rindex].grad += g * w;
gpair[pos.rindex].hess += 2.0f * w * h;
@@ -125,13 +125,13 @@ class LambdaRankObj : public ObjFunction {
/*! \brief helper information in a list */
struct ListEntry {
/*! \brief the predict score we in the data */
float pred;
bst_float pred;
/*! \brief the actual label of the entry */
float label;
bst_float label;
/*! \brief row index in the data matrix */
unsigned rindex;
// constructor
ListEntry(float pred, float label, unsigned rindex)
ListEntry(bst_float pred, bst_float label, unsigned rindex)
: pred(pred), label(label), rindex(rindex) {}
// comparator by prediction
inline static bool CmpPred(const ListEntry &a, const ListEntry &b) {
@@ -149,7 +149,7 @@ class LambdaRankObj : public ObjFunction {
/*! \brief negative index: this is a position in the list */
unsigned neg_index;
/*! \brief weight to be filled in */
float weight;
bst_float weight;
// constructor
LambdaPair(unsigned pos_index, unsigned neg_index)
: pos_index(pos_index), neg_index(neg_index), weight(1.0f) {}
@@ -180,11 +180,11 @@ class LambdaRankObjNDCG : public LambdaRankObj {
std::vector<LambdaPair> &pairs = *io_pairs;
float IDCG;
{
std::vector<float> labels(sorted_list.size());
std::vector<bst_float> labels(sorted_list.size());
for (size_t i = 0; i < sorted_list.size(); ++i) {
labels[i] = sorted_list[i].label;
}
std::sort(labels.begin(), labels.end(), std::greater<float>());
std::sort(labels.begin(), labels.end(), std::greater<bst_float>());
IDCG = CalcDCG(labels);
}
if (IDCG == 0.0) {
@@ -200,25 +200,25 @@ class LambdaRankObjNDCG : public LambdaRankObj {
float neg_loginv = 1.0f / std::log(neg_idx + 2.0f);
int pos_label = static_cast<int>(sorted_list[pos_idx].label);
int neg_label = static_cast<int>(sorted_list[neg_idx].label);
float original =
bst_float original =
((1 << pos_label) - 1) * pos_loginv + ((1 << neg_label) - 1) * neg_loginv;
float changed =
((1 << neg_label) - 1) * pos_loginv + ((1 << pos_label) - 1) * neg_loginv;
float delta = (original - changed) * IDCG;
bst_float delta = (original - changed) * IDCG;
if (delta < 0.0f) delta = - delta;
pairs[i].weight = delta;
}
}
}
inline static float CalcDCG(const std::vector<float> &labels) {
inline static bst_float CalcDCG(const std::vector<bst_float> &labels) {
double sumdcg = 0.0;
for (size_t i = 0; i < labels.size(); ++i) {
const unsigned rel = static_cast<unsigned>(labels[i]);
if (rel != 0) {
sumdcg += ((1 << rel) - 1) / std::log2(static_cast<float>(i + 2));
sumdcg += ((1 << rel) - 1) / std::log2(static_cast<bst_float>(i + 2));
}
}
return static_cast<float>(sumdcg);
return static_cast<bst_float>(sumdcg);
}
};
@@ -250,19 +250,19 @@ class LambdaRankObjMAP : public LambdaRankObj {
* \param index1,index2 the instances switched
* \param map_stats a vector containing the accumulated precisions for each position in a list
*/
inline float GetLambdaMAP(const std::vector<ListEntry> &sorted_list,
int index1, int index2,
std::vector<MAPStats> *p_map_stats) {
inline bst_float GetLambdaMAP(const std::vector<ListEntry> &sorted_list,
int index1, int index2,
std::vector<MAPStats> *p_map_stats) {
std::vector<MAPStats> &map_stats = *p_map_stats;
if (index1 == index2 || map_stats[map_stats.size() - 1].hits == 0) {
return 0.0f;
}
if (index1 > index2) std::swap(index1, index2);
float original = map_stats[index2].ap_acc;
bst_float original = map_stats[index2].ap_acc;
if (index1 != 0) original -= map_stats[index1 - 1].ap_acc;
float changed = 0;
float label1 = sorted_list[index1].label > 0.0f ? 1.0f : 0.0f;
float label2 = sorted_list[index2].label > 0.0f ? 1.0f : 0.0f;
bst_float changed = 0;
bst_float label1 = sorted_list[index1].label > 0.0f ? 1.0f : 0.0f;
bst_float label2 = sorted_list[index2].label > 0.0f ? 1.0f : 0.0f;
if (label1 == label2) {
return 0.0;
} else if (label1 < label2) {
@@ -272,7 +272,7 @@ class LambdaRankObjMAP : public LambdaRankObj {
changed += map_stats[index2 - 1].ap_acc_miss - map_stats[index1].ap_acc_miss;
changed += map_stats[index2].hits / (index2 + 1);
}
float ans = (changed - original) / (map_stats[map_stats.size() - 1].hits);
bst_float ans = (changed - original) / (map_stats[map_stats.size() - 1].hits);
if (ans < 0) ans = -ans;
return ans;
}
@@ -285,7 +285,7 @@ class LambdaRankObjMAP : public LambdaRankObj {
std::vector<MAPStats> *p_map_acc) {
std::vector<MAPStats> &map_acc = *p_map_acc;
map_acc.resize(sorted_list.size());
float hit = 0, acc1 = 0, acc2 = 0, acc3 = 0;
bst_float hit = 0, acc1 = 0, acc2 = 0, acc3 = 0;
for (size_t i = 1; i <= sorted_list.size(); ++i) {
if (sorted_list[i - 1].label > 0.0f) {
hit++;
@@ -309,7 +309,7 @@ class LambdaRankObjMAP : public LambdaRankObj {
}
};
// register the ojective functions
// register the objective functions
DMLC_REGISTER_PARAMETER(LambdaRankParam);
XGBOOST_REGISTER_OBJECTIVE(PairwieRankObj, "rank:pairwise")

View File

@@ -20,24 +20,24 @@ DMLC_REGISTRY_FILE_TAG(regression_obj);
// common regressions
// linear regression
struct LinearSquareLoss {
static float PredTransform(float x) { return x; }
static bool CheckLabel(float x) { return true; }
static float FirstOrderGradient(float predt, float label) { return predt - label; }
static float SecondOrderGradient(float predt, float label) { return 1.0f; }
static float ProbToMargin(float base_score) { return base_score; }
static bst_float PredTransform(bst_float x) { return x; }
static bool CheckLabel(bst_float x) { return true; }
static bst_float FirstOrderGradient(bst_float predt, bst_float label) { return predt - label; }
static bst_float SecondOrderGradient(bst_float predt, bst_float label) { return 1.0f; }
static bst_float ProbToMargin(bst_float base_score) { return base_score; }
static const char* LabelErrorMsg() { return ""; }
static const char* DefaultEvalMetric() { return "rmse"; }
};
// logistic loss for probability regression task
struct LogisticRegression {
static float PredTransform(float x) { return common::Sigmoid(x); }
static bool CheckLabel(float x) { return x >= 0.0f && x <= 1.0f; }
static float FirstOrderGradient(float predt, float label) { return predt - label; }
static float SecondOrderGradient(float predt, float label) {
static bst_float PredTransform(bst_float x) { return common::Sigmoid(x); }
static bool CheckLabel(bst_float x) { return x >= 0.0f && x <= 1.0f; }
static bst_float FirstOrderGradient(bst_float predt, bst_float label) { return predt - label; }
static bst_float SecondOrderGradient(bst_float predt, bst_float label) {
const float eps = 1e-16f;
return std::max(predt * (1.0f - predt), eps);
}
static float ProbToMargin(float base_score) {
static bst_float ProbToMargin(bst_float base_score) {
CHECK(base_score > 0.0f && base_score < 1.0f)
<< "base_score must be in (0,1) for logistic loss";
return -std::log(1.0f / base_score - 1.0f);
@@ -53,12 +53,12 @@ struct LogisticClassification : public LogisticRegression {
};
// logistic loss, but predict un-transformed margin
struct LogisticRaw : public LogisticRegression {
static float PredTransform(float x) { return x; }
static float FirstOrderGradient(float predt, float label) {
static bst_float PredTransform(bst_float x) { return x; }
static bst_float FirstOrderGradient(bst_float predt, bst_float label) {
predt = common::Sigmoid(predt);
return predt - label;
}
static float SecondOrderGradient(float predt, float label) {
static bst_float SecondOrderGradient(bst_float predt, bst_float label) {
const float eps = 1e-16f;
predt = common::Sigmoid(predt);
return std::max(predt * (1.0f - predt), eps);
@@ -75,14 +75,14 @@ struct RegLossParam : public dmlc::Parameter<RegLossParam> {
}
};
// regression los function
// regression loss function
template<typename Loss>
class RegLossObj : public ObjFunction {
public:
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
param_.InitAllowUnknown(args);
}
void GetGradient(const std::vector<float> &preds,
void GetGradient(const std::vector<bst_float> &preds,
const MetaInfo &info,
int iter,
std::vector<bst_gpair> *out_gpair) override {
@@ -97,8 +97,8 @@ class RegLossObj : public ObjFunction {
const omp_ulong ndata = static_cast<omp_ulong>(preds.size());
#pragma omp parallel for schedule(static)
for (omp_ulong i = 0; i < ndata; ++i) {
float p = Loss::PredTransform(preds[i]);
float w = info.GetWeight(i);
bst_float p = Loss::PredTransform(preds[i]);
bst_float w = info.GetWeight(i);
if (info.labels[i] == 1.0f) w *= param_.scale_pos_weight;
if (!Loss::CheckLabel(info.labels[i])) label_correct = false;
out_gpair->at(i) = bst_gpair(Loss::FirstOrderGradient(p, info.labels[i]) * w,
@@ -111,15 +111,15 @@ class RegLossObj : public ObjFunction {
const char* DefaultEvalMetric() const override {
return Loss::DefaultEvalMetric();
}
void PredTransform(std::vector<float> *io_preds) override {
std::vector<float> &preds = *io_preds;
void PredTransform(std::vector<bst_float> *io_preds) override {
std::vector<bst_float> &preds = *io_preds;
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) {
preds[j] = Loss::PredTransform(preds[j]);
}
}
float ProbToMargin(float base_score) const override {
bst_float ProbToMargin(bst_float base_score) const override {
return Loss::ProbToMargin(base_score);
}
@@ -127,7 +127,7 @@ class RegLossObj : public ObjFunction {
RegLossParam param_;
};
// register the ojective functions
// register the objective functions
DMLC_REGISTER_PARAMETER(RegLossParam);
XGBOOST_REGISTER_OBJECTIVE(LinearRegression, "reg:linear")
@@ -164,7 +164,7 @@ class PoissonRegression : public ObjFunction {
param_.InitAllowUnknown(args);
}
void GetGradient(const std::vector<float> &preds,
void GetGradient(const std::vector<bst_float> &preds,
const MetaInfo &info,
int iter,
std::vector<bst_gpair> *out_gpair) override {
@@ -177,9 +177,9 @@ class PoissonRegression : public ObjFunction {
const omp_ulong ndata = static_cast<omp_ulong>(preds.size()); // NOLINT(*)
#pragma omp parallel for schedule(static)
for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*)
float p = preds[i];
float w = info.GetWeight(i);
float y = info.labels[i];
bst_float p = preds[i];
bst_float w = info.GetWeight(i);
bst_float y = info.labels[i];
if (y >= 0.0f) {
out_gpair->at(i) = bst_gpair((std::exp(p) - y) * w,
std::exp(p + param_.max_delta_step) * w);
@@ -189,18 +189,18 @@ class PoissonRegression : public ObjFunction {
}
CHECK(label_correct) << "PoissonRegression: label must be nonnegative";
}
void PredTransform(std::vector<float> *io_preds) override {
std::vector<float> &preds = *io_preds;
void PredTransform(std::vector<bst_float> *io_preds) override {
std::vector<bst_float> &preds = *io_preds;
const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
#pragma omp parallel for schedule(static)
for (long j = 0; j < ndata; ++j) { // NOLINT(*)
preds[j] = std::exp(preds[j]);
}
}
void EvalTransform(std::vector<float> *io_preds) override {
void EvalTransform(std::vector<bst_float> *io_preds) override {
PredTransform(io_preds);
}
float ProbToMargin(float base_score) const override {
bst_float ProbToMargin(bst_float base_score) const override {
return std::log(base_score);
}
const char* DefaultEvalMetric(void) const override {
@@ -211,7 +211,7 @@ class PoissonRegression : public ObjFunction {
PoissonRegressionParam param_;
};
// register the ojective functions
// register the objective functions
DMLC_REGISTER_PARAMETER(PoissonRegressionParam);
XGBOOST_REGISTER_OBJECTIVE(PoissonRegression, "count:poisson")
@@ -225,7 +225,7 @@ class GammaRegression : public ObjFunction {
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
}
void GetGradient(const std::vector<float> &preds,
void GetGradient(const std::vector<bst_float> &preds,
const MetaInfo &info,
int iter,
std::vector<bst_gpair> *out_gpair) override {
@@ -238,9 +238,9 @@ class GammaRegression : public ObjFunction {
const omp_ulong ndata = static_cast<omp_ulong>(preds.size()); // NOLINT(*)
#pragma omp parallel for schedule(static)
for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*)
float p = preds[i];
float w = info.GetWeight(i);
float y = info.labels[i];
bst_float p = preds[i];
bst_float w = info.GetWeight(i);
bst_float y = info.labels[i];
if (y >= 0.0f) {
out_gpair->at(i) = bst_gpair((1 - y / std::exp(p)) * w, y / std::exp(p) * w);
} else {
@@ -249,18 +249,18 @@ class GammaRegression : public ObjFunction {
}
CHECK(label_correct) << "GammaRegression: label must be positive";
}
void PredTransform(std::vector<float> *io_preds) override {
std::vector<float> &preds = *io_preds;
void PredTransform(std::vector<bst_float> *io_preds) override {
std::vector<bst_float> &preds = *io_preds;
const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
#pragma omp parallel for schedule(static)
for (long j = 0; j < ndata; ++j) { // NOLINT(*)
preds[j] = std::exp(preds[j]);
}
}
void EvalTransform(std::vector<float> *io_preds) override {
void EvalTransform(std::vector<bst_float> *io_preds) override {
PredTransform(io_preds);
}
float ProbToMargin(float base_score) const override {
bst_float ProbToMargin(bst_float base_score) const override {
return std::log(base_score);
}
const char* DefaultEvalMetric(void) const override {
@@ -268,7 +268,7 @@ class GammaRegression : public ObjFunction {
}
};
// register the ojective functions
// register the objective functions
XGBOOST_REGISTER_OBJECTIVE(GammaRegression, "reg:gamma")
.describe("Gamma regression for severity data.")
.set_body([]() { return new GammaRegression(); });
@@ -290,7 +290,7 @@ class TweedieRegression : public ObjFunction {
param_.InitAllowUnknown(args);
}
void GetGradient(const std::vector<float> &preds,
void GetGradient(const std::vector<bst_float> &preds,
const MetaInfo &info,
int iter,
std::vector<bst_gpair> *out_gpair) override {
@@ -303,13 +303,14 @@ class TweedieRegression : public ObjFunction {
const omp_ulong ndata = static_cast<omp_ulong>(preds.size()); // NOLINT(*)
#pragma omp parallel for schedule(static)
for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*)
float p = preds[i];
float w = info.GetWeight(i);
float y = info.labels[i];
bst_float p = preds[i];
bst_float w = info.GetWeight(i);
bst_float y = info.labels[i];
float rho = param_.tweedie_variance_power;
if (y >= 0.0f) {
float grad = -y * std::exp((1 - rho) * p) + std::exp((2 - rho) * p);
float hess = -y * (1 - rho) * std::exp((1 - rho) * p) + (2 - rho) * std::exp((2 - rho) * p);
bst_float grad = -y * std::exp((1 - rho) * p) + std::exp((2 - rho) * p);
bst_float hess = -y * (1 - rho) * \
std::exp((1 - rho) * p) + (2 - rho) * std::exp((2 - rho) * p);
out_gpair->at(i) = bst_gpair(grad * w, hess * w);
} else {
label_correct = false;
@@ -317,8 +318,8 @@ class TweedieRegression : public ObjFunction {
}
CHECK(label_correct) << "TweedieRegression: label must be nonnegative";
}
void PredTransform(std::vector<float> *io_preds) override {
std::vector<float> &preds = *io_preds;
void PredTransform(std::vector<bst_float> *io_preds) override {
std::vector<bst_float> &preds = *io_preds;
const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
#pragma omp parallel for schedule(static)
for (long j = 0; j < ndata; ++j) { // NOLINT(*)
@@ -336,7 +337,7 @@ class TweedieRegression : public ObjFunction {
TweedieRegressionParam param_;
};
// register the ojective functions
// register the objective functions
DMLC_REGISTER_PARAMETER(TweedieRegressionParam);
XGBOOST_REGISTER_OBJECTIVE(TweedieRegression, "reg:tweedie")