Merge branch 'master' into unity

Conflicts:
	src/learner/evaluation-inl.hpp
	wrapper/xgboost_R.cpp
	wrapper/xgboost_wrapper.cpp
	wrapper/xgboost_wrapper.h
This commit is contained in:
tqchen
2014-08-26 20:32:07 -07:00
21 changed files with 169 additions and 146 deletions

View File

@@ -27,10 +27,12 @@ struct EvalEWiseBase : public IEvaluator {
utils::Check(info.labels.size() != 0, "label set cannot be empty");
utils::Check(preds.size() % info.labels.size() == 0,
"label and prediction size not match");
const unsigned ndata = static_cast<unsigned>(info.labels.size());
const bst_omp_uint ndata = static_cast<bst_omp_uint>(info.labels.size());
float sum = 0.0, wsum = 0.0;
#pragma omp parallel for reduction(+: sum, wsum) schedule(static)
for (unsigned i = 0; i < ndata; ++i) {
for (bst_omp_uint i = 0; i < ndata; ++i) {
const float wt = info.GetWeight(i);
sum += Derived::EvalRow(info.labels[i], preds[i]) * wt;
wsum += wt;
@@ -149,12 +151,13 @@ struct EvalAMS : public IEvaluator {
}
virtual float Eval(const std::vector<float> &preds,
const MetaInfo &info) const {
const unsigned ndata = static_cast<unsigned>(info.labels.size());
const bst_omp_uint ndata = static_cast<bst_omp_uint>(info.labels.size());
utils::Check(info.weights.size() == ndata, "we need weight to evaluate ams");
std::vector< std::pair<float, unsigned> > rec(ndata);
#pragma omp parallel for schedule(static)
for (unsigned i = 0; i < ndata; ++i) {
for (bst_omp_uint i = 0; i < ndata; ++i) {
rec[i] = std::make_pair(preds[i], i);
}
std::sort(rec.begin(), rec.end(), CmpFirst);
@@ -163,7 +166,7 @@ struct EvalAMS : public IEvaluator {
const double br = 10.0;
unsigned thresindex = 0;
double s_tp = 0.0, b_fp = 0.0, tams = 0.0;
for (unsigned i = 0; i < ndata-1 && i < ntop; ++i) {
for (unsigned i = 0; i < static_cast<unsigned>(ndata-1) && i < ntop; ++i) {
const unsigned ridx = rec[i].second;
const float wt = info.weights[ridx];
if (info.labels[ridx] > 0.5f) {
@@ -257,7 +260,7 @@ struct EvalAuc : public IEvaluator {
const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
utils::Check(gptr.back() == info.labels.size(),
"EvalAuc: group structure must match number of prediction");
const unsigned ngroup = static_cast<unsigned>(gptr.size() - 1);
const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
// sum statistics
double sum_auc = 0.0f;
#pragma omp parallel reduction(+:sum_auc)
@@ -265,7 +268,7 @@ struct EvalAuc : public IEvaluator {
// each thread takes a local rec
std::vector< std::pair<float, unsigned> > rec;
#pragma omp for schedule(static)
for (unsigned k = 0; k < ngroup; ++k) {
for (bst_omp_uint k = 0; k < ngroup; ++k) {
rec.clear();
for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
rec.push_back(std::make_pair(preds[j], j));
@@ -315,7 +318,7 @@ struct EvalRankList : public IEvaluator {
utils::Assert(gptr.size() != 0, "must specify group when constructing rank file");
utils::Assert(gptr.back() == preds.size(),
"EvalRanklist: group structure must match number of prediction");
const unsigned ngroup = static_cast<unsigned>(gptr.size() - 1);
const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
// sum statistics
double sum_metric = 0.0f;
#pragma omp parallel reduction(+:sum_metric)
@@ -323,7 +326,7 @@ struct EvalRankList : public IEvaluator {
// each thread takes a local rec
std::vector< std::pair<float, unsigned> > rec;
#pragma omp for schedule(static)
for (unsigned k = 0; k < ngroup; ++k) {
for (bst_omp_uint k = 0; k < ngroup; ++k) {
rec.clear();
for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
rec.push_back(std::make_pair(preds[j], static_cast<int>(info.labels[j])));

View File

@@ -79,6 +79,11 @@ class BoostLearner {
* \param val value of the parameter
*/
inline void SetParam(const char *name, const char *val) {
// in this version, bst: prefix is no longer required
if (strncmp(name, "bst:", 4) != 0) {
std::string n = "bst:"; n += name;
this->SetParam(n.c_str(), val);
}
if (!strcmp(name, "silent")) silent = atoi(val);
if (!strcmp(name, "prob_buffer_row")) prob_buffer_row = static_cast<float>(atof(val));
if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val);
@@ -91,7 +96,7 @@ class BoostLearner {
if (!strcmp(name, "objective")) name_obj_ = val;
if (!strcmp(name, "booster")) name_gbm_ = val;
mparam.SetParam(name, val);
}
}
if (gbm_ != NULL) gbm_->SetParam(name, val);
if (obj_ != NULL) obj_->SetParam(name, val);
if (gbm_ == NULL || obj_ == NULL) {
@@ -248,17 +253,17 @@ class BoostLearner {
data.info.info, out_preds);
// add base margin
std::vector<float> &preds = *out_preds;
const unsigned ndata = static_cast<unsigned>(preds.size());
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
if (data.info.base_margin.size() != 0) {
utils::Check(preds.size() == data.info.base_margin.size(),
"base_margin.size does not match with prediction size");
#pragma omp parallel for schedule(static)
for (unsigned j = 0; j < ndata; ++j) {
for (bst_omp_uint j = 0; j < ndata; ++j) {
preds[j] += data.info.base_margin[j];
}
} else {
#pragma omp parallel for schedule(static)
for (unsigned j = 0; j < ndata; ++j) {
for (bst_omp_uint j = 0; j < ndata; ++j) {
preds[j] += mparam.base_score;
}
}

View File

@@ -116,9 +116,9 @@ class RegLossObj : public IObjFunction{
gpair.resize(preds.size());
// start calculating gradient
const unsigned nstep = static_cast<unsigned>(info.labels.size());
const unsigned ndata = static_cast<unsigned>(preds.size());
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
#pragma omp parallel for schedule(static)
for (unsigned i = 0; i < ndata; ++i) {
for (bst_omp_uint i = 0; i < ndata; ++i) {
const unsigned j = i % nstep;
float p = loss.PredTransform(preds[i]);
float w = info.GetWeight(j);
@@ -132,9 +132,9 @@ class RegLossObj : public IObjFunction{
}
virtual void PredTransform(std::vector<float> *io_preds) {
std::vector<float> &preds = *io_preds;
const unsigned ndata = static_cast<unsigned>(preds.size());
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size());
#pragma omp parallel for schedule(static)
for (unsigned j = 0; j < ndata; ++j) {
for (bst_omp_uint j = 0; j < ndata; ++j) {
preds[j] = loss.PredTransform(preds[j]);
}
}
@@ -169,12 +169,12 @@ class SoftmaxMultiClassObj : public IObjFunction {
std::vector<bst_gpair> &gpair = *out_gpair;
gpair.resize(preds.size());
const unsigned nstep = static_cast<unsigned>(info.labels.size() * nclass);
const unsigned ndata = static_cast<unsigned>(preds.size() / nclass);
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size() / nclass);
#pragma omp parallel
{
std::vector<float> rec(nclass);
#pragma omp for schedule(static)
for (unsigned i = 0; i < ndata; ++i) {
for (bst_omp_uint i = 0; i < ndata; ++i) {
for (int k = 0; k < nclass; ++k) {
rec[k] = preds[i * nclass + k];
}
@@ -210,13 +210,13 @@ class SoftmaxMultiClassObj : public IObjFunction {
utils::Check(nclass != 0, "must set num_class to use softmax");
std::vector<float> &preds = *io_preds;
std::vector<float> tmp;
const unsigned ndata = static_cast<unsigned>(preds.size()/nclass);
const bst_omp_uint ndata = static_cast<bst_omp_uint>(preds.size()/nclass);
if (prob == 0) tmp.resize(ndata);
#pragma omp parallel
{
std::vector<float> rec(nclass);
#pragma omp for schedule(static)
for (unsigned j = 0; j < ndata; ++j) {
for (bst_omp_uint j = 0; j < ndata; ++j) {
for (int k = 0; k < nclass; ++k) {
rec[k] = preds[j * nclass + k];
}
@@ -263,7 +263,7 @@ class LambdaRankObj : public IObjFunction {
const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
utils::Check(gptr.size() != 0 && gptr.back() == info.labels.size(),
"group structure not consistent with #rows");
const unsigned ngroup = static_cast<unsigned>(gptr.size() - 1);
const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
#pragma omp parallel
{
// parallel construct, declare random number generator here, so that each
@@ -273,7 +273,7 @@ class LambdaRankObj : public IObjFunction {
std::vector<ListEntry> lst;
std::vector< std::pair<float, unsigned> > rec;
#pragma omp for schedule(static)
for (unsigned k = 0; k < ngroup; ++k) {
for (bst_omp_uint k = 0; k < ngroup; ++k) {
lst.clear(); pairs.clear();
for (unsigned j = gptr[k]; j < gptr[k+1]; ++j) {
lst.push_back(ListEntry(preds[j], info.labels[j], j));