lint learner finish

tqchen 2015-07-03 19:20:45 -07:00
parent 1581de08da
commit aba41d07cd
8 changed files with 127 additions and 112 deletions
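Note: the changes below are cpplint-style cleanups applied uniformly across the learner headers: each file gains a "Copyright 2014 by Contributors" line, the Doxygen file comment moves above the include guard, spacing is normalized, and lines the linter would flag but that are intentional (non-const reference parameters, long loop indices, a C-style cast) carry a trailing // NOLINT(*) suppression. A minimal sketch of how that suppression behaves (hypothetical function, standard cpplint semantics):

#include <iostream>

// cpplint flags non-const reference parameters (runtime/references);
// the trailing NOLINT(*) silences every cpplint category on this line only.
inline void WriteMagic(std::ostream &out) {  // NOLINT(*)
  out << "magic" << std::endl;
}

int main() {
  WriteMagic(std::cout);
  return 0;
}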

src/learner/dmatrix.h

@@ -1,11 +1,13 @@
-#ifndef XGBOOST_LEARNER_DMATRIX_H_
-#define XGBOOST_LEARNER_DMATRIX_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file dmatrix.h
  * \brief meta data and template data structure
  * used for regression/classification/ranking
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_LEARNER_DMATRIX_H_
+#define XGBOOST_LEARNER_DMATRIX_H_
 #include <vector>
 #include <cstring>
 #include "../data.h"
@@ -16,8 +18,8 @@ namespace learner {
  * \brief meta information needed in training, including label, weight
  */
 struct MetaInfo {
   /*!
    * \brief information needed by booster
    * BoosterInfo does not implement save and load,
    * all serialization is done in MetaInfo
    */
@@ -31,7 +33,7 @@ struct MetaInfo {
   std::vector<bst_uint> group_ptr;
   /*! \brief weights of each instance, optional */
   std::vector<float> weights;
   /*!
    * \brief initialized margins,
    * if specified, xgboost will start from this init margin
    * can be used to specify initial prediction to boost from
@@ -66,7 +68,7 @@ struct MetaInfo {
       return 1.0f;
     }
   }
-  inline void SaveBinary(utils::IStream &fo) const {
+  inline void SaveBinary(utils::IStream &fo) const { // NOLINT(*)
     int version = kVersion;
     fo.Write(&version, sizeof(version));
     fo.Write(&info.num_row, sizeof(info.num_row));
@@ -77,7 +79,7 @@ struct MetaInfo {
     fo.Write(info.root_index);
     fo.Write(base_margin);
   }
-  inline void LoadBinary(utils::IStream &fi) {
+  inline void LoadBinary(utils::IStream &fi) { // NOLINT(*)
     int version;
     utils::Check(fi.Read(&version, sizeof(version)) != 0, "MetaInfo: invalid format");
     utils::Check(fi.Read(&info.num_row, sizeof(info.num_row)) != 0, "MetaInfo: invalid format");
@@ -114,7 +116,7 @@ struct MetaInfo {
     return labels;
   }
   inline const std::vector<float>& GetFloatInfo(const char *field) const {
-    return ((MetaInfo*)this)->GetFloatInfo(field);
+    return ((MetaInfo*)this)->GetFloatInfo(field); // NOLINT(*)
   }
   inline std::vector<unsigned> &GetUIntInfo(const char *field) {
     using namespace std;
@@ -124,7 +126,7 @@ struct MetaInfo {
     return info.root_index;
   }
   inline const std::vector<unsigned> &GetUIntInfo(const char *field) const {
-    return ((MetaInfo*)this)->GetUIntInfo(field);
+    return ((MetaInfo*)this)->GetUIntInfo(field); // NOLINT(*)
   }
   // try to load weight information from file, if exists
   inline bool TryLoadFloatInfo(const char *field, const char* fname, bool silent = false) {
@@ -149,14 +151,14 @@ struct MetaInfo {
  * \tparam FMatrix type of feature data source
  */
 struct DMatrix {
   /*!
    * \brief magic number associated with this object
    * used to check if it is specific instance
    */
   const int magic;
   /*! \brief meta information about the dataset */
   MetaInfo info;
   /*!
    * \brief cache pointer to verify if the data structure is cached in some learner
    * used to verify if DMatrix is cached
    */
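Every header in this commit ends up with the same layout; schematically (hypothetical header following the pattern above):

/*!
 * Copyright 2014 by Contributors
 * \file example.h
 * \brief one line describing the header
 * \author Someone
 */
#ifndef XGBOOST_LEARNER_EXAMPLE_H_
#define XGBOOST_LEARNER_EXAMPLE_H_

// declarations go between the guard and the matching endif

#endif  // XGBOOST_LEARNER_EXAMPLE_H_

Putting the comment first keeps the copyright notice within the first few lines of the file, which is where cpplint's legal/copyright check looks for it.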

src/learner/evaluation-inl.hpp

@@ -1,10 +1,12 @@
+/*!
+ * Copyright 2014 by Contributors
+ * \file xgboost_evaluation-inl.hpp
+ * \brief evaluation metrics for regression and classification and rank
+ * \author Kailong Chen, Tianqi Chen
+ */
 #ifndef XGBOOST_LEARNER_EVALUATION_INL_HPP_
 #define XGBOOST_LEARNER_EVALUATION_INL_HPP_
-/*!
- * \file xgboost_evaluation-inl.hpp
- * \brief evaluation metrics for regression and classification and rank
- * \author Kailong Chen, Tianqi Chen
- */
 #include <vector>
 #include <utility>
 #include <string>
@@ -18,8 +20,8 @@
 namespace xgboost {
 namespace learner {
 /*!
  * \brief base class of elementwise evaluation
  * \tparam Derived the name of subclass
  */
 template<typename Derived>
@@ -47,15 +49,15 @@ struct EvalEWiseBase : public IEvaluator {
     }
     return Derived::GetFinal(dat[0], dat[1]);
   }
   /*!
    * \brief to be implemented by subclass,
    * get evaluation result from one row
    * \param label label of current instance
    * \param pred prediction value of current instance
    */
   inline static float EvalRow(float label, float pred);
   /*!
    * \brief to be overide by subclas, final trasnformation
    * \param esum the sum statistics returned by EvalRow
    * \param wsum sum of weight
    */
@@ -87,9 +89,9 @@ struct EvalLogLoss : public EvalEWiseBase<EvalLogLoss> {
     const float eps = 1e-16f;
     const float pneg = 1.0f - py;
     if (py < eps) {
       return -y * std::log(eps) - (1.0f - y) * std::log(1.0f - eps);
     } else if (pneg < eps) {
       return -y * std::log(1.0f - eps) - (1.0f - y) * std::log(eps);
     } else {
       return -y * std::log(py) - (1.0f - y) * std::log(pneg);
     }
@@ -119,7 +121,7 @@ struct EvalPoissionNegLogLik : public EvalEWiseBase<EvalPoissionNegLogLik> {
   }
 };
 /*!
  * \brief base class of multi-class evaluation
  * \tparam Derived the name of subclass
  */
@@ -139,7 +141,7 @@ struct EvalMClassBase : public IEvaluator {
     float sum = 0.0, wsum = 0.0;
     int label_error = 0;
     #pragma omp parallel for reduction(+: sum, wsum) schedule(static)
     for (bst_omp_uint i = 0; i < ndata; ++i) {
       const float wt = info.GetWeight(i);
       int label = static_cast<int>(info.labels[i]);
       if (label >= 0 && label < static_cast<int>(nclass)) {
@@ -161,18 +163,18 @@ struct EvalMClassBase : public IEvaluator {
     }
     return Derived::GetFinal(dat[0], dat[1]);
   }
   /*!
    * \brief to be implemented by subclass,
    * get evaluation result from one row
    * \param label label of current instance
    * \param pred prediction value of current instance
    * \param nclass number of class in the prediction
    */
   inline static float EvalRow(int label,
                               const float *pred,
                               size_t nclass);
   /*!
    * \brief to be overide by subclas, final trasnformation
    * \param esum the sum statistics returned by EvalRow
    * \param wsum sum of weight
    */
@@ -208,7 +210,7 @@ struct EvalMultiLogLoss : public EvalMClassBase<EvalMultiLogLoss> {
     } else {
       return -std::log(eps);
     }
   }
 };
 /*! \brief ctest */
@@ -240,7 +242,7 @@ struct EvalCTest: public IEvaluator {
         tpred.push_back(preds[i + (k + 1) * ndata]);
         tinfo.labels.push_back(info.labels[i]);
         tinfo.weights.push_back(info.GetWeight(i));
       }
     }
     wsum += base_->Eval(tpred, tinfo);
   }
@@ -328,7 +330,7 @@ struct EvalPrecisionRatio : public IEvaluator{
                      const MetaInfo &info,
                      bool distributed) const {
     utils::Check(!distributed, "metric %s do not support distributed evaluation", Name());
     utils::Check(info.labels.size() != 0, "label set cannot be empty");
     utils::Assert(preds.size() % info.labels.size() == 0,
                   "label size predict size not match");
     std::vector< std::pair<float, unsigned> > rec;
@@ -344,7 +346,8 @@ struct EvalPrecisionRatio : public IEvaluator{
   }

  protected:
-  inline double CalcPRatio(const std::vector< std::pair<float, unsigned> >& rec, const MetaInfo &info) const {
+  inline double CalcPRatio(const std::vector< std::pair<float, unsigned> >& rec,
+                           const MetaInfo &info) const {
     size_t cutoff = static_cast<size_t>(ratio_ * rec.size());
     double wt_hit = 0.0, wsum = 0.0, wt_sum = 0.0;
     for (size_t j = 0; j < cutoff; ++j) {
@@ -372,7 +375,7 @@ struct EvalAuc : public IEvaluator {
     utils::Check(info.labels.size() != 0, "label set cannot be empty");
     utils::Check(preds.size() % info.labels.size() == 0,
                  "label size predict size not match");
     std::vector<unsigned> tgptr(2, 0);
     tgptr[1] = static_cast<unsigned>(info.labels.size());
     const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
@@ -417,8 +420,8 @@ struct EvalAuc : public IEvaluator {
     }
     if (distributed) {
       float dat[2];
       dat[0] = static_cast<float>(sum_auc);
       dat[1] = static_cast<float>(ngroup);
       // approximately estimate auc using mean
       rabit::Allreduce<rabit::op::Sum>(dat, 2);
       return dat[0] / dat[1];
@@ -463,8 +466,8 @@ struct EvalRankList : public IEvaluator {
     }
     if (distributed) {
       float dat[2];
       dat[0] = static_cast<float>(sum_metric);
       dat[1] = static_cast<float>(ngroup);
       // approximately estimate auc using mean
       rabit::Allreduce<rabit::op::Sum>(dat, 2);
       return dat[0] / dat[1];
@@ -489,7 +492,7 @@ struct EvalRankList : public IEvaluator {
     }
   }
   /*! \return evaluation metric, given the pair_sort record, (pred,label) */
-  virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &pair_sort) const = 0;
+  virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &pair_sort) const = 0; // NOLINT(*)

  protected:
   unsigned topn_;
@@ -524,13 +527,13 @@ struct EvalNDCG : public EvalRankList{
     double sumdcg = 0.0;
     for (size_t i = 0; i < rec.size() && i < this->topn_; ++i) {
       const unsigned rel = rec[i].second;
       if (rel != 0) {
         sumdcg += ((1 << rel) - 1) / std::log(i + 2.0);
       }
     }
     return static_cast<float>(sumdcg);
   }
-  virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &rec) const {
+  virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &rec) const { // NOLINT(*)
     std::stable_sort(rec.begin(), rec.end(), CmpFirst);
     float dcg = this->CalcDCG(rec);
     std::stable_sort(rec.begin(), rec.end(), CmpSecond);

src/learner/evaluation.h

@@ -1,10 +1,12 @@
-#ifndef XGBOOST_LEARNER_EVALUATION_H_
-#define XGBOOST_LEARNER_EVALUATION_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file evaluation.h
  * \brief interface of evaluation function supported in xgboost
  * \author Tianqi Chen, Kailong Chen
  */
+#ifndef XGBOOST_LEARNER_EVALUATION_H_
+#define XGBOOST_LEARNER_EVALUATION_H_
 #include <string>
 #include <vector>
 #include <cstdio>
@@ -19,7 +21,7 @@ struct IEvaluator{
    * \brief evaluate a specific metric
    * \param preds prediction
    * \param info information, including label etc.
    * \param distributed whether a call to Allreduce is needed to gather
    * the average statistics across all the node,
    * this is only supported by some metrics
    */

src/learner/helper_utils.h

@@ -1,10 +1,12 @@
-#ifndef XGBOOST_LEARNER_HELPER_UTILS_H_
-#define XGBOOST_LEARNER_HELPER_UTILS_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file helper_utils.h
  * \brief useful helper functions
  * \author Tianqi Chen, Kailong Chen
  */
+#ifndef XGBOOST_LEARNER_HELPER_UTILS_H_
+#define XGBOOST_LEARNER_HELPER_UTILS_H_
 #include <utility>
 #include <vector>
 #include <cmath>
@@ -61,7 +63,7 @@ inline float LogSum(const float *rec, size_t size) {
   for (size_t i = 0; i < size; ++i) {
     sum += std::exp(rec[i] - mx);
   }
   return mx + std::log(sum);
 }
 inline static bool CmpFirst(const std::pair<float, unsigned> &a,

src/learner/learner-inl.hpp

@@ -1,10 +1,12 @@
-#ifndef XGBOOST_LEARNER_LEARNER_INL_HPP_
-#define XGBOOST_LEARNER_LEARNER_INL_HPP_
 /*!
+ * Copyright 2014 by Contributors
  * \file learner-inl.hpp
  * \brief learning algorithm
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_LEARNER_LEARNER_INL_HPP_
+#define XGBOOST_LEARNER_LEARNER_INL_HPP_
 #include <algorithm>
 #include <vector>
 #include <utility>
@@ -19,7 +21,7 @@
 namespace xgboost {
 /*! \brief namespace for learning algorithm */
 namespace learner {
 /*!
  * \brief learner that takes do gradient boosting on specific objective functions
  * and do training and prediction
  */
@@ -30,7 +32,7 @@ class BoostLearner : public rabit::Serializable {
     gbm_ = NULL;
     name_obj_ = "reg:linear";
     name_gbm_ = "gbtree";
-    silent= 0;
+    silent = 0;
     prob_buffer_row = 1.0f;
     distributed_mode = 0;
     updater_mode = 0;
@@ -47,10 +49,10 @@ class BoostLearner : public rabit::Serializable {
    * \brief add internal cache space for mat, this can speedup prediction for matrix,
    * please cache prediction for training and eval data
    * warning: if the model is loaded from file from some previous training history
    * set cache data must be called with exactly SAME
    * data matrices to continue training otherwise it will cause error
    * \param mats array of pointers to matrix whose prediction result need to be cached
    */
   inline void SetCacheData(const std::vector<DMatrix*>& mats) {
     utils::Assert(cache_.size() == 0, "can only call cache data once");
     // assign buffer index
@@ -67,10 +69,10 @@ class BoostLearner : public rabit::Serializable {
       buffer_size += mats[i]->info.num_row();
     }
     char str_temp[25];
     utils::SPrintf(str_temp, sizeof(str_temp), "%lu",
-                   static_cast<unsigned long>(buffer_size));
+                   static_cast<unsigned long>(buffer_size)); // NOLINT(*)
     this->SetParam("num_pbuffer", str_temp);
     this->pred_buffer_size = buffer_size;
   }
   /*!
    * \brief set parameters from outside
@@ -79,7 +81,7 @@ class BoostLearner : public rabit::Serializable {
    */
   inline void SetParam(const char *name, const char *val) {
     using namespace std;
     // in this version, bst: prefix is no longer required
     if (strncmp(name, "bst:", 4) != 0) {
       std::string n = "bst:"; n += name;
       this->SetParam(n.c_str(), val);
@@ -119,7 +121,7 @@ class BoostLearner : public rabit::Serializable {
       if (!strcmp(name, "objective")) name_obj_ = val;
       if (!strcmp(name, "booster")) name_gbm_ = val;
       mparam.SetParam(name, val);
     }
     if (gbm_ != NULL) gbm_->SetParam(name, val);
     if (obj_ != NULL) obj_->SetParam(name, val);
     if (gbm_ == NULL || obj_ == NULL) {
@@ -133,16 +135,16 @@ class BoostLearner : public rabit::Serializable {
       // estimate feature bound
       unsigned num_feature = 0;
       for (size_t i = 0; i < cache_.size(); ++i) {
         num_feature = std::max(num_feature,
                                static_cast<unsigned>(cache_[i].mat_->info.num_col()));
       }
       // run allreduce on num_feature to find the maximum value
       rabit::Allreduce<rabit::op::Max>(&num_feature, 1);
       if (num_feature > mparam.num_feature) mparam.num_feature = num_feature;
     }
     char str_temp[25];
     utils::SPrintf(str_temp, sizeof(str_temp), "%d", mparam.num_feature);
     this->SetParam("bst:num_feature", str_temp);
   }
   /*!
    * \brief initialize the model
@@ -161,13 +163,13 @@ class BoostLearner : public rabit::Serializable {
    * \param fi input stream
    * \param calc_num_feature whether call InitTrainer with calc_num_feature
    */
-  inline void LoadModel(utils::IStream &fi,
+  inline void LoadModel(utils::IStream &fi, // NOLINT(*)
                         bool calc_num_feature = true) {
     utils::Check(fi.Read(&mparam, sizeof(ModelParam)) != 0,
                  "BoostLearner: wrong model format");
     {
       // backward compatibility code for compatible with old model type
       // for new model, Read(&name_obj_) is suffice
       uint64_t len;
       utils::Check(fi.Read(&len, sizeof(len)) != 0, "BoostLearner: wrong model format");
       if (len >= std::numeric_limits<unsigned>::max()) {
@@ -226,9 +228,9 @@ class BoostLearner : public rabit::Serializable {
       fi = utils::IStream::Create(fname, "r");
       this->LoadModel(*fi, true);
     }
     delete fi;
   }
-  inline void SaveModel(utils::IStream &fo, bool with_pbuffer) const {
+  inline void SaveModel(utils::IStream &fo, bool with_pbuffer) const { // NOLINT(*)
     ModelParam p = mparam;
     p.saved_with_pbuffer = static_cast<int>(with_pbuffer);
     fo.Write(&p, sizeof(ModelParam));
@@ -247,7 +249,7 @@ class BoostLearner : public rabit::Serializable {
       fo->Write("bs64\t", 5);
       utils::Base64OutStream bout(fo);
       this->SaveModel(bout, with_pbuffer);
       bout.Finish('\n');
     } else {
       fo->Write("binf", 4);
       this->SaveModel(*fo, with_pbuffer);
@@ -260,7 +262,7 @@ class BoostLearner : public rabit::Serializable {
    * \param p_train pointer to the matrix used by training
    */
   inline void CheckInit(DMatrix *p_train) {
     int ncol = static_cast<int>(p_train->info.info.num_col);
     std::vector<bool> enabled(ncol, true);
     // set max row per batch to limited value
     // in distributed mode, use safe choice otherwise
@@ -345,10 +347,9 @@ class BoostLearner : public rabit::Serializable {
                       bool output_margin,
                       std::vector<float> *out_preds,
                       unsigned ntree_limit = 0,
-                      bool pred_leaf = false
-                      ) const {
+                      bool pred_leaf = false) const {
     if (pred_leaf) {
       gbm_->PredictLeaf(data.fmat(), data.info.info, out_preds, ntree_limit);
     } else {
       this->PredictRaw(data, out_preds, ntree_limit);
       if (!output_margin) {
@@ -361,7 +362,7 @@ class BoostLearner : public rabit::Serializable {
    * NOTE: use the batch prediction interface if possible, batch prediction is usually
    * more efficient than online prediction
    * This function is NOT threadsafe, make sure you only call from one thread
    *
    * \param inst the instance you want to predict
    * \param output_margin whether to only predict margin value instead of transformed prediction
    * \param out_preds output vector to hold the predictions
@@ -387,8 +388,8 @@ class BoostLearner : public rabit::Serializable {
   }

  protected:
   /*!
    * \brief initialize the objective function and GBM,
    * if not yet done
    */
   inline void InitObjGBM(void) {
@@ -401,12 +402,12 @@ class BoostLearner : public rabit::Serializable {
     for (size_t i = 0; i < cfg_.size(); ++i) {
       obj_->SetParam(cfg_[i].first.c_str(), cfg_[i].second.c_str());
       gbm_->SetParam(cfg_[i].first.c_str(), cfg_[i].second.c_str());
     }
     if (evaluator_.Size() == 0) {
       evaluator_.AddEval(obj_->DefaultEvalMetric());
     }
   }
   /*!
    * \brief additional default value for specific objs
    */
   inline void InitAdditionDefaultParam(void) {
@@ -415,12 +416,12 @@ class BoostLearner : public rabit::Serializable {
       gbm_->SetParam("max_delta_step", "0.7");
     }
   }
   /*!
    * \brief get un-transformed prediction
    * \param data training data matrix
    * \param out_preds output vector that stores the prediction
    * \param ntree_limit limit number of trees used for boosted tree
    * predictor, when it equals 0, this means we are using all the trees
    */
   inline void PredictRaw(const DMatrix &data,
                          std::vector<float> *out_preds,
@@ -517,7 +518,7 @@ class BoostLearner : public rabit::Serializable {
  protected:
   // magic number to transform random seed
-  const static int kRandSeedMagic = 127;
+  static const int kRandSeedMagic = 127;
   // cache entry object that helps handle feature caching
   struct CacheEntry {
     const DMatrix *mat_;

src/learner/objective-inl.hpp

@@ -1,10 +1,12 @@
-#ifndef XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
-#define XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
 /*!
+ * Copyright 2014 by Contributors
  * \file objective-inl.hpp
  * \brief objective function implementations
  * \author Tianqi Chen, Kailong Chen
  */
+#ifndef XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
+#define XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
 #include <vector>
 #include <algorithm>
 #include <utility>
@@ -176,14 +178,14 @@ class RegLossObj : public IObjFunction {
 // poisson regression for count
 class PoissonRegression : public IObjFunction {
  public:
-  explicit PoissonRegression(void) {
+  PoissonRegression(void) {
     max_delta_step = 0.0f;
   }
   virtual ~PoissonRegression(void) {}
   virtual void SetParam(const char *name, const char *val) {
     using namespace std;
-    if (!strcmp( "max_delta_step", name )) {
+    if (!strcmp("max_delta_step", name)) {
       max_delta_step = static_cast<float>(atof(val));
     }
   }
@@ -201,9 +203,9 @@ class PoissonRegression : public IObjFunction {
     // check if label in range
     bool label_correct = true;
     // start calculating gradient
-    const long ndata = static_cast<bst_omp_uint>(preds.size());
+    const long ndata = static_cast<bst_omp_uint>(preds.size()); // NOLINT(*)
     #pragma omp parallel for schedule(static)
-    for (long i = 0; i < ndata; ++i) {
+    for (long i = 0; i < ndata; ++i) { // NOLINT(*)
       float p = preds[i];
       float w = info.GetWeight(i);
       float y = info.labels[i];
@@ -219,9 +221,9 @@ class PoissonRegression : public IObjFunction {
   }
   virtual void PredTransform(std::vector<float> *io_preds) {
     std::vector<float> &preds = *io_preds;
-    const long ndata = static_cast<long>(preds.size());
+    const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
     #pragma omp parallel for schedule(static)
-    for (long j = 0; j < ndata; ++j) {
+    for (long j = 0; j < ndata; ++j) { // NOLINT(*)
       preds[j] = std::exp(preds[j]);
     }
   }
@@ -234,7 +236,7 @@ class PoissonRegression : public IObjFunction {
   virtual const char* DefaultEvalMetric(void) const {
     return "poisson-nloglik";
   }

  private:
   float max_delta_step;
 };
@@ -467,7 +469,7 @@ class LambdaRankObj : public IObjFunction {
       : pos_index(pos_index), neg_index(neg_index), weight(1.0f) {}
   };
   /*!
    * \brief get lambda weight for existing pairs
    * \param list a list that is sorted by pred score
    * \param io_pairs record of pairs, containing the pairs to fill in weights
    */
@@ -555,10 +557,10 @@ class LambdaRankObjMAP : public LambdaRankObj {
     float ap_acc;
     /*!
      * \brief the accumulated precision,
      * assuming a positive instance is missing
      */
     float ap_acc_miss;
     /*!
      * \brief the accumulated precision,
      * assuming that one more positive instance is inserted ahead
      */
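The long loop counters kept under NOLINT(*) above look like a deliberate trade-off: the OpenMP 2.0 parallel-for supported by MSVC wants a signed integral loop index, while cpplint's runtime/int check would otherwise ask for a fixed-width type. A minimal sketch of the pattern (hypothetical kernel, same suppression):

#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  std::vector<float> preds(8, 1.0f);
  // plain long keeps OpenMP 2.0 happy (it requires a signed index);
  // cpplint's runtime/int warning is silenced per line with NOLINT(*).
  const long ndata = static_cast<long>(preds.size());  // NOLINT(*)
  #pragma omp parallel for schedule(static)
  for (long j = 0; j < ndata; ++j) {  // NOLINT(*)
    preds[j] = std::exp(preds[j]);
  }
  std::printf("%f\n", static_cast<double>(preds[0]));
  return 0;
}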

src/learner/objective.h

@@ -1,11 +1,14 @@
-#ifndef XGBOOST_LEARNER_OBJECTIVE_H_
-#define XGBOOST_LEARNER_OBJECTIVE_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file objective.h
  * \brief interface of objective function used for gradient boosting
  * \author Tianqi Chen, Kailong Chen
  */
-#include "dmatrix.h"
+#ifndef XGBOOST_LEARNER_OBJECTIVE_H_
+#define XGBOOST_LEARNER_OBJECTIVE_H_
+#include <vector>
+#include "./dmatrix.h"

 namespace xgboost {
 namespace learner {
@@ -13,13 +16,13 @@ namespace learner {
 class IObjFunction{
  public:
   /*! \brief virtual destructor */
-  virtual ~IObjFunction(void){}
+  virtual ~IObjFunction(void) {}
   /*!
    * \brief set parameters from outside
    * \param name name of the parameter
    * \param val value of the parameter
    */
   virtual void SetParam(const char *name, const char *val) = 0;
   /*!
    * \brief get gradient over each of predictions, given existing information
    * \param preds prediction of current round
@@ -38,9 +41,9 @@ class IObjFunction{
    * \brief transform prediction values, this is only called when Prediction is called
    * \param io_preds prediction values, saves to this vector as well
    */
-  virtual void PredTransform(std::vector<float> *io_preds){}
+  virtual void PredTransform(std::vector<float> *io_preds) {}
   /*!
    * \brief transform prediction values, this is only called when Eval is called,
    * usually it redirect to PredTransform
    * \param io_preds prediction values, saves to this vector as well
    */
@@ -49,7 +52,7 @@ class IObjFunction{
   }
   /*!
    * \brief transform probability value back to margin
    * this is used to transform user-set base_score back to margin
    * used by gradient boosting
    * \return transformed value
    */
@@ -77,7 +80,7 @@ inline IObjFunction* CreateObjFunction(const char *name) {
   if (!strcmp("multi:softprob", name)) return new SoftmaxMultiClassObj(1);
   if (!strcmp("rank:pairwise", name )) return new PairwiseRankObj();
   if (!strcmp("rank:ndcg", name)) return new LambdaRankObjNDCG();
   if (!strcmp("rank:map", name)) return new LambdaRankObjMAP();
   utils::Error("unknown objective function type: %s", name);
   return NULL;
 }

src/sync/sync.h

@@ -1,13 +1,13 @@
-#ifndef XGBOOST_SYNC_H_
-#define XGBOOST_SYNC_H_
 /*!
+ * Copyright 2014 by Contributors
  * \file sync.h
  * \brief the synchronization module of rabit
  * redirects to subtree rabit header
  * \author Tianqi Chen
  */
+#ifndef XGBOOST_SYNC_SYNC_H_
+#define XGBOOST_SYNC_SYNC_H_
 #include "../../subtree/rabit/include/rabit.h"
 #include "../../subtree/rabit/include/rabit/timer.h"
-#endif  // XGBOOST_SYNC_H_
+#endif  // XGBOOST_SYNC_SYNC_H_
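The doubled SYNC_SYNC in the renamed guard is not a typo: guards in this scheme appear to be derived from the file path, so a header at src/sync/sync.h (path inferred from the guard and the subtree includes) contributes one SYNC for the directory and one for the file name:

// src/sync/sync.h  ->  XGBOOST_ + SYNC_ (directory) + SYNC_H_ (file)
#ifndef XGBOOST_SYNC_SYNC_H_
#define XGBOOST_SYNC_SYNC_H_
#endif  // XGBOOST_SYNC_SYNC_H_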