lint learner finish
This commit is contained in:
parent
1581de08da
commit
aba41d07cd
@ -1,11 +1,13 @@
|
||||
#ifndef XGBOOST_LEARNER_DMATRIX_H_
|
||||
#define XGBOOST_LEARNER_DMATRIX_H_
|
||||
/*!
|
||||
* Copyright 2014 by Contributors
|
||||
* \file dmatrix.h
|
||||
* \brief meta data and template data structure
|
||||
* \brief meta data and template data structure
|
||||
* used for regression/classification/ranking
|
||||
* \author Tianqi Chen
|
||||
*/
|
||||
#ifndef XGBOOST_LEARNER_DMATRIX_H_
|
||||
#define XGBOOST_LEARNER_DMATRIX_H_
|
||||
|
||||
#include <vector>
|
||||
#include <cstring>
|
||||
#include "../data.h"
|
||||
@ -16,8 +18,8 @@ namespace learner {
|
||||
* \brief meta information needed in training, including label, weight
|
||||
*/
|
||||
struct MetaInfo {
|
||||
/*!
|
||||
* \brief information needed by booster
|
||||
/*!
|
||||
* \brief information needed by booster
|
||||
* BoosterInfo does not implement save and load,
|
||||
* all serialization is done in MetaInfo
|
||||
*/
|
||||
@ -31,7 +33,7 @@ struct MetaInfo {
|
||||
std::vector<bst_uint> group_ptr;
|
||||
/*! \brief weights of each instance, optional */
|
||||
std::vector<float> weights;
|
||||
/*!
|
||||
/*!
|
||||
* \brief initialized margins,
|
||||
* if specified, xgboost will start from this init margin
|
||||
* can be used to specify initial prediction to boost from
|
||||
@ -66,7 +68,7 @@ struct MetaInfo {
|
||||
return 1.0f;
|
||||
}
|
||||
}
|
||||
inline void SaveBinary(utils::IStream &fo) const {
|
||||
inline void SaveBinary(utils::IStream &fo) const { // NOLINT(*)
|
||||
int version = kVersion;
|
||||
fo.Write(&version, sizeof(version));
|
||||
fo.Write(&info.num_row, sizeof(info.num_row));
|
||||
@ -77,7 +79,7 @@ struct MetaInfo {
|
||||
fo.Write(info.root_index);
|
||||
fo.Write(base_margin);
|
||||
}
|
||||
inline void LoadBinary(utils::IStream &fi) {
|
||||
inline void LoadBinary(utils::IStream &fi) { // NOLINT(*)
|
||||
int version;
|
||||
utils::Check(fi.Read(&version, sizeof(version)) != 0, "MetaInfo: invalid format");
|
||||
utils::Check(fi.Read(&info.num_row, sizeof(info.num_row)) != 0, "MetaInfo: invalid format");
|
||||
@ -114,7 +116,7 @@ struct MetaInfo {
|
||||
return labels;
|
||||
}
|
||||
inline const std::vector<float>& GetFloatInfo(const char *field) const {
|
||||
return ((MetaInfo*)this)->GetFloatInfo(field);
|
||||
return ((MetaInfo*)this)->GetFloatInfo(field); // NOLINT(*)
|
||||
}
|
||||
inline std::vector<unsigned> &GetUIntInfo(const char *field) {
|
||||
using namespace std;
|
||||
@ -124,7 +126,7 @@ struct MetaInfo {
|
||||
return info.root_index;
|
||||
}
|
||||
inline const std::vector<unsigned> &GetUIntInfo(const char *field) const {
|
||||
return ((MetaInfo*)this)->GetUIntInfo(field);
|
||||
return ((MetaInfo*)this)->GetUIntInfo(field); // NOLINT(*)
|
||||
}
|
||||
// try to load weight information from file, if exists
|
||||
inline bool TryLoadFloatInfo(const char *field, const char* fname, bool silent = false) {
|
||||
@ -149,14 +151,14 @@ struct MetaInfo {
|
||||
* \tparam FMatrix type of feature data source
|
||||
*/
|
||||
struct DMatrix {
|
||||
/*!
|
||||
* \brief magic number associated with this object
|
||||
/*!
|
||||
* \brief magic number associated with this object
|
||||
* used to check if it is specific instance
|
||||
*/
|
||||
const int magic;
|
||||
/*! \brief meta information about the dataset */
|
||||
MetaInfo info;
|
||||
/*!
|
||||
/*!
|
||||
* \brief cache pointer to verify if the data structure is cached in some learner
|
||||
* used to verify if DMatrix is cached
|
||||
*/
|
||||
|
||||
@ -1,10 +1,12 @@
|
||||
/*!
|
||||
* Copyright 2014 by Contributors
|
||||
* \file xgboost_evaluation-inl.hpp
|
||||
* \brief evaluation metrics for regression and classification and rank
|
||||
* \author Kailong Chen, Tianqi Chen
|
||||
*/
|
||||
#ifndef XGBOOST_LEARNER_EVALUATION_INL_HPP_
|
||||
#define XGBOOST_LEARNER_EVALUATION_INL_HPP_
|
||||
/*!
|
||||
* \file xgboost_evaluation-inl.hpp
|
||||
* \brief evaluation metrics for regression and classification and rank
|
||||
* \author Kailong Chen, Tianqi Chen
|
||||
*/
|
||||
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
@ -18,8 +20,8 @@
|
||||
|
||||
namespace xgboost {
|
||||
namespace learner {
|
||||
/*!
|
||||
* \brief base class of elementwise evaluation
|
||||
/*!
|
||||
* \brief base class of elementwise evaluation
|
||||
* \tparam Derived the name of subclass
|
||||
*/
|
||||
template<typename Derived>
|
||||
@ -47,15 +49,15 @@ struct EvalEWiseBase : public IEvaluator {
|
||||
}
|
||||
return Derived::GetFinal(dat[0], dat[1]);
|
||||
}
|
||||
/*!
|
||||
* \brief to be implemented by subclass,
|
||||
* get evaluation result from one row
|
||||
/*!
|
||||
* \brief to be implemented by subclass,
|
||||
* get evaluation result from one row
|
||||
* \param label label of current instance
|
||||
* \param pred prediction value of current instance
|
||||
*/
|
||||
inline static float EvalRow(float label, float pred);
|
||||
/*!
|
||||
* \brief to be overridden by subclass, final transformation
|
||||
/*!
|
||||
* \brief to be overridden by subclass, final transformation
|
||||
* \param esum the sum statistics returned by EvalRow
|
||||
* \param wsum sum of weight
|
||||
*/
|
||||
@ -87,9 +89,9 @@ struct EvalLogLoss : public EvalEWiseBase<EvalLogLoss> {
|
||||
const float eps = 1e-16f;
|
||||
const float pneg = 1.0f - py;
|
||||
if (py < eps) {
|
||||
return -y * std::log(eps) - (1.0f - y) * std::log(1.0f - eps);
|
||||
return -y * std::log(eps) - (1.0f - y) * std::log(1.0f - eps);
|
||||
} else if (pneg < eps) {
|
||||
return -y * std::log(1.0f - eps) - (1.0f - y) * std::log(eps);
|
||||
return -y * std::log(1.0f - eps) - (1.0f - y) * std::log(eps);
|
||||
} else {
|
||||
return -y * std::log(py) - (1.0f - y) * std::log(pneg);
|
||||
}
|
||||
@ -119,7 +121,7 @@ struct EvalPoissionNegLogLik : public EvalEWiseBase<EvalPoissionNegLogLik> {
|
||||
}
|
||||
};
|
||||
|
||||
/*!
|
||||
/*!
|
||||
* \brief base class of multi-class evaluation
|
||||
* \tparam Derived the name of subclass
|
||||
*/
|
||||
@ -139,7 +141,7 @@ struct EvalMClassBase : public IEvaluator {
|
||||
float sum = 0.0, wsum = 0.0;
|
||||
int label_error = 0;
|
||||
#pragma omp parallel for reduction(+: sum, wsum) schedule(static)
|
||||
for (bst_omp_uint i = 0; i < ndata; ++i) {
|
||||
for (bst_omp_uint i = 0; i < ndata; ++i) {
|
||||
const float wt = info.GetWeight(i);
|
||||
int label = static_cast<int>(info.labels[i]);
|
||||
if (label >= 0 && label < static_cast<int>(nclass)) {
|
||||
@ -161,18 +163,18 @@ struct EvalMClassBase : public IEvaluator {
|
||||
}
|
||||
return Derived::GetFinal(dat[0], dat[1]);
|
||||
}
|
||||
/*!
|
||||
* \brief to be implemented by subclass,
|
||||
* get evaluation result from one row
|
||||
/*!
|
||||
* \brief to be implemented by subclass,
|
||||
* get evaluation result from one row
|
||||
* \param label label of current instance
|
||||
* \param pred prediction value of current instance
|
||||
* \param pred prediction value of current instance
|
||||
* \param nclass number of class in the prediction
|
||||
*/
|
||||
inline static float EvalRow(int label,
|
||||
const float *pred,
|
||||
size_t nclass);
|
||||
/*!
|
||||
* \brief to be overridden by subclass, final transformation
|
||||
/*!
|
||||
* \brief to be overridden by subclass, final transformation
|
||||
* \param esum the sum statistics returned by EvalRow
|
||||
* \param wsum sum of weight
|
||||
*/
|
||||
@ -208,7 +210,7 @@ struct EvalMultiLogLoss : public EvalMClassBase<EvalMultiLogLoss> {
|
||||
} else {
|
||||
return -std::log(eps);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/*! \brief ctest */
|
||||
@ -240,7 +242,7 @@ struct EvalCTest: public IEvaluator {
|
||||
tpred.push_back(preds[i + (k + 1) * ndata]);
|
||||
tinfo.labels.push_back(info.labels[i]);
|
||||
tinfo.weights.push_back(info.GetWeight(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
wsum += base_->Eval(tpred, tinfo);
|
||||
}
|
||||
@ -328,7 +330,7 @@ struct EvalPrecisionRatio : public IEvaluator{
|
||||
const MetaInfo &info,
|
||||
bool distributed) const {
|
||||
utils::Check(!distributed, "metric %s do not support distributed evaluation", Name());
|
||||
utils::Check(info.labels.size() != 0, "label set cannot be empty");
|
||||
utils::Check(info.labels.size() != 0, "label set cannot be empty");
|
||||
utils::Assert(preds.size() % info.labels.size() == 0,
|
||||
"label size predict size not match");
|
||||
std::vector< std::pair<float, unsigned> > rec;
|
||||
@ -344,7 +346,8 @@ struct EvalPrecisionRatio : public IEvaluator{
|
||||
}
|
||||
|
||||
protected:
|
||||
inline double CalcPRatio(const std::vector< std::pair<float, unsigned> >& rec, const MetaInfo &info) const {
|
||||
inline double CalcPRatio(const std::vector< std::pair<float, unsigned> >& rec,
|
||||
const MetaInfo &info) const {
|
||||
size_t cutoff = static_cast<size_t>(ratio_ * rec.size());
|
||||
double wt_hit = 0.0, wsum = 0.0, wt_sum = 0.0;
|
||||
for (size_t j = 0; j < cutoff; ++j) {
|
||||
@ -372,7 +375,7 @@ struct EvalAuc : public IEvaluator {
|
||||
utils::Check(info.labels.size() != 0, "label set cannot be empty");
|
||||
utils::Check(preds.size() % info.labels.size() == 0,
|
||||
"label size predict size not match");
|
||||
std::vector<unsigned> tgptr(2, 0);
|
||||
std::vector<unsigned> tgptr(2, 0);
|
||||
tgptr[1] = static_cast<unsigned>(info.labels.size());
|
||||
|
||||
const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
|
||||
@ -417,8 +420,8 @@ struct EvalAuc : public IEvaluator {
|
||||
}
|
||||
if (distributed) {
|
||||
float dat[2];
|
||||
dat[0] = static_cast<float>(sum_auc);
|
||||
dat[1] = static_cast<float>(ngroup);
|
||||
dat[0] = static_cast<float>(sum_auc);
|
||||
dat[1] = static_cast<float>(ngroup);
|
||||
// approximately estimate auc using mean
|
||||
rabit::Allreduce<rabit::op::Sum>(dat, 2);
|
||||
return dat[0] / dat[1];
|
||||
@ -463,8 +466,8 @@ struct EvalRankList : public IEvaluator {
|
||||
}
|
||||
if (distributed) {
|
||||
float dat[2];
|
||||
dat[0] = static_cast<float>(sum_metric);
|
||||
dat[1] = static_cast<float>(ngroup);
|
||||
dat[0] = static_cast<float>(sum_metric);
|
||||
dat[1] = static_cast<float>(ngroup);
|
||||
// approximately estimate the metric using the mean over groups
|
||||
rabit::Allreduce<rabit::op::Sum>(dat, 2);
|
||||
return dat[0] / dat[1];
|
||||
@ -489,7 +492,7 @@ struct EvalRankList : public IEvaluator {
|
||||
}
|
||||
}
|
||||
/*! \return evaluation metric, given the pair_sort record, (pred,label) */
|
||||
virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &pair_sort) const = 0;
|
||||
virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &pair_sort) const = 0; // NOLINT(*)
|
||||
|
||||
protected:
|
||||
unsigned topn_;
|
||||
@ -524,13 +527,13 @@ struct EvalNDCG : public EvalRankList{
|
||||
double sumdcg = 0.0;
|
||||
for (size_t i = 0; i < rec.size() && i < this->topn_; ++i) {
|
||||
const unsigned rel = rec[i].second;
|
||||
if (rel != 0) {
|
||||
if (rel != 0) {
|
||||
sumdcg += ((1 << rel) - 1) / std::log(i + 2.0);
|
||||
}
|
||||
}
|
||||
return static_cast<float>(sumdcg);
|
||||
}
|
||||
virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &rec) const {
|
||||
virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &rec) const { // NOLINT(*)
|
||||
std::stable_sort(rec.begin(), rec.end(), CmpFirst);
|
||||
float dcg = this->CalcDCG(rec);
|
||||
std::stable_sort(rec.begin(), rec.end(), CmpSecond);
|
||||
|
||||
@ -1,10 +1,12 @@
|
||||
#ifndef XGBOOST_LEARNER_EVALUATION_H_
|
||||
#define XGBOOST_LEARNER_EVALUATION_H_
|
||||
/*!
|
||||
* Copyright 2014 by Contributors
|
||||
* \file evaluation.h
|
||||
* \brief interface of evaluation function supported in xgboost
|
||||
* \author Tianqi Chen, Kailong Chen
|
||||
*/
|
||||
#ifndef XGBOOST_LEARNER_EVALUATION_H_
|
||||
#define XGBOOST_LEARNER_EVALUATION_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <cstdio>
|
||||
@ -19,7 +21,7 @@ struct IEvaluator{
|
||||
* \brief evaluate a specific metric
|
||||
* \param preds prediction
|
||||
* \param info information, including label etc.
|
||||
* \param distributed whether a call to Allreduce is needed to gather
|
||||
* \param distributed whether a call to Allreduce is needed to gather
|
||||
* the average statistics across all the node,
|
||||
* this is only supported by some metrics
|
||||
*/
|
||||
|
||||
@ -1,10 +1,12 @@
|
||||
#ifndef XGBOOST_LEARNER_HELPER_UTILS_H_
|
||||
#define XGBOOST_LEARNER_HELPER_UTILS_H_
|
||||
/*!
|
||||
* Copyright 2014 by Contributors
|
||||
* \file helper_utils.h
|
||||
* \brief useful helper functions
|
||||
* \author Tianqi Chen, Kailong Chen
|
||||
*/
|
||||
#ifndef XGBOOST_LEARNER_HELPER_UTILS_H_
|
||||
#define XGBOOST_LEARNER_HELPER_UTILS_H_
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <cmath>
|
||||
@ -61,7 +63,7 @@ inline float LogSum(const float *rec, size_t size) {
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
sum += std::exp(rec[i] - mx);
|
||||
}
|
||||
return mx + std::log(sum);
|
||||
return mx + std::log(sum);
|
||||
}
|
||||
|
||||
inline static bool CmpFirst(const std::pair<float, unsigned> &a,
|
||||
|
||||
@ -1,10 +1,12 @@
|
||||
#ifndef XGBOOST_LEARNER_LEARNER_INL_HPP_
|
||||
#define XGBOOST_LEARNER_LEARNER_INL_HPP_
|
||||
/*!
|
||||
* Copyright 2014 by Contributors
|
||||
* \file learner-inl.hpp
|
||||
* \brief learning algorithm
|
||||
* \brief learning algorithm
|
||||
* \author Tianqi Chen
|
||||
*/
|
||||
#ifndef XGBOOST_LEARNER_LEARNER_INL_HPP_
|
||||
#define XGBOOST_LEARNER_LEARNER_INL_HPP_
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
@ -19,7 +21,7 @@
|
||||
namespace xgboost {
|
||||
/*! \brief namespace for learning algorithm */
|
||||
namespace learner {
|
||||
/*!
|
||||
/*!
|
||||
* \brief learner that performs gradient boosting on specific objective functions
|
||||
* and does training and prediction
|
||||
*/
|
||||
@ -30,7 +32,7 @@ class BoostLearner : public rabit::Serializable {
|
||||
gbm_ = NULL;
|
||||
name_obj_ = "reg:linear";
|
||||
name_gbm_ = "gbtree";
|
||||
silent= 0;
|
||||
silent = 0;
|
||||
prob_buffer_row = 1.0f;
|
||||
distributed_mode = 0;
|
||||
updater_mode = 0;
|
||||
@ -47,10 +49,10 @@ class BoostLearner : public rabit::Serializable {
|
||||
* \brief add internal cache space for mat, this can speedup prediction for matrix,
|
||||
* please cache prediction for training and eval data
|
||||
* warning: if the model is loaded from file from some previous training history
|
||||
* set cache data must be called with exactly SAME
|
||||
* set cache data must be called with exactly SAME
|
||||
* data matrices to continue training otherwise it will cause error
|
||||
* \param mats array of pointers to matrix whose prediction result need to be cached
|
||||
*/
|
||||
*/
|
||||
inline void SetCacheData(const std::vector<DMatrix*>& mats) {
|
||||
utils::Assert(cache_.size() == 0, "can only call cache data once");
|
||||
// assign buffer index
|
||||
@ -67,10 +69,10 @@ class BoostLearner : public rabit::Serializable {
|
||||
buffer_size += mats[i]->info.num_row();
|
||||
}
|
||||
char str_temp[25];
|
||||
utils::SPrintf(str_temp, sizeof(str_temp), "%lu",
|
||||
static_cast<unsigned long>(buffer_size));
|
||||
utils::SPrintf(str_temp, sizeof(str_temp), "%lu",
|
||||
static_cast<unsigned long>(buffer_size)); // NOLINT(*)
|
||||
this->SetParam("num_pbuffer", str_temp);
|
||||
this->pred_buffer_size = buffer_size;
|
||||
this->pred_buffer_size = buffer_size;
|
||||
}
|
||||
/*!
|
||||
* \brief set parameters from outside
|
||||
@ -79,7 +81,7 @@ class BoostLearner : public rabit::Serializable {
|
||||
*/
|
||||
inline void SetParam(const char *name, const char *val) {
|
||||
using namespace std;
|
||||
// in this version, bst: prefix is no longer required
|
||||
// in this version, bst: prefix is no longer required
|
||||
if (strncmp(name, "bst:", 4) != 0) {
|
||||
std::string n = "bst:"; n += name;
|
||||
this->SetParam(n.c_str(), val);
|
||||
@ -119,7 +121,7 @@ class BoostLearner : public rabit::Serializable {
|
||||
if (!strcmp(name, "objective")) name_obj_ = val;
|
||||
if (!strcmp(name, "booster")) name_gbm_ = val;
|
||||
mparam.SetParam(name, val);
|
||||
}
|
||||
}
|
||||
if (gbm_ != NULL) gbm_->SetParam(name, val);
|
||||
if (obj_ != NULL) obj_->SetParam(name, val);
|
||||
if (gbm_ == NULL || obj_ == NULL) {
|
||||
@ -133,16 +135,16 @@ class BoostLearner : public rabit::Serializable {
|
||||
// estimate feature bound
|
||||
unsigned num_feature = 0;
|
||||
for (size_t i = 0; i < cache_.size(); ++i) {
|
||||
num_feature = std::max(num_feature,
|
||||
num_feature = std::max(num_feature,
|
||||
static_cast<unsigned>(cache_[i].mat_->info.num_col()));
|
||||
}
|
||||
// run allreduce on num_feature to find the maximum value
|
||||
rabit::Allreduce<rabit::op::Max>(&num_feature, 1);
|
||||
if (num_feature > mparam.num_feature) mparam.num_feature = num_feature;
|
||||
}
|
||||
}
|
||||
char str_temp[25];
|
||||
utils::SPrintf(str_temp, sizeof(str_temp), "%d", mparam.num_feature);
|
||||
this->SetParam("bst:num_feature", str_temp);
|
||||
this->SetParam("bst:num_feature", str_temp);
|
||||
}
|
||||
/*!
|
||||
* \brief initialize the model
|
||||
@ -161,13 +163,13 @@ class BoostLearner : public rabit::Serializable {
|
||||
* \param fi input stream
|
||||
* \param calc_num_feature whether call InitTrainer with calc_num_feature
|
||||
*/
|
||||
inline void LoadModel(utils::IStream &fi,
|
||||
inline void LoadModel(utils::IStream &fi, // NOLINT(*)
|
||||
bool calc_num_feature = true) {
|
||||
utils::Check(fi.Read(&mparam, sizeof(ModelParam)) != 0,
|
||||
"BoostLearner: wrong model format");
|
||||
{
|
||||
// backward-compatibility code, for compatibility with the old model type
|
||||
// for new model, Read(&name_obj_) suffices
|
||||
// for new model, Read(&name_obj_) suffices
|
||||
uint64_t len;
|
||||
utils::Check(fi.Read(&len, sizeof(len)) != 0, "BoostLearner: wrong model format");
|
||||
if (len >= std::numeric_limits<unsigned>::max()) {
|
||||
@ -226,9 +228,9 @@ class BoostLearner : public rabit::Serializable {
|
||||
fi = utils::IStream::Create(fname, "r");
|
||||
this->LoadModel(*fi, true);
|
||||
}
|
||||
delete fi;
|
||||
delete fi;
|
||||
}
|
||||
inline void SaveModel(utils::IStream &fo, bool with_pbuffer) const {
|
||||
inline void SaveModel(utils::IStream &fo, bool with_pbuffer) const { // NOLINT(*)
|
||||
ModelParam p = mparam;
|
||||
p.saved_with_pbuffer = static_cast<int>(with_pbuffer);
|
||||
fo.Write(&p, sizeof(ModelParam));
|
||||
@ -247,7 +249,7 @@ class BoostLearner : public rabit::Serializable {
|
||||
fo->Write("bs64\t", 5);
|
||||
utils::Base64OutStream bout(fo);
|
||||
this->SaveModel(bout, with_pbuffer);
|
||||
bout.Finish('\n');
|
||||
bout.Finish('\n');
|
||||
} else {
|
||||
fo->Write("binf", 4);
|
||||
this->SaveModel(*fo, with_pbuffer);
|
||||
@ -260,7 +262,7 @@ class BoostLearner : public rabit::Serializable {
|
||||
* \param p_train pointer to the matrix used by training
|
||||
*/
|
||||
inline void CheckInit(DMatrix *p_train) {
|
||||
int ncol = static_cast<int>(p_train->info.info.num_col);
|
||||
int ncol = static_cast<int>(p_train->info.info.num_col);
|
||||
std::vector<bool> enabled(ncol, true);
|
||||
// set max row per batch to limited value
|
||||
// in distributed mode, use safe choice otherwise
|
||||
@ -345,10 +347,9 @@ class BoostLearner : public rabit::Serializable {
|
||||
bool output_margin,
|
||||
std::vector<float> *out_preds,
|
||||
unsigned ntree_limit = 0,
|
||||
bool pred_leaf = false
|
||||
) const {
|
||||
bool pred_leaf = false) const {
|
||||
if (pred_leaf) {
|
||||
gbm_->PredictLeaf(data.fmat(), data.info.info, out_preds, ntree_limit);
|
||||
gbm_->PredictLeaf(data.fmat(), data.info.info, out_preds, ntree_limit);
|
||||
} else {
|
||||
this->PredictRaw(data, out_preds, ntree_limit);
|
||||
if (!output_margin) {
|
||||
@ -361,7 +362,7 @@ class BoostLearner : public rabit::Serializable {
|
||||
* NOTE: use the batch prediction interface if possible, batch prediction is usually
|
||||
* more efficient than online prediction
|
||||
* This function is NOT threadsafe, make sure you only call from one thread
|
||||
*
|
||||
*
|
||||
* \param inst the instance you want to predict
|
||||
* \param output_margin whether to only predict margin value instead of transformed prediction
|
||||
* \param out_preds output vector to hold the predictions
|
||||
@ -387,8 +388,8 @@ class BoostLearner : public rabit::Serializable {
|
||||
}
|
||||
|
||||
protected:
|
||||
/*!
|
||||
* \brief initialize the objective function and GBM,
|
||||
/*!
|
||||
* \brief initialize the objective function and GBM,
|
||||
* if not yet done
|
||||
*/
|
||||
inline void InitObjGBM(void) {
|
||||
@ -401,12 +402,12 @@ class BoostLearner : public rabit::Serializable {
|
||||
for (size_t i = 0; i < cfg_.size(); ++i) {
|
||||
obj_->SetParam(cfg_[i].first.c_str(), cfg_[i].second.c_str());
|
||||
gbm_->SetParam(cfg_[i].first.c_str(), cfg_[i].second.c_str());
|
||||
}
|
||||
}
|
||||
if (evaluator_.Size() == 0) {
|
||||
evaluator_.AddEval(obj_->DefaultEvalMetric());
|
||||
}
|
||||
}
|
||||
/*!
|
||||
/*!
|
||||
* \brief additional default value for specific objs
|
||||
*/
|
||||
inline void InitAdditionDefaultParam(void) {
|
||||
@ -415,12 +416,12 @@ class BoostLearner : public rabit::Serializable {
|
||||
gbm_->SetParam("max_delta_step", "0.7");
|
||||
}
|
||||
}
|
||||
/*!
|
||||
/*!
|
||||
* \brief get un-transformed prediction
|
||||
* \param data training data matrix
|
||||
* \param out_preds output vector that stores the prediction
|
||||
* \param ntree_limit limit number of trees used for boosted tree
|
||||
* predictor, when it equals 0, this means we are using all the trees
|
||||
* predictor, when it equals 0, this means we are using all the trees
|
||||
*/
|
||||
inline void PredictRaw(const DMatrix &data,
|
||||
std::vector<float> *out_preds,
|
||||
@ -517,7 +518,7 @@ class BoostLearner : public rabit::Serializable {
|
||||
|
||||
protected:
|
||||
// magic number to transform random seed
|
||||
const static int kRandSeedMagic = 127;
|
||||
static const int kRandSeedMagic = 127;
|
||||
// cache entry object that helps handle feature caching
|
||||
struct CacheEntry {
|
||||
const DMatrix *mat_;
|
||||
|
||||
@ -1,10 +1,12 @@
|
||||
#ifndef XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
|
||||
#define XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
|
||||
/*!
|
||||
* Copyright 2014 by Contributors
|
||||
* \file objective-inl.hpp
|
||||
* \brief objective function implementations
|
||||
* \author Tianqi Chen, Kailong Chen
|
||||
*/
|
||||
#ifndef XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
|
||||
#define XGBOOST_LEARNER_OBJECTIVE_INL_HPP_
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
@ -176,14 +178,14 @@ class RegLossObj : public IObjFunction {
|
||||
// poisson regression for count
|
||||
class PoissonRegression : public IObjFunction {
|
||||
public:
|
||||
explicit PoissonRegression(void) {
|
||||
PoissonRegression(void) {
|
||||
max_delta_step = 0.0f;
|
||||
}
|
||||
virtual ~PoissonRegression(void) {}
|
||||
|
||||
|
||||
virtual void SetParam(const char *name, const char *val) {
|
||||
using namespace std;
|
||||
if (!strcmp( "max_delta_step", name )) {
|
||||
if (!strcmp("max_delta_step", name)) {
|
||||
max_delta_step = static_cast<float>(atof(val));
|
||||
}
|
||||
}
|
||||
@ -201,9 +203,9 @@ class PoissonRegression : public IObjFunction {
|
||||
// check if label in range
|
||||
bool label_correct = true;
|
||||
// start calculating gradient
|
||||
const long ndata = static_cast<bst_omp_uint>(preds.size());
|
||||
const long ndata = static_cast<bst_omp_uint>(preds.size()); // NOLINT(*)
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (long i = 0; i < ndata; ++i) {
|
||||
for (long i = 0; i < ndata; ++i) { // NOLINT(*)
|
||||
float p = preds[i];
|
||||
float w = info.GetWeight(i);
|
||||
float y = info.labels[i];
|
||||
@ -219,9 +221,9 @@ class PoissonRegression : public IObjFunction {
|
||||
}
|
||||
virtual void PredTransform(std::vector<float> *io_preds) {
|
||||
std::vector<float> &preds = *io_preds;
|
||||
const long ndata = static_cast<long>(preds.size());
|
||||
const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (long j = 0; j < ndata; ++j) {
|
||||
for (long j = 0; j < ndata; ++j) { // NOLINT(*)
|
||||
preds[j] = std::exp(preds[j]);
|
||||
}
|
||||
}
|
||||
@ -234,7 +236,7 @@ class PoissonRegression : public IObjFunction {
|
||||
virtual const char* DefaultEvalMetric(void) const {
|
||||
return "poisson-nloglik";
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
float max_delta_step;
|
||||
};
|
||||
@ -467,7 +469,7 @@ class LambdaRankObj : public IObjFunction {
|
||||
: pos_index(pos_index), neg_index(neg_index), weight(1.0f) {}
|
||||
};
|
||||
/*!
|
||||
* \brief get lambda weight for existing pairs
|
||||
* \brief get lambda weight for existing pairs
|
||||
* \param list a list that is sorted by pred score
|
||||
* \param io_pairs record of pairs, containing the pairs to fill in weights
|
||||
*/
|
||||
@ -555,10 +557,10 @@ class LambdaRankObjMAP : public LambdaRankObj {
|
||||
float ap_acc;
|
||||
/*!
|
||||
* \brief the accumulated precision,
|
||||
* assuming a positive instance is missing
|
||||
* assuming a positive instance is missing
|
||||
*/
|
||||
float ap_acc_miss;
|
||||
/*!
|
||||
/*!
|
||||
* \brief the accumulated precision,
|
||||
* assuming that one more positive instance is inserted ahead
|
||||
*/
|
||||
|
||||
@ -1,11 +1,14 @@
|
||||
#ifndef XGBOOST_LEARNER_OBJECTIVE_H_
|
||||
#define XGBOOST_LEARNER_OBJECTIVE_H_
|
||||
/*!
|
||||
* Copyright 2014 by Contributors
|
||||
* \file objective.h
|
||||
* \brief interface of objective function used for gradient boosting
|
||||
* \author Tianqi Chen, Kailong Chen
|
||||
*/
|
||||
#include "dmatrix.h"
|
||||
#ifndef XGBOOST_LEARNER_OBJECTIVE_H_
|
||||
#define XGBOOST_LEARNER_OBJECTIVE_H_
|
||||
|
||||
#include <vector>
|
||||
#include "./dmatrix.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace learner {
|
||||
@ -13,13 +16,13 @@ namespace learner {
|
||||
class IObjFunction{
|
||||
public:
|
||||
/*! \brief virtual destructor */
|
||||
virtual ~IObjFunction(void){}
|
||||
virtual ~IObjFunction(void) {}
|
||||
/*!
|
||||
* \brief set parameters from outside
|
||||
* \param name name of the parameter
|
||||
* \param val value of the parameter
|
||||
*/
|
||||
virtual void SetParam(const char *name, const char *val) = 0;
|
||||
virtual void SetParam(const char *name, const char *val) = 0;
|
||||
/*!
|
||||
* \brief get gradient over each of predictions, given existing information
|
||||
* \param preds prediction of current round
|
||||
@ -38,9 +41,9 @@ class IObjFunction{
|
||||
* \brief transform prediction values, this is only called when Prediction is called
|
||||
* \param io_preds prediction values, saves to this vector as well
|
||||
*/
|
||||
virtual void PredTransform(std::vector<float> *io_preds){}
|
||||
virtual void PredTransform(std::vector<float> *io_preds) {}
|
||||
/*!
|
||||
* \brief transform prediction values, this is only called when Eval is called,
|
||||
* \brief transform prediction values, this is only called when Eval is called,
|
||||
* usually it redirect to PredTransform
|
||||
* \param io_preds prediction values, saves to this vector as well
|
||||
*/
|
||||
@ -49,7 +52,7 @@ class IObjFunction{
|
||||
}
|
||||
/*!
|
||||
* \brief transform probability value back to margin
|
||||
* this is used to transform user-set base_score back to margin
|
||||
* this is used to transform user-set base_score back to margin
|
||||
* used by gradient boosting
|
||||
* \return transformed value
|
||||
*/
|
||||
@ -77,7 +80,7 @@ inline IObjFunction* CreateObjFunction(const char *name) {
|
||||
if (!strcmp("multi:softprob", name)) return new SoftmaxMultiClassObj(1);
|
||||
if (!strcmp("rank:pairwise", name )) return new PairwiseRankObj();
|
||||
if (!strcmp("rank:ndcg", name)) return new LambdaRankObjNDCG();
|
||||
if (!strcmp("rank:map", name)) return new LambdaRankObjMAP();
|
||||
if (!strcmp("rank:map", name)) return new LambdaRankObjMAP();
|
||||
utils::Error("unknown objective function type: %s", name);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
#ifndef XGBOOST_SYNC_H_
|
||||
#define XGBOOST_SYNC_H_
|
||||
/*!
|
||||
* Copyright 2014 by Contributors
|
||||
* \file sync.h
|
||||
* \brief the synchronization module of rabit
|
||||
* redirects to subtree rabit header
|
||||
* \author Tianqi Chen
|
||||
*/
|
||||
#ifndef XGBOOST_SYNC_SYNC_H_
|
||||
#define XGBOOST_SYNC_SYNC_H_
|
||||
|
||||
#include "../../subtree/rabit/include/rabit.h"
|
||||
#include "../../subtree/rabit/include/rabit/timer.h"
|
||||
#endif // XGBOOST_SYNC_H_
|
||||
|
||||
|
||||
#endif // XGBOOST_SYNC_SYNC_H_
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user