spelling, wording, and doc fixes in c++ code
I was reading through the code and fixing some things in the comments. Only a few trivial actual code changes were made to make things more readable.
This commit is contained in:
@@ -35,7 +35,7 @@ struct MetaInfo {
|
||||
std::vector<float> weights;
|
||||
/*!
|
||||
* \brief initialized margins,
|
||||
* if specified, xgboost will start from this init margin
|
||||
* if specified, xgboost will start from this initial margin
|
||||
* can be used to specify initial prediction to boost from
|
||||
*/
|
||||
std::vector<float> base_margin;
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
namespace xgboost {
|
||||
namespace learner {
|
||||
/*!
|
||||
* \brief base class of elementwise evaluation
|
||||
* \brief base class of element-wise evaluation
|
||||
* \tparam Derived the name of subclass
|
||||
*/
|
||||
template<typename Derived>
|
||||
@@ -57,7 +57,7 @@ struct EvalEWiseBase : public IEvaluator {
|
||||
*/
|
||||
inline static float EvalRow(float label, float pred);
|
||||
/*!
|
||||
* \brief to be overide by subclas, final trasnformation
|
||||
* \brief to be overridden by subclass, final transformation
|
||||
* \param esum the sum statistics returned by EvalRow
|
||||
* \param wsum sum of weight
|
||||
*/
|
||||
@@ -109,7 +109,7 @@ struct EvalError : public EvalEWiseBase<EvalError> {
|
||||
}
|
||||
};
|
||||
|
||||
/*! \brief loglikelihood of poission distribution */
|
||||
/*! \brief log-likelihood of Poisson distribution */
|
||||
struct EvalPoissionNegLogLik : public EvalEWiseBase<EvalPoissionNegLogLik> {
|
||||
virtual const char *Name(void) const {
|
||||
return "poisson-nloglik";
|
||||
@@ -174,7 +174,7 @@ struct EvalMClassBase : public IEvaluator {
|
||||
const float *pred,
|
||||
size_t nclass);
|
||||
/*!
|
||||
* \brief to be overide by subclas, final trasnformation
|
||||
* \brief to be overridden by subclass, final transformation
|
||||
* \param esum the sum statistics returned by EvalRow
|
||||
* \param wsum sum of weight
|
||||
*/
|
||||
@@ -367,7 +367,7 @@ struct EvalPrecisionRatio : public IEvaluator{
|
||||
std::string name_;
|
||||
};
|
||||
|
||||
/*! \brief Area under curve, for both classification and rank */
|
||||
/*! \brief Area Under Curve, for both classification and rank */
|
||||
struct EvalAuc : public IEvaluator {
|
||||
virtual float Eval(const std::vector<float> &preds,
|
||||
const MetaInfo &info,
|
||||
@@ -382,7 +382,7 @@ struct EvalAuc : public IEvaluator {
|
||||
utils::Check(gptr.back() == info.labels.size(),
|
||||
"EvalAuc: group structure must match number of prediction");
|
||||
const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
|
||||
// sum statictis
|
||||
// sum statistics
|
||||
double sum_auc = 0.0f;
|
||||
#pragma omp parallel reduction(+:sum_auc)
|
||||
{
|
||||
@@ -404,13 +404,16 @@ struct EvalAuc : public IEvaluator {
|
||||
// keep bucketing predictions in same bucket
|
||||
if (j != 0 && rec[j].first != rec[j - 1].first) {
|
||||
sum_pospair += buf_neg * (sum_npos + buf_pos *0.5);
|
||||
sum_npos += buf_pos; sum_nneg += buf_neg;
|
||||
sum_npos += buf_pos;
|
||||
sum_nneg += buf_neg;
|
||||
buf_neg = buf_pos = 0.0f;
|
||||
}
|
||||
buf_pos += ctr * wt; buf_neg += (1.0f - ctr) * wt;
|
||||
buf_pos += ctr * wt;
|
||||
buf_neg += (1.0f - ctr) * wt;
|
||||
}
|
||||
sum_pospair += buf_neg * (sum_npos + buf_pos *0.5);
|
||||
sum_npos += buf_pos; sum_nneg += buf_neg;
|
||||
sum_npos += buf_pos;
|
||||
sum_nneg += buf_neg;
|
||||
// check weird conditions
|
||||
utils::Check(sum_npos > 0.0 && sum_nneg > 0.0,
|
||||
"AUC: the dataset only contains pos or neg samples");
|
||||
@@ -443,7 +446,8 @@ struct EvalRankList : public IEvaluator {
|
||||
utils::Check(preds.size() == info.labels.size(),
|
||||
"label size predict size not match");
|
||||
// quick consistency when group is not available
|
||||
std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(preds.size());
|
||||
std::vector<unsigned> tgptr(2, 0);
|
||||
tgptr[1] = static_cast<unsigned>(preds.size());
|
||||
const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
|
||||
utils::Assert(gptr.size() != 0, "must specify group when constructing rank file");
|
||||
utils::Assert(gptr.back() == preds.size(),
|
||||
@@ -468,7 +472,7 @@ struct EvalRankList : public IEvaluator {
|
||||
float dat[2];
|
||||
dat[0] = static_cast<float>(sum_metric);
|
||||
dat[1] = static_cast<float>(ngroup);
|
||||
// approximately estimate auc using mean
|
||||
// approximately estimate the metric using mean
|
||||
rabit::Allreduce<rabit::op::Sum>(dat, 2);
|
||||
return dat[0] / dat[1];
|
||||
} else {
|
||||
@@ -500,14 +504,14 @@ struct EvalRankList : public IEvaluator {
|
||||
bool minus_;
|
||||
};
|
||||
|
||||
/*! \brief Precison at N, for both classification and rank */
|
||||
/*! \brief Precision at N, for both classification and rank */
|
||||
struct EvalPrecision : public EvalRankList{
|
||||
public:
|
||||
explicit EvalPrecision(const char *name) : EvalRankList(name) {}
|
||||
|
||||
protected:
|
||||
virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &rec) const {
|
||||
// calculate Preicsion
|
||||
// calculate Precision
|
||||
std::sort(rec.begin(), rec.end(), CmpFirst);
|
||||
unsigned nhit = 0;
|
||||
for (size_t j = 0; j < rec.size() && j < this->topn_; ++j) {
|
||||
@@ -517,7 +521,7 @@ struct EvalPrecision : public EvalRankList{
|
||||
}
|
||||
};
|
||||
|
||||
/*! \brief NDCG */
|
||||
/*! \brief NDCG: Normalized Discounted Cumulative Gain at N */
|
||||
struct EvalNDCG : public EvalRankList{
|
||||
public:
|
||||
explicit EvalNDCG(const char *name) : EvalRankList(name) {}
|
||||
@@ -549,7 +553,7 @@ struct EvalNDCG : public EvalRankList{
|
||||
}
|
||||
};
|
||||
|
||||
/*! \brief Precison at N, for both classification and rank */
|
||||
/*! \brief Mean Average Precision at N, for both classification and rank */
|
||||
struct EvalMAP : public EvalRankList {
|
||||
public:
|
||||
explicit EvalMAP(const char *name) : EvalRankList(name) {}
|
||||
|
||||
@@ -45,7 +45,7 @@ inline static int FindMaxIndex(const std::vector<float>& rec) {
|
||||
return FindMaxIndex(BeginPtr(rec), rec.size());
|
||||
}
|
||||
|
||||
// perform numerical safe logsum
|
||||
// perform numerically safe logsum
|
||||
inline float LogSum(float x, float y) {
|
||||
if (x < y) {
|
||||
return y + std::log(std::exp(x - y) + 1.0f);
|
||||
@@ -53,7 +53,7 @@ inline float LogSum(float x, float y) {
|
||||
return x + std::log(std::exp(y - x) + 1.0f);
|
||||
}
|
||||
}
|
||||
// numerical safe logsum
|
||||
// numerically safe logsum
|
||||
inline float LogSum(const float *rec, size_t size) {
|
||||
float mx = rec[0];
|
||||
for (size_t i = 1; i < size; ++i) {
|
||||
@@ -66,11 +66,11 @@ inline float LogSum(const float *rec, size_t size) {
|
||||
return mx + std::log(sum);
|
||||
}
|
||||
|
||||
// comparator functions for sorting pairs in descending order
|
||||
inline static bool CmpFirst(const std::pair<float, unsigned> &a,
|
||||
const std::pair<float, unsigned> &b) {
|
||||
return a.first > b.first;
|
||||
}
|
||||
|
||||
inline static bool CmpSecond(const std::pair<float, unsigned> &a,
|
||||
const std::pair<float, unsigned> &b) {
|
||||
return a.second > b.second;
|
||||
|
||||
@@ -22,8 +22,8 @@ namespace xgboost {
|
||||
/*! \brief namespace for learning algorithm */
|
||||
namespace learner {
|
||||
/*!
|
||||
* \brief learner that takes do gradient boosting on specific objective functions
|
||||
* and do training and prediction
|
||||
* \brief learner that performs gradient boosting for a specific objective function.
|
||||
* It does training and prediction.
|
||||
*/
|
||||
class BoostLearner : public rabit::Serializable {
|
||||
public:
|
||||
@@ -258,7 +258,7 @@ class BoostLearner : public rabit::Serializable {
|
||||
}
|
||||
/*!
|
||||
* \brief check if data matrix is ready to be used by training,
|
||||
* if not intialize it
|
||||
* if not initialize it
|
||||
* \param p_train pointer to the matrix used by training
|
||||
*/
|
||||
inline void CheckInit(DMatrix *p_train) {
|
||||
@@ -283,7 +283,7 @@ class BoostLearner : public rabit::Serializable {
|
||||
/*!
|
||||
* \brief update the model for one iteration
|
||||
* \param iter current iteration number
|
||||
* \param p_train pointer to the data matrix
|
||||
* \param train reference to the data matrix
|
||||
*/
|
||||
inline void UpdateOneIter(int iter, const DMatrix &train) {
|
||||
if (seed_per_iteration != 0 || rabit::IsDistributed()) {
|
||||
@@ -342,6 +342,7 @@ class BoostLearner : public rabit::Serializable {
|
||||
* \param out_preds output vector that stores the prediction
|
||||
* \param ntree_limit limit number of trees used for boosted tree
|
||||
* predictor, when it equals 0, this means we are using all the trees
|
||||
* \param pred_leaf whether to only predict the leaf index of each tree in a boosted tree predictor
|
||||
*/
|
||||
inline void Predict(const DMatrix &data,
|
||||
bool output_margin,
|
||||
@@ -358,7 +359,7 @@ class BoostLearner : public rabit::Serializable {
|
||||
}
|
||||
}
|
||||
/*!
|
||||
* \brief online prediction funciton, predict score for one instance at a time
|
||||
* \brief online prediction function, predict score for one instance at a time
|
||||
* NOTE: use the batch prediction interface if possible, batch prediction is usually
|
||||
* more efficient than online prediction
|
||||
* This function is NOT threadsafe, make sure you only call from one thread
|
||||
@@ -367,7 +368,6 @@ class BoostLearner : public rabit::Serializable {
|
||||
* \param output_margin whether to only predict margin value instead of transformed prediction
|
||||
* \param out_preds output vector to hold the predictions
|
||||
* \param ntree_limit limit the number of trees used in prediction
|
||||
* \param root_index the root index
|
||||
* \sa Predict
|
||||
*/
|
||||
inline void Predict(const SparseBatch::Inst &inst,
|
||||
@@ -452,7 +452,7 @@ class BoostLearner : public rabit::Serializable {
|
||||
float base_score;
|
||||
/*! \brief number of features */
|
||||
unsigned num_feature;
|
||||
/* \brief number of class, if it is multi-class classification */
|
||||
/*! \brief number of classes, if it is multi-class classification */
|
||||
int num_class;
|
||||
/*! \brief whether the model itself is saved with pbuffer */
|
||||
int saved_with_pbuffer;
|
||||
@@ -495,7 +495,7 @@ class BoostLearner : public rabit::Serializable {
|
||||
int updater_mode;
|
||||
// cached size of predict buffer
|
||||
size_t pred_buffer_size;
|
||||
// maximum buffred row value
|
||||
// maximum buffered row value
|
||||
float prob_buffer_row;
|
||||
// evaluation set
|
||||
EvalSet evaluator_;
|
||||
@@ -505,13 +505,13 @@ class BoostLearner : public rabit::Serializable {
|
||||
gbm::IGradBooster *gbm_;
|
||||
// name of gbm model used for training
|
||||
std::string name_gbm_;
|
||||
// objective fnction
|
||||
// objective function
|
||||
IObjFunction *obj_;
|
||||
// name of objective function
|
||||
std::string name_obj_;
|
||||
// configurations
|
||||
std::vector< std::pair<std::string, std::string> > cfg_;
|
||||
// temporal storages for prediciton
|
||||
// temporary storage for prediction
|
||||
std::vector<float> preds_;
|
||||
// gradient pairs
|
||||
std::vector<bst_gpair> gpair_;
|
||||
@@ -527,7 +527,7 @@ class BoostLearner : public rabit::Serializable {
|
||||
CacheEntry(const DMatrix *mat, size_t buffer_offset, size_t num_row)
|
||||
:mat_(mat), buffer_offset_(buffer_offset), num_row_(num_row) {}
|
||||
};
|
||||
// find internal bufer offset for certain matrix, if not exist, return -1
|
||||
// find the internal buffer offset for a given matrix; return -1 if it does not exist
|
||||
inline int64_t FindBufferOffset(const DMatrix &mat) const {
|
||||
for (size_t i = 0; i < cache_.size(); ++i) {
|
||||
if (cache_[i].mat_ == &mat && mat.cache_learner_ptr_ == this) {
|
||||
|
||||
@@ -84,7 +84,7 @@ struct LossType {
|
||||
* \return second order gradient
|
||||
*/
|
||||
inline float SecondOrderGradient(float predt, float label) const {
|
||||
// cap second order gradient to postive value
|
||||
// cap second order gradient to positive value
|
||||
const float eps = 1e-16f;
|
||||
switch (loss_type) {
|
||||
case kLinearSquare: return 1.0f;
|
||||
|
||||
@@ -68,7 +68,7 @@ class IObjFunction{
|
||||
// factory function
|
||||
namespace xgboost {
|
||||
namespace learner {
|
||||
/*! \brief factory funciton to create objective function by name */
|
||||
/*! \brief factory function to create objective function by name */
|
||||
inline IObjFunction* CreateObjFunction(const char *name) {
|
||||
using namespace std;
|
||||
if (!strcmp("reg:linear", name)) return new RegLossObj(LossType::kLinearSquare);
|
||||
|
||||
Reference in New Issue
Block a user