From c70022e6c46b744ca4e828dd443371f9d12e70d4 Mon Sep 17 00:00:00 2001
From: Vadim Khotilovich
Date: Sat, 12 Dec 2015 21:40:12 -0600
Subject: [PATCH] spelling, wording, and doc fixes in C++ code

I was reading through the code and fixed various things in the comments.
Only a few trivial code changes were made, to make things more readable.
---
 src/data.h                        | 18 ++++++++--------
 src/gbm/gbm.h                     |  6 +++---
 src/gbm/gbtree-inl.hpp            |  6 +++---
 src/io/io.h                       |  4 ++--
 src/io/libsvm_parser.h            |  2 +-
 src/io/page_fmatrix-inl.hpp       | 10 +++++----
 src/io/simple_dmatrix-inl.hpp     |  4 ++--
 src/io/simple_fmatrix-inl.hpp     |  6 +++---
 src/io/sparse_batch_page.h        |  3 +--
 src/learner/dmatrix.h             |  2 +-
 src/learner/evaluation-inl.hpp    | 34 +++++++++++++++++--------------
 src/learner/helper_utils.h        |  6 +++---
 src/learner/learner-inl.hpp       | 22 ++++++++++----------
 src/learner/objective-inl.hpp     |  2 +-
 src/learner/objective.h           |  2 +-
 src/tree/model.h                  | 20 +++++++++---------
 src/tree/param.h                  | 22 ++++++++++----------
 src/tree/updater.h                |  6 +++---
 src/tree/updater_colmaker-inl.hpp |  2 +-
 src/tree/updater_prune-inl.hpp    |  6 +++---
 src/utils/base64-inl.h            |  4 ++--
 src/utils/fmap.h                  |  2 +-
 src/utils/iterator.h              |  2 +-
 src/utils/quantile.h              | 14 ++++++-------
 src/utils/random.h                |  2 +-
 src/utils/thread_buffer.h         |  8 ++++----
 src/utils/utils.h                 |  6 +++---
 27 files changed, 113 insertions(+), 108 deletions(-)

diff --git a/src/data.h b/src/data.h
index 3c4a14987..9bcb84ced 100644
--- a/src/data.h
+++ b/src/data.h
@@ -14,7 +14,7 @@
 namespace xgboost {
 /*!
- * \brief unsigned interger type used in boost,
+ * \brief unsigned integer type used in xgboost,
  *        used for feature index and row index
  */
 typedef unsigned bst_uint;
@@ -35,8 +35,8 @@ struct bst_gpair {
 };
 /*!
- * \brief extra information that might needed by gbm and tree module
- * these information are not necessarily presented, and can be empty
+ * \brief extra information that might be needed by gbm and tree module
+ * this information is not necessarily present, and can be empty
  */
 struct BoosterInfo {
   /*! \brief number of rows in the data */
@@ -53,7 +53,7 @@ struct BoosterInfo {
   /*! \brief number of rows, number of columns */
   BoosterInfo(void) : num_row(0), num_col(0) {
   }
-  /*! \brief get root of ith instance */
+  /*! \brief get root of i-th instance */
   inline unsigned GetRoot(size_t i) const {
     return root_index.size() == 0 ? 0 : root_index[i];
   }
@@ -120,13 +120,13 @@ struct ColBatch : public SparseBatch {
 };
 /**
  * \brief interface of feature matrix, needed for tree construction
- * this interface defines two way to access features,
- * row access is defined by iterator of RowBatch
- * col access is optional, checked by HaveColAccess, and defined by iterator of ColBatch
+ * this interface defines two ways to access features:
+ * row access is defined by iterator of RowBatch
+ * col access is optional, checked by HaveColAccess, and defined by iterator of ColBatch
  */
 class IFMatrix {
  public:
-  // the interface only need to ganrantee row iter
+  // the interface only needs to guarantee the row iter
   // column iter is active, when ColIterator is called, row_iter can be disabled
   /*! \brief get the row iterator associated with FMatrix */
   virtual utils::IIterator<RowBatch> *RowIterator(void) = 0;
@@ -142,7 +142,7 @@ class IFMatrix {
    * \brief check if column access is supported, if not, initialize column access
    * \param enabled whether certain feature should be included in column access
    * \param subsample subsample ratio when generating column access
-   * \param max_row_perbatch auxilary information, maximum row used in each column batch
+   * \param max_row_perbatch auxiliary information, maximum row used in each column batch
    *        this is a hint information that can be ignored by the implementation
    */
   virtual void InitColAccess(const std::vector<bool> &enabled,
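
For reference, the row-access pattern described in the IFMatrix comment above
looks like the following sketch (the loop mirrors DMatrixSimple::CopyFrom
further down in this patch; IIterator's Value() accessor and RowBatch's
size/indexing members are assumed from their headers):

    // Sketch: stream over all rows of an IFMatrix *fmat, one batch at a time.
    utils::IIterator<RowBatch> *iter = fmat->RowIterator();
    iter->BeforeFirst();
    while (iter->Next()) {
      const RowBatch &batch = iter->Value();
      for (size_t i = 0; i < batch.size; ++i) {
        RowBatch::Inst inst = batch[i];  // one sparse row
        // ... consume inst.data[0] .. inst.data[inst.length - 1] ...
      }
    }
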
diff --git a/src/gbm/gbm.h b/src/gbm/gbm.h
index 60b7474e1..8ff692c05 100644
--- a/src/gbm/gbm.h
+++ b/src/gbm/gbm.h
@@ -58,7 +58,7 @@ class IGradBooster {
     return false;
   }
   /*!
-   * \brief peform update to the model(boosting)
+   * \brief perform update to the model (boosting)
    * \param p_fmat feature matrix that provide access to features
    * \param buffer_offset buffer index offset of these instances, if equals -1
    *        this means we do not have buffer index allocated to the gbm
@@ -88,7 +88,7 @@ class IGradBooster {
                        std::vector<float> *out_preds,
                        unsigned ntree_limit = 0) = 0;
   /*!
-   * \brief online prediction funciton, predict score for one instance at a time
+   * \brief online prediction function, predict score for one instance at a time
    *  NOTE: use the batch prediction interface if possible, batch prediction is usually
    *        more efficient than online prediction
    *        This function is NOT threadsafe, make sure you only call from one thread
@@ -119,7 +119,7 @@ class IGradBooster {
   /*!
    * \brief dump the model in text format
    * \param fmap feature map that may help give interpretations of feature
-   * \param option extra option of the dumo model
+   * \param option extra option for the model dump
    * \return a vector of dump for boosters
    */
   virtual std::vector<std::string> DumpModel(const utils::FeatMap& fmap, int option) = 0;
diff --git a/src/gbm/gbtree-inl.hpp b/src/gbm/gbtree-inl.hpp
index c06dc51a1..65fe7e9da 100644
--- a/src/gbm/gbtree-inl.hpp
+++ b/src/gbm/gbtree-inl.hpp
@@ -31,7 +31,7 @@ class GBTree : public IGradBooster {
     using namespace std;
     if (!strncmp(name, "bst:", 4)) {
       cfg.push_back(std::make_pair(std::string(name+4), std::string(val)));
-      // set into updaters, if already intialized
+      // set into updaters, if already initialized
       for (size_t i = 0; i < updaters.size(); ++i) {
         updaters[i]->SetParam(name+4, val);
       }
@@ -85,7 +85,7 @@ class GBTree : public IGradBooster {
       fo.Write(BeginPtr(pred_counter), pred_counter.size() * sizeof(unsigned));
     }
   }
-  // initialize the predic buffer
+  // initialize the predict buffer
   virtual void InitModel(void) {
     pred_buffer.clear(); pred_counter.clear();
     pred_buffer.resize(mparam.PredBufferSize(), 0.0f);
@@ -446,7 +446,7 @@ class GBTree : public IGradBooster {
     int num_roots;
     /*! \brief number of features to be used by trees */
     int num_feature;
-    /*! \brief size of predicton buffer allocated used for buffering */
+    /*! \brief size of prediction buffer allocated, used for buffering */
     int64_t num_pbuffer;
     /*!
      * \brief how many output group a single instance can produce
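
As the prediction docs above stress, the batch interface should be preferred
over the online one. A rough caller-side contrast (signatures abbreviated to
the parameters documented above, so treat this purely as a sketch):

    std::vector<float> preds;
    // batch: scores every row of the feature matrix in one call
    gbm->Predict(p_fmat, buffer_offset, info, &preds);
    // online: scores a single sparse instance; NOT threadsafe
    gbm->Predict(inst, &preds, /*ntree_limit=*/0);
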
diff --git a/src/io/io.h b/src/io/io.h
index 267bb0bff..6ceff2698 100644
--- a/src/io/io.h
+++ b/src/io/io.h
@@ -22,7 +22,7 @@ typedef learner::DMatrix DataMatrix;
  * \param silent whether print message during loading
  * \param savebuffer whether temporal buffer the file if the file is in text format
  * \param loadsplit whether we only load a split of input files
- *        such that each worker node get a split of the data
+ *        such that each worker node gets a split of the data
  * \param cache_file name of cache_file, used by external memory version
  *        can be NULL, if cache_file is specified, this will be the temporal
  *        space that can be re-used to store intermediate data
@@ -38,7 +38,7 @@ DataMatrix* LoadDataMatrix(const char *fname,
  * note: the saved dmatrix format may not be in exactly same as input
  *       SaveDMatrix will choose the best way to materialize the dmatrix.
  * \param dmat the dmatrix to be saved
- * \param fname file name to be savd
+ * \param fname file name to be saved
  * \param silent whether print message during saving
  */
 void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent = false);
diff --git a/src/io/libsvm_parser.h b/src/io/libsvm_parser.h
index 92eeaf35d..43b8d6b90 100644
--- a/src/io/libsvm_parser.h
+++ b/src/io/libsvm_parser.h
@@ -31,7 +31,7 @@ struct LibSVMPage : public SparsePage {
 /*!
  * \brief libsvm parser that parses the input lines
  * and returns rows in input data
- * factry that was used by threadbuffer template
+ * factory that was used by threadbuffer template
  */
 class LibSVMPageFactory {
  public:
diff --git a/src/io/page_fmatrix-inl.hpp b/src/io/page_fmatrix-inl.hpp
index 2fa5c83bd..d2b71e50f 100644
--- a/src/io/page_fmatrix-inl.hpp
+++ b/src/io/page_fmatrix-inl.hpp
@@ -200,7 +200,7 @@ class FMatrixPage : public IFMatrix {
   virtual bool HaveColAccess(void) const {
     return col_size_.size() != 0;
   }
-  /*! \brief get number of colmuns */
+  /*! \brief get number of columns */
   virtual size_t NumCol(void) const {
     utils::Check(this->HaveColAccess(), "NumCol:need column access");
     return col_size_.size();
@@ -246,7 +246,7 @@ class FMatrixPage : public IFMatrix {
     return &col_iter_;
   }
   /*!
-   * \brief colmun based iterator
+   * \brief column based iterator
   */
  virtual utils::IIterator<ColBatch> *ColIterator(const std::vector<bst_uint> &fset) {
    size_t ncol = this->NumCol();
@@ -290,8 +290,10 @@ class FMatrixPage : public IFMatrix {
    fo->Write(col_size_);
  }
  /*!
-   * \brief intialize column data
+   * \brief initialize column data
+   * \param enabled the list of enabled columns
    * \param pkeep probability to keep a row
+   * \param max_row_perbatch maximum row per batch
    */
  inline void InitColData(const std::vector<bool> &enabled,
                          float pkeep, size_t max_row_perbatch) {
@@ -319,7 +321,7 @@ class FMatrixPage : public IFMatrix {
      bytes_write += spage;
      double tnow = rabit::utils::GetTime();
      double tdiff = tnow - tstart;
-      utils::Printf("Writting to %s in %g MB/s, %lu MB written\n",
+      utils::Printf("Writing to %s in %g MB/s, %lu MB written\n",
                    col_data_name_.c_str(),
                    (bytes_write >> 20UL) / tdiff,
                    (bytes_write >> 20UL));
diff --git a/src/io/simple_dmatrix-inl.hpp b/src/io/simple_dmatrix-inl.hpp
index 190cbdcdf..063b01665 100644
--- a/src/io/simple_dmatrix-inl.hpp
+++ b/src/io/simple_dmatrix-inl.hpp
@@ -51,7 +51,7 @@ class DMatrixSimple : public DataMatrix {
  inline void CopyFrom(const DataMatrix &src) {
    this->Clear();
    this->info = src.info;
-    // clone data content in thos matrix
+    // clone data contents from src matrix
    utils::IIterator<RowBatch> *iter = src.fmat()->RowIterator();
    iter->BeforeFirst();
    while (iter->Next()) {
@@ -313,7 +313,7 @@ class DMatrixSimple : public DataMatrix {
  private:
    // whether is at first
    bool at_first_;
-    // pointer to parient
+    // pointer to parent
    DMatrixSimple *parent_;
    // temporal space for batch
    RowBatch batch_;
diff --git a/src/io/simple_fmatrix-inl.hpp b/src/io/simple_fmatrix-inl.hpp
index 0e0da4461..e467263fa 100644
--- a/src/io/simple_fmatrix-inl.hpp
+++ b/src/io/simple_fmatrix-inl.hpp
@@ -40,7 +40,7 @@ class FMatrixS : public IFMatrix {
  virtual bool HaveColAccess(void) const {
    return col_size_.size() != 0;
  }
-  /*! \brief get number of colmuns */
+  /*! \brief get number of columns */
  virtual size_t NumCol(void) const {
    utils::Check(this->HaveColAccess(), "NumCol:need column access");
    return col_size_.size();
@@ -83,7 +83,7 @@ class FMatrixS : public IFMatrix {
    return &col_iter_;
  }
  /*!
-   * \brief colmun based iterator
+   * \brief column based iterator
   */
  virtual utils::IIterator<ColBatch> *ColIterator(const std::vector<bst_uint> &fset) {
    size_t ncol = this->NumCol();
@@ -112,7 +112,7 @@ class FMatrixS : public IFMatrix {

 protected:
  /*!
-   * \brief intialize column data
+   * \brief initialize column data
   * \param enabled the list of enabled columns
   * \param pkeep probability to keep a row
   * \param max_row_perbatch maximum row per batch
diff --git a/src/io/sparse_batch_page.h b/src/io/sparse_batch_page.h
index 24546f785..96810c0fb 100644
--- a/src/io/sparse_batch_page.h
+++ b/src/io/sparse_batch_page.h
@@ -33,8 +33,7 @@ class SparsePage {
    return offset.size() - 1;
  }
  /*!
-   * \brief load the by providing a list of interested segments
-   *  only the interested segments are loaded
+   * \brief load only the segments we are interested in
   * \param fi the input stream of the file
   * \param sorted_index_set sorted index of segments we are interested in
   * \return true of the loading as successful, false if end of file was reached
diff --git a/src/learner/dmatrix.h b/src/learner/dmatrix.h
index 3fbc579de..52828c3be 100644
--- a/src/learner/dmatrix.h
+++ b/src/learner/dmatrix.h
@@ -35,7 +35,7 @@ struct MetaInfo {
  std::vector<float> weights;
  /*!
   * \brief initialized margins,
-   * if specified, xgboost will start from this init margin
+   * if specified, xgboost will start from this initial margin
   *  can be used to specify initial prediction to boost from
   */
  std::vector<float> base_margin;
diff --git a/src/learner/evaluation-inl.hpp b/src/learner/evaluation-inl.hpp
index 2b69a43a8..d28702728 100644
--- a/src/learner/evaluation-inl.hpp
+++ b/src/learner/evaluation-inl.hpp
@@ -21,7 +21,7 @@ namespace xgboost {
 namespace learner {
 /*!
- * \brief base class of elementwise evaluation
+ * \brief base class of element-wise evaluation
  * \tparam Derived the name of subclass
  */
 template<typename Derived>
 struct EvalEWiseBase : public IEvaluator {
@@ -57,7 +57,7 @@ struct EvalEWiseBase : public IEvaluator {
   */
  inline static float EvalRow(float label, float pred);
  /*!
-   * \brief to be overide by subclas, final trasnformation
+   * \brief to be overridden by subclass, final transformation
   * \param esum the sum statistics returned by EvalRow
   * \param wsum sum of weight
   */
@@ -109,7 +109,7 @@ struct EvalError : public EvalEWiseBase<EvalError> {
  }
};

-/*! \brief loglikelihood of poission distribution */
+/*! \brief log-likelihood of Poisson distribution */
 struct EvalPoissionNegLogLik : public EvalEWiseBase<EvalPoissionNegLogLik> {
  virtual const char *Name(void) const {
    return "poisson-nloglik";
  }
@@ -174,7 +174,7 @@ struct EvalMClassBase : public IEvaluator {
                              const float *pred,
                              size_t nclass);
  /*!
-   * \brief to be overide by subclas, final trasnformation
+   * \brief to be overridden by subclass, final transformation
   * \param esum the sum statistics returned by EvalRow
   * \param wsum sum of weight
   */
@@ -367,7 +367,7 @@ struct EvalPrecisionRatio : public IEvaluator{
  std::string name_;
};

-/*! \brief Area under curve, for both classification and rank */
+/*! \brief Area Under Curve, for both classification and rank */
 struct EvalAuc : public IEvaluator {
  virtual float Eval(const std::vector<float> &preds,
                     const MetaInfo &info,
@@ -382,7 +382,7 @@ struct EvalAuc : public IEvaluator {
    utils::Check(gptr.back() == info.labels.size(),
                 "EvalAuc: group structure must match number of prediction");
    const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
-    // sum statictis
+    // sum statistics
    double sum_auc = 0.0f;
    #pragma omp parallel reduction(+:sum_auc)
    {
@@ -404,13 +404,16 @@ struct EvalAuc : public IEvaluator {
        // keep bucketing predictions in same bucket
        if (j != 0 && rec[j].first != rec[j - 1].first) {
          sum_pospair += buf_neg * (sum_npos + buf_pos *0.5);
-          sum_npos += buf_pos; sum_nneg += buf_neg;
+          sum_npos += buf_pos;
+          sum_nneg += buf_neg;
          buf_neg = buf_pos = 0.0f;
        }
-        buf_pos += ctr * wt; buf_neg += (1.0f - ctr) * wt;
+        buf_pos += ctr * wt;
+        buf_neg += (1.0f - ctr) * wt;
      }
      sum_pospair += buf_neg * (sum_npos + buf_pos *0.5);
-      sum_npos += buf_pos; sum_nneg += buf_neg;
+      sum_npos += buf_pos;
+      sum_nneg += buf_neg;
      // check weird conditions
      utils::Check(sum_npos > 0.0 && sum_nneg > 0.0,
                   "AUC: the dataset only contains pos or neg samples");
@@ -443,7 +446,8 @@ struct EvalRankList : public IEvaluator {
    utils::Check(preds.size() == info.labels.size(),
                 "label size predict size not match");
    // quick consistency when group is not available
-    std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(preds.size());
+    std::vector<unsigned> tgptr(2, 0);
+    tgptr[1] = static_cast<unsigned>(preds.size());
    const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ?
        tgptr : info.group_ptr;
    utils::Assert(gptr.size() != 0, "must specify group when constructing rank file");
    utils::Assert(gptr.back() == preds.size(),
@@ -468,7 +472,7 @@ struct EvalRankList : public IEvaluator {
      float dat[2];
      dat[0] = static_cast<float>(sum_metric);
      dat[1] = static_cast<float>(ngroup);
-      // approximately estimate auc using mean
+      // approximately estimate the metric using mean
      rabit::Allreduce<rabit::op::Sum>(dat, 2);
      return dat[0] / dat[1];
    } else {
@@ -500,14 +504,14 @@ struct EvalRankList : public IEvaluator {
  bool minus_;
};

-/*! \brief Precison at N, for both classification and rank */
+/*! \brief Precision at N, for both classification and rank */
 struct EvalPrecision : public EvalRankList{
 public:
  explicit EvalPrecision(const char *name) : EvalRankList(name) {}

 protected:
  virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &rec) const {
-    // calculate Preicsion
+    // calculate Precision
    std::sort(rec.begin(), rec.end(), CmpFirst);
    unsigned nhit = 0;
    for (size_t j = 0; j < rec.size() && j < this->topn_; ++j) {
@@ -517,7 +521,7 @@ struct EvalPrecision : public EvalRankList{
  }
};

-/*! \brief NDCG */
+/*! \brief NDCG: Normalized Discounted Cumulative Gain at N */
 struct EvalNDCG : public EvalRankList{
 public:
  explicit EvalNDCG(const char *name) : EvalRankList(name) {}
@@ -549,7 +553,7 @@ struct EvalNDCG : public EvalRankList{
  }
};

-/*! \brief Precison at N, for both classification and rank */
+/*! \brief Mean Average Precision at N, for both classification and rank */
 struct EvalMAP : public EvalRankList {
 public:
  explicit EvalMAP(const char *name) : EvalRankList(name) {}
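
The rewrapped accumulation in EvalAuc is easier to follow in a plain,
single-threaded, unweighted form. The sketch below re-implements the same
bucketed pairwise counting on its own (illustrative only, not code from this
patch):

    #include <algorithm>
    #include <functional>
    #include <utility>
    #include <vector>

    // AUC = fraction of (positive, negative) pairs ranked correctly, where
    // ties within one prediction bucket count 0.5, as with buf_pos/buf_neg above.
    double SimpleAuc(std::vector< std::pair<float, int> > rec) {  // (pred, 0/1 label)
      // sort descending by prediction, like CmpFirst in helper_utils.h
      std::sort(rec.begin(), rec.end(), std::greater< std::pair<float, int> >());
      double sum_pospair = 0.0, sum_npos = 0.0, sum_nneg = 0.0;
      double buf_pos = 0.0, buf_neg = 0.0;
      for (size_t j = 0; j < rec.size(); ++j) {
        if (j != 0 && rec[j].first != rec[j - 1].first) {
          sum_pospair += buf_neg * (sum_npos + buf_pos * 0.5);
          sum_npos += buf_pos; sum_nneg += buf_neg;
          buf_neg = buf_pos = 0.0;
        }
        buf_pos += rec[j].second;      // positives in the current bucket
        buf_neg += 1 - rec[j].second;  // negatives in the current bucket
      }
      sum_pospair += buf_neg * (sum_npos + buf_pos * 0.5);
      return sum_pospair / (sum_npos * sum_nneg);  // assumes both classes present
    }
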
diff --git a/src/learner/helper_utils.h b/src/learner/helper_utils.h
index 7ca7ba59c..0db1b46f3 100644
--- a/src/learner/helper_utils.h
+++ b/src/learner/helper_utils.h
@@ -45,7 +45,7 @@ inline static int FindMaxIndex(const std::vector<float>& rec) {
  return FindMaxIndex(BeginPtr(rec), rec.size());
}

-// perform numerical safe logsum
+// perform numerically safe logsum
inline float LogSum(float x, float y) {
  if (x < y) {
    return y + std::log(std::exp(x - y) + 1.0f);
@@ -53,7 +53,7 @@ inline float LogSum(float x, float y) {
    return x + std::log(std::exp(y - x) + 1.0f);
  }
}
-// numerical safe logsum
+// numerically safe logsum
inline float LogSum(const float *rec, size_t size) {
  float mx = rec[0];
  for (size_t i = 1; i < size; ++i) {
@@ -66,11 +66,11 @@ inline float LogSum(const float *rec, size_t size) {
  return mx + std::log(sum);
}

+// comparator functions for sorting pairs in descending order
inline static bool CmpFirst(const std::pair<float, unsigned> &a,
                            const std::pair<float, unsigned> &b) {
  return a.first > b.first;
}
-
inline static bool CmpSecond(const std::pair<float, unsigned> &a,
                             const std::pair<float, unsigned> &b) {
  return a.second > b.second;
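
The "numerically safe" wording matters: LogSum implements the log-sum-exp
trick, shifting by the maximum so std::exp never sees a large positive
argument. A minimal standalone demonstration (not part of the patch):

    #include <cmath>
    #include <cstdio>

    int main() {
      float a = 1000.0f, b = 999.0f;
      // naive log(exp(a) + exp(b)): exp(1000.0f) overflows to inf
      float naive = std::log(std::exp(a) + std::exp(b));
      // LogSum's form: the exp argument is <= 0, so nothing overflows
      float stable = a + std::log(std::exp(b - a) + 1.0f);
      std::printf("naive=%f stable=%f\n", naive, stable);  // inf vs ~1000.31
      return 0;
    }
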
diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp
index f051992d3..0e8480663 100644
--- a/src/learner/learner-inl.hpp
+++ b/src/learner/learner-inl.hpp
@@ -22,8 +22,8 @@ namespace xgboost {
 /*! \brief namespace for learning algorithm */
 namespace learner {
 /*!
- * \brief learner that takes do gradient boosting on specific objective functions
- *  and do training and prediction
+ * \brief learner that performs gradient boosting for a specific objective function.
+ *  It does training and prediction.
 */
class BoostLearner : public rabit::Serializable {
 public:
@@ -258,7 +258,7 @@ class BoostLearner : public rabit::Serializable {
  }
  /*!
   * \brief check if data matrix is ready to be used by training,
-   *  if not intialize it
+   *  if not initialize it
   * \param p_train pointer to the matrix used by training
   */
  inline void CheckInit(DMatrix *p_train) {
@@ -283,7 +283,7 @@ class BoostLearner : public rabit::Serializable {
  /*!
   * \brief update the model for one iteration
   * \param iter current iteration number
-   * \param p_train pointer to the data matrix
+   * \param train reference to the data matrix
   */
  inline void UpdateOneIter(int iter, const DMatrix &train) {
    if (seed_per_iteration != 0 || rabit::IsDistributed()) {
@@ -342,6 +342,7 @@ class BoostLearner : public rabit::Serializable {
   * \param out_preds output vector that stores the prediction
   * \param ntree_limit limit number of trees used for boosted tree
   *   predictor, when it equals 0, this means we are using all the trees
+   * \param pred_leaf whether to only predict the leaf index of each tree in a boosted tree predictor
   */
  inline void Predict(const DMatrix &data,
                      bool output_margin,
@@ -358,7 +359,7 @@ class BoostLearner : public rabit::Serializable {
    }
  }
  /*!
-   * \brief online prediction funciton, predict score for one instance at a time
+   * \brief online prediction function, predict score for one instance at a time
   *  NOTE: use the batch prediction interface if possible, batch prediction is usually
   *        more efficient than online prediction
   *        This function is NOT threadsafe, make sure you only call from one thread
@@ -367,7 +368,6 @@ class BoostLearner : public rabit::Serializable {
   * \param output_margin whether to only predict margin value instead of transformed prediction
   * \param out_preds output vector to hold the predictions
   * \param ntree_limit limit the number of trees used in prediction
-   * \param root_index the root index
   * \sa Predict
   */
  inline void Predict(const SparseBatch::Inst &inst,
@@ -452,7 +452,7 @@ class BoostLearner : public rabit::Serializable {
    float base_score;
    /* \brief number of features */
    unsigned num_feature;
-    /* \brief number of class, if it is multi-class classification */
+    /* \brief number of classes, if it is multi-class classification */
    int num_class;
    /*! \brief whether the model itself is saved with pbuffer */
    int saved_with_pbuffer;
@@ -495,7 +495,7 @@ class BoostLearner : public rabit::Serializable {
  int updater_mode;
  // cached size of predict buffer
  size_t pred_buffer_size;
-  // maximum buffred row value
+  // maximum buffered row value
  float prob_buffer_row;
  // evaluation set
  EvalSet evaluator_;
@@ -505,13 +505,13 @@ class BoostLearner : public rabit::Serializable {
  gbm::IGradBooster *gbm_;
  // name of gbm model used for training
  std::string name_gbm_;
-  // objective fnction
+  // objective function
  IObjFunction *obj_;
  // name of objective function
  std::string name_obj_;
  // configurations
  std::vector< std::pair<std::string, std::string> > cfg_;
-  // temporal storages for prediciton
+  // temporal storages for prediction
  std::vector<float> preds_;
  // gradient pairs
  std::vector<bst_gpair> gpair_;
@@ -527,7 +527,7 @@ class BoostLearner : public rabit::Serializable {
    CacheEntry(const DMatrix *mat, size_t buffer_offset, size_t num_row)
        :mat_(mat), buffer_offset_(buffer_offset), num_row_(num_row) {}
  };
-  // find internal bufer offset for certain matrix, if not exist, return -1
+  // find internal buffer offset for certain matrix, if not exist, return -1
  inline int64_t FindBufferOffset(const DMatrix &mat) const {
    for (size_t i = 0; i < cache_.size(); ++i) {
      if (cache_[i].mat_ == &mat && mat.cache_learner_ptr_ == this) {
diff --git a/src/learner/objective-inl.hpp b/src/learner/objective-inl.hpp
index b6d388e3c..ce23b02fb 100644
--- a/src/learner/objective-inl.hpp
+++ b/src/learner/objective-inl.hpp
@@ -84,7 +84,7 @@ struct LossType {
   * \return second order gradient
   */
  inline float SecondOrderGradient(float predt, float label) const {
-    // cap second order gradient to postive value
+    // cap second order gradient to positive value
    const float eps = 1e-16f;
    switch (loss_type) {
      case kLinearSquare: return 1.0f;
diff --git a/src/learner/objective.h b/src/learner/objective.h
index 08b57f528..774286854 100644
--- a/src/learner/objective.h
+++ b/src/learner/objective.h
@@ -68,7 +68,7 @@ class IObjFunction{
 // factory function
 namespace xgboost {
 namespace learner {
-/*! \brief factory funciton to create objective function by name */
+/*! \brief factory function to create objective function by name */
inline IObjFunction* CreateObjFunction(const char *name) {
  using namespace std;
  if (!strcmp("reg:linear", name)) return new RegLossObj(LossType::kLinearSquare);
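
For orientation on the "cap second order gradient" comment: the logistic
branch follows the textbook derivatives, with the hessian clamped so it stays
strictly positive. A standalone sketch of those formulas (not the exact code):

    #include <algorithm>
    #include <cmath>

    // For p = sigmoid(margin) and label y: grad = p - y, hess = p * (1 - p).
    inline float Sigmoid(float x) { return 1.0f / (1.0f + std::exp(-x)); }
    inline float LogisticGrad(float p, float y) { return p - y; }
    inline float LogisticHess(float p) {
      const float eps = 1e-16f;              // same cap as in LossType
      return std::max(p * (1.0f - p), eps);  // keeps the update well-defined
    }
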
diff --git a/src/tree/model.h b/src/tree/model.h
index 6a22aa5f1..6f2479cc2 100644
--- a/src/tree/model.h
+++ b/src/tree/model.h
@@ -321,9 +321,9 @@ class TreeModel {
   */
  inline void SaveModel(utils::IStream &fo) const { // NOLINT(*)
    utils::Assert(param.num_nodes == static_cast<int>(nodes.size()),
-                  "Tree::SaveModel");
+                  "TreeModel::SaveModel");
    utils::Assert(param.num_nodes == static_cast<int>(stats.size()),
-                  "Tree::SaveModel");
+                  "TreeModel::SaveModel");
    fo.Write(&param, sizeof(Param));
    utils::Assert(param.num_nodes != 0, "invalid model");
    fo.Write(BeginPtr(nodes), sizeof(Node) * nodes.size());
@@ -462,7 +462,7 @@ class TreeModel {

/*! \brief node statistics used in regression tree */
struct RTreeNodeStat {
-  /*! \brief loss chg caused by current split */
+  /*! \brief loss change caused by current split */
  float loss_chg;
  /*! \brief sum of hessian values, used to measure coverage of data */
  float sum_hess;
@@ -485,7 +485,7 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat>{
 public:
  /*!
   * \brief dense feature vector that can be taken by RegTree
-   * to do tranverse efficiently
+   * to do traversal efficiently
   * and can be construct from sparse feature vector
   */
  struct FVec {
@@ -498,7 +498,7 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat>{
      int flag;
    };
    std::vector<Entry> data;
-    /*! \brief intialize the vector with size vector */
+    /*! \brief initialize the vector with size vector */
    inline void Init(size_t size) {
      Entry e; e.flag = -1;
      data.resize(size);
@@ -529,14 +529,14 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat>{
  };
  /*!
   * \brief get the leaf index
-   * \param feats dense feature vector, if the feature is missing the field is set to NaN
-   * \param root_gid starting root index of the instance
+   * \param feat dense feature vector, if the feature is missing the field is set to NaN
+   * \param root_id starting root index of the instance
   * \return the leaf index of the given feature
   */
-  inline int GetLeafIndex(const FVec&feat, unsigned root_id = 0) const {
+  inline int GetLeafIndex(const FVec &feat, unsigned root_id = 0) const {
    // start from groups that belongs to current data
    int pid = static_cast<int>(root_id);
-    // tranverse tree
+    // traverse tree
    while (!(*this)[ pid ].is_leaf()) {
      unsigned split_index = (*this)[pid].split_index();
      pid = this->GetNext(pid, feat.fvalue(split_index), feat.is_missing(split_index));
@@ -546,7 +546,7 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat>{
  /*!
   * \brief get the prediction of regression tree, only accepts dense feature vector
   * \param feats dense feature vector, if the feature is missing the field is set to NaN
-   * \param root_gid starting root index of the instance
+   * \param root_id starting root index of the instance
   * \return the leaf index of the given feature
   */
  inline float Predict(const FVec &feat, unsigned root_id = 0) const {
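
A quick sketch of how FVec and GetLeafIndex fit together at prediction time
(illustrative; FVec's Fill/Drop helpers are assumed from the same file, and
`tree`, `inst`, and `num_feature` are hypothetical):

    RegTree::FVec feat;
    feat.Init(num_feature);                 // every slot flagged as missing
    feat.Fill(inst);                        // copy the present features in
    int leaf = tree.GetLeafIndex(feat, 0);  // walk from root_id = 0
    float score = tree.Predict(feat);       // same walk, returns leaf value
    feat.Drop(inst);                        // reset slots so feat can be reused
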
diff --git a/src/tree/param.h b/src/tree/param.h
index c6060ffbf..364e3572d 100644
--- a/src/tree/param.h
+++ b/src/tree/param.h
@@ -32,7 +32,7 @@ struct TrainParam{
  // default direction choice
  int default_direction;
  // maximum delta update we can add in weight estimation
-  // this parameter can be used to stablize update
+  // this parameter can be used to stabilize update
  // default=0 means no constraint on weight delta
  float max_delta_step;
  // whether we want to do subsample
@@ -51,7 +51,7 @@ struct TrainParam{
  int size_leaf_vector;
  // option for parallelization
  int parallel_option;
-  // option to open cacheline optimizaton
+  // option to open cacheline optimization
  int cache_opt;
  // number of threads to be used for tree construction,
  // if OpenMP is enabled, if equals 0, use system default
@@ -132,7 +132,7 @@ struct TrainParam{
      }
    }
  }
-  // calculate cost of loss function with four stati
+  // calculate cost of loss function with four statistics
  inline double CalcGain(double sum_grad, double sum_hess,
                         double test_grad, double test_hess) const {
    double w = CalcWeight(sum_grad, sum_hess);
@@ -167,7 +167,7 @@ struct TrainParam{
  inline bool need_backward_search(float col_density, bool indicator) const {
    return this->default_direction != 2;
  }
-  /*! \brief given the loss change, whether we need to invode prunning */
+  /*! \brief given the loss change, whether we need to invoke pruning */
  inline bool need_prune(double loss_chg, int depth) const {
    return loss_chg < this->min_split_loss;
  }
@@ -235,7 +235,7 @@ struct GradStats {
    const bst_gpair &b = gpair[ridx];
    this->Add(b.grad, b.hess);
  }
-  /*! \brief caculate leaf weight */
+  /*! \brief calculate leaf weight */
  inline double CalcWeight(const TrainParam &param) const {
    return param.CalcWeight(sum_grad, sum_hess);
  }
@@ -362,10 +362,10 @@ struct SplitEntry{
  /*! \brief constructor */
  SplitEntry(void) : loss_chg(0.0f), sindex(0), split_value(0.0f) {}
  /*!
-   * \brief decides whether a we can replace current entry with the statistics given
-   *  This function gives better priority to lower index when loss_chg equals
-   *  not the best way, but helps to give consistent result during multi-thread execution
-   * \param loss_chg the loss reduction get through the split
+   * \brief decides whether we can replace current entry with the given statistics
+   *  This function gives better priority to lower index when loss_chg == new_loss_chg.
+   *  Not the best way, but helps to give consistent result during multi-thread execution.
+   * \param new_loss_chg the loss reduction gained through the split
   * \param split_index the feature index where the split is on
   */
  inline bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const {
@@ -392,9 +392,9 @@ struct SplitEntry{
  }
  /*!
   * \brief update the split entry, replace it if e is better
-   * \param loss_chg loss reduction of new candidate
+   * \param new_loss_chg loss reduction of new candidate
   * \param split_index feature index to split on
-   * \param split_value the split point
+   * \param new_split_value the split point
   * \param default_left whether the missing value goes to left
   * \return whether the proposed split is better and can replace current split
   */
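
For context on what CalcWeight/CalcGain compute: with G and H the summed
gradient and hessian statistics of a node, the standard second-order formulas
(ignoring max_delta_step and the L1 term for brevity; a sketch, not the exact
code) are:

    // w* = -G / (H + lambda),  gain(G, H) = G^2 / (H + lambda)
    double CalcWeight(double sum_grad, double sum_hess, double reg_lambda) {
      return -sum_grad / (sum_hess + reg_lambda);
    }
    double CalcGain(double sum_grad, double sum_hess, double reg_lambda) {
      return (sum_grad * sum_grad) / (sum_hess + reg_lambda);
    }
    // A split is kept when gain(GL, HL) + gain(GR, HR) - gain(GL+GR, HL+HR)
    // is at least min_split_loss; cf. need_prune above.
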
diff --git a/src/tree/updater.h b/src/tree/updater.h
index 1cf74a699..ff4da5e98 100644
--- a/src/tree/updater.h
+++ b/src/tree/updater.h
@@ -26,11 +26,11 @@ class IUpdater {
   */
  virtual void SetParam(const char *name, const char *val) = 0;
  /*!
-   * \brief peform update to the tree models
+   * \brief perform update to the tree models
   * \param gpair the gradient pair statistics of the data
   * \param p_fmat feature matrix that provide access to features
   * \param info extra side information that may be need, such as root index
-   * \param trees pointer to the trees to be updated, upater will change the content of the tree
+   * \param trees references the trees to be updated, updater will change the content of trees
   *  note: all the trees in the vector are updated, with the same statistics,
   *     but maybe different random seeds, usually one tree is passed in at a time,
   *     there can be multiple trees when we train random forest style model
@@ -53,7 +53,7 @@ class IUpdater {
  virtual ~IUpdater(void) {}
};
/*!
- * \brief create a updater based on name
+ * \brief create an updater based on name
 * \param name name of updater
 * \return return the updater instance
 */
diff --git a/src/tree/updater_colmaker-inl.hpp b/src/tree/updater_colmaker-inl.hpp
index e3070d495..1f89f7ed4 100644
--- a/src/tree/updater_colmaker-inl.hpp
+++ b/src/tree/updater_colmaker-inl.hpp
@@ -17,7 +17,7 @@
 namespace xgboost {
 namespace tree {
-/*! \brief colunwise update to construct a tree */
+/*! \brief column-wise update to construct a tree */
template<typename TStats>
class ColMaker: public IUpdater {
 public:
diff --git a/src/tree/updater_prune-inl.hpp b/src/tree/updater_prune-inl.hpp
index dc99e94e4..2b90646be 100644
--- a/src/tree/updater_prune-inl.hpp
+++ b/src/tree/updater_prune-inl.hpp
@@ -14,7 +14,7 @@
 namespace xgboost {
 namespace tree {
-/*! \brief pruner that prunes a tree after growing finishs */
+/*! \brief pruner that prunes a tree after growing finishes */
class TreePruner: public IUpdater {
 public:
  virtual ~TreePruner(void) {}
@@ -56,7 +56,7 @@ class TreePruner: public IUpdater {
      return npruned;
    }
  }
-  /*! \brief do prunning of a tree */
+  /*! \brief do pruning of a tree */
  inline void DoPrune(RegTree &tree) { // NOLINT(*)
    int npruned = 0;
    // initialize auxiliary statistics
@@ -69,7 +69,7 @@ class TreePruner: public IUpdater {
      }
    }
    if (silent == 0) {
-      utils::Printf("tree prunning end, %d roots, %d extra nodes, %d pruned nodes ,max_depth=%d\n",
+      utils::Printf("tree pruning end, %d roots, %d extra nodes, %d pruned nodes, max_depth=%d\n",
                    tree.param.num_roots, tree.num_extra_nodes(),
                    npruned, tree.MaxDepth());
    }
  }
diff --git a/src/utils/base64-inl.h b/src/utils/base64-inl.h
index 49cd65254..be99e07b7 100644
--- a/src/utils/base64-inl.h
+++ b/src/utils/base64-inl.h
@@ -91,7 +91,7 @@ class Base64InStream: public IStream {
   *  call this function before actually start read
   */
  inline void InitPosition(void) {
-    // get a charater
+    // get a character
    do {
      tmp_ch = reader_.GetChar();
    } while (isspace(tmp_ch));
@@ -223,7 +223,7 @@ class Base64OutStream: public IStream {
  }
  /*!
   * \brief finish writing of all current base64 stream, do some post processing
-   * \param endch charater to put to end of stream, if it is EOF, then nothing will be done
+   * \param endch character to put to end of stream, if it is EOF, then nothing will be done
   */
  inline void Finish(char endch = EOF) {
    using base64::EncodeTable;
diff --git a/src/utils/fmap.h b/src/utils/fmap.h
index 218a61aa4..cc06b7021 100644
--- a/src/utils/fmap.h
+++ b/src/utils/fmap.h
@@ -58,7 +58,7 @@ class FeatMap {
  }
  /*! \brief return type of specific feature */
  const Type& type(size_t idx) const {
-    utils::Assert(idx < names_.size(), "utils::FMap::name feature index exceed bound");
+    utils::Assert(idx < names_.size(), "utils::FMap::type feature index exceed bound");
    return types_[idx];
  }
diff --git a/src/utils/iterator.h b/src/utils/iterator.h
index 5d986b2e4..73068dbbf 100644
--- a/src/utils/iterator.h
+++ b/src/utils/iterator.h
@@ -23,7 +23,7 @@ class IIterator {
   * \param val value of parameter
   */
  virtual void SetParam(const char *name, const char *val) {}
-  /*! \brief initalize the iterator so that we can use the iterator */
+  /*! \brief initialize the iterator so that we can use the iterator */
  virtual void Init(void) {}
  /*! \brief set before first of the item */
  virtual void BeforeFirst(void) = 0;
diff --git a/src/utils/quantile.h b/src/utils/quantile.h
index adcd0222d..d1c029f65 100644
--- a/src/utils/quantile.h
+++ b/src/utils/quantile.h
@@ -214,7 +214,7 @@ struct WQSummary {
  /*!
   * \brief set current summary to be merged summary of sa and sb
   * \param sa first input summary to be merged
-   * \param sb second input summar to be merged
+   * \param sb second input summary to be merged
   */
  inline void SetCombine(const WQSummary &sa,
                         const WQSummary &sb) {
@@ -329,7 +329,7 @@ struct WQSummary {
  }
};

-/*! \brief try to do efficient prunning */
+/*! \brief try to do efficient pruning */
template<typename DType, typename RType>
struct WXQSummary : public WQSummary<DType, RType> {
  // redefine entry type
@@ -364,7 +364,7 @@ struct WXQSummary : public WQSummary<DType, RType> {
    RType mrange = 0;
    {
      // first scan, grab all the big chunk
-      // moviing block index
+      // moving block index
      size_t bid = 0;
      for (size_t i = 1; i < src.size; ++i) {
        if (CheckLarge(src.data[i], chunk)) {
@@ -574,7 +574,7 @@ struct GKSummary {
};

/*!
- * \brief template for all quantle sketch algorithm
+ * \brief template for all quantile sketch algorithms
 *  that uses merge/prune scheme
 * \tparam DType type of data content
 * \tparam RType type of rank
@@ -605,7 +605,7 @@ class QuantileSketchTemplate {
  }
  /*!
   * \brief set the space to be merge of all Summary arrays
-   * \param begin begining position in th summary array
+   * \param begin beginning position in the summary array
   * \param end ending position in the Summary array
   */
  inline void SetMerge(const Summary *begin,
                       const Summary *end) {
@@ -664,7 +664,7 @@ class QuantileSketchTemplate {
  }
};
/*!
- * \brief intialize the quantile sketch, given the performance specification
+ * \brief initialize the quantile sketch, given the performance specification
 * \param maxn maximum number of data points can be feed into sketch
 * \param eps accuracy level of summary
 */
@@ -688,7 +688,7 @@ class QuantileSketchTemplate {
}
/*!
 * \brief add an element to a sketch
- * \param x the elemented added to the sketch
+ * \param x the element added to the sketch
 */
inline void Push(DType x, RType w = 1) {
  if (w == static_cast<RType>(0)) return;
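
To make the Init/Push documentation above concrete, a caller might look
roughly like this (WXQuantileSketch is assumed to be the WXQSummary-backed
instantiation in this header; `values`/`weights` are hypothetical):

    // Build an eps-approximate weighted quantile summary of streamed data.
    utils::WXQuantileSketch<bst_float, bst_float> sketch;
    sketch.Init(num_points, 0.05);           // eps = accuracy level of summary
    for (size_t i = 0; i < num_points; ++i) {
      sketch.Push(values[i], weights[i]);    // weighted element
    }
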
diff --git a/src/utils/random.h b/src/utils/random.h
index 7d52c2ae7..8e3255cf3 100644
--- a/src/utils/random.h
+++ b/src/utils/random.h
@@ -27,7 +27,7 @@ inline void Seed(unsigned seed) {
inline double Uniform(void) {
  return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX)+1.0); // NOLINT(*)
}
-/*! \brief return a real numer uniform in (0,1) */
+/*! \brief return a real number uniform in (0,1) */
inline double NextDouble2(void) {
  return (static_cast<double>(rand()) + 1.0) / (static_cast<double>(RAND_MAX)+2.0); // NOLINT(*)
}
diff --git a/src/utils/thread_buffer.h b/src/utils/thread_buffer.h
index bc4fb9f5e..8acb8ffd0 100644
--- a/src/utils/thread_buffer.h
+++ b/src/utils/thread_buffer.h
@@ -21,8 +21,8 @@ namespace utils {
#if !defined(XGBOOST_STRICT_CXX98_)
/*!
 * \brief buffered loading iterator that uses multithread
- * this template method will assume the following paramters
- * \tparam Elem elememt type to be buffered
+ * this template method will assume the following parameters
+ * \tparam Elem element type to be buffered
 * \tparam ElemFactory factory type to implement in order to use thread buffer
 */
template<typename Elem, typename ElemFactory>
class ThreadBuffer {
@@ -45,7 +45,7 @@ class ThreadBuffer {
  /*!
   * \brief initalize the buffered iterator
   * \param param a initialize parameter that will pass to factory, ignore it if not necessary
-   * \return false if the initlization can't be done, e.g. buffer file hasn't been created
+   * \return false if the initialization can't be done, e.g. buffer file hasn't been created
   */
  inline bool Init(void) {
    if (!factory.Init()) return false;
@@ -61,7 +61,7 @@ class ThreadBuffer {
  inline void BeforeFirst(void) {
    // wait till last loader end loading
    loading_end.Wait();
-    // critcal zone
+    // critical zone
    current_buf = 1;
    factory.BeforeFirst();
    // reset terminate limit
diff --git a/src/utils/utils.h b/src/utils/utils.h
index 7a8f18390..4d06d3c61 100644
--- a/src/utils/utils.h
+++ b/src/utils/utils.h
@@ -62,7 +62,7 @@ const int kPrintBuffer = 1 << 12;

#ifndef XGBOOST_CUSTOMIZE_MSG_
/*!
- * \brief handling of Assert error, caused by in-apropriate input
+ * \brief handling of Assert error, caused by inappropriate input
 * \param msg error message
 */
inline void HandleAssertError(const char *msg) {
  exit(-1);
}
/*!
- * \brief handling of Check error, caused by in-apropriate input
+ * \brief handling of Check error, caused by inappropriate input
 * \param msg error message
 */
inline void HandleCheckError(const char *msg) {
@@ -157,7 +157,7 @@ inline std::FILE *FopenCheck(const char *fname, const char *flag) {
  return fp;
}
}  // namespace utils
-// easy utils that can be directly acessed in xgboost
+// easy utils that can be directly accessed in xgboost
/*! \brief get the beginning address of a vector */
template<typename T>
inline T *BeginPtr(std::vector<T> &vec) {  // NOLINT(*)