From c70022e6c46b744ca4e828dd443371f9d12e70d4 Mon Sep 17 00:00:00 2001
From: Vadim Khotilovich
Date: Sat, 12 Dec 2015 21:40:12 -0600
Subject: [PATCH] spelling, wording, and doc fixes in C++ code

I was reading through the code and fixed various things in the comments.
Only a few trivial code changes were made, to make things more readable.
---
 src/data.h                        | 18 ++++++++--------
 src/gbm/gbm.h                     |  6 +++---
 src/gbm/gbtree-inl.hpp            |  6 +++---
 src/io/io.h                       |  4 ++--
 src/io/libsvm_parser.h            |  2 +-
 src/io/page_fmatrix-inl.hpp       | 10 +++++----
 src/io/simple_dmatrix-inl.hpp     |  4 ++--
 src/io/simple_fmatrix-inl.hpp     |  6 +++---
 src/io/sparse_batch_page.h        |  3 +--
 src/learner/dmatrix.h             |  2 +-
 src/learner/evaluation-inl.hpp    | 34 +++++++++++++++++--------------
 src/learner/helper_utils.h        |  6 +++---
 src/learner/learner-inl.hpp       | 22 ++++++++++----------
 src/learner/objective-inl.hpp     |  2 +-
 src/learner/objective.h           |  2 +-
 src/tree/model.h                  | 20 +++++++++---------
 src/tree/param.h                  | 22 ++++++++++----------
 src/tree/updater.h                |  6 +++---
 src/tree/updater_colmaker-inl.hpp |  2 +-
 src/tree/updater_prune-inl.hpp    |  6 +++---
 src/utils/base64-inl.h            |  4 ++--
 src/utils/fmap.h                  |  2 +-
 src/utils/iterator.h              |  2 +-
 src/utils/quantile.h              | 14 ++++++-------
 src/utils/random.h                |  2 +-
 src/utils/thread_buffer.h         |  8 ++++----
 src/utils/utils.h                 |  6 +++---
 27 files changed, 113 insertions(+), 108 deletions(-)

diff --git a/src/data.h b/src/data.h
index 3c4a14987..9bcb84ced 100644
--- a/src/data.h
+++ b/src/data.h
@@ -14,7 +14,7 @@
 namespace xgboost {
 /*!
- * \brief unsigned interger type used in boost,
+ * \brief unsigned integer type used in xgboost,
  *        used for feature index and row index
  */
 typedef unsigned bst_uint;
@@ -35,8 +35,8 @@ struct bst_gpair {
 };
 /*!
- * \brief extra information that might needed by gbm and tree module
- * these information are not necessarily presented, and can be empty
+ * \brief extra information that might be needed by gbm and tree module
+ * this information is not necessarily present, and can be empty
  */
 struct BoosterInfo {
   /*! \brief number of rows in the data */
@@ -53,7 +53,7 @@ struct BoosterInfo {
   /*! \brief number of rows, number of columns */
   BoosterInfo(void) : num_row(0), num_col(0) {
   }
-  /*! \brief get root of ith instance */
+  /*! \brief get root of i-th instance */
   inline unsigned GetRoot(size_t i) const {
     return root_index.size() == 0 ? 0 : root_index[i];
   }
@@ -120,13 +120,13 @@ struct ColBatch : public SparseBatch {
 };
 /**
  * \brief interface of feature matrix, needed for tree construction
- * this interface defines two way to access features,
- * row access is defined by iterator of RowBatch
- * col access is optional, checked by HaveColAccess, and defined by iterator of ColBatch
+ * this interface defines two ways to access features:
+ * row access is defined by iterator of RowBatch
+ * col access is optional, checked by HaveColAccess, and defined by iterator of ColBatch
  */
 class IFMatrix {
  public:
-  // the interface only need to ganrantee row iter
+  // the interface only needs to guarantee the row iter
   // column iter is active, when ColIterator is called, row_iter can be disabled
   /*! \brief get the row iterator associated with FMatrix */
   virtual utils::IIterator<RowBatch> *RowIterator(void) = 0;
@@ -142,7 +142,7 @@ class IFMatrix {
    * \brief check if column access is supported, if not, initialize column access
    * \param enabled whether certain feature should be included in column access
    * \param subsample subsample ratio when generating column access
-   * \param max_row_perbatch auxilary information, maximum row used in each column batch
+   * \param max_row_perbatch auxiliary information, maximum row used in each column batch
    *        this is a hint information that can be ignored by the implementation
    */
   virtual void InitColAccess(const std::vector<bool> &enabled,
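
For reference, the row-access pattern described in the IFMatrix comment above
looks like the following sketch (the loop mirrors DMatrixSimple::CopyFrom
further down in this patch; IIterator's Value() accessor and RowBatch's
size/indexing members are assumed from their headers):

    // Sketch: stream over all rows of an IFMatrix *fmat, one batch at a time.
    utils::IIterator<RowBatch> *iter = fmat->RowIterator();
    iter->BeforeFirst();
    while (iter->Next()) {
      const RowBatch &batch = iter->Value();
      for (size_t i = 0; i < batch.size; ++i) {
        RowBatch::Inst inst = batch[i];  // one sparse row
        // ... consume inst.data[0] .. inst.data[inst.length - 1] ...
      }
    }
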
diff --git a/src/gbm/gbm.h b/src/gbm/gbm.h
index 60b7474e1..8ff692c05 100644
--- a/src/gbm/gbm.h
+++ b/src/gbm/gbm.h
@@ -58,7 +58,7 @@ class IGradBooster {
     return false;
   }
   /*!
-   * \brief peform update to the model(boosting)
+   * \brief perform update to the model (boosting)
    * \param p_fmat feature matrix that provide access to features
    * \param buffer_offset buffer index offset of these instances, if equals -1
    *        this means we do not have buffer index allocated to the gbm
@@ -88,7 +88,7 @@ class IGradBooster {
                        std::vector<float> *out_preds,
                        unsigned ntree_limit = 0) = 0;
   /*!
-   * \brief online prediction funciton, predict score for one instance at a time
+   * \brief online prediction function, predict score for one instance at a time
    *  NOTE: use the batch prediction interface if possible, batch prediction is usually
    *        more efficient than online prediction
    *        This function is NOT threadsafe, make sure you only call from one thread
@@ -119,7 +119,7 @@ class IGradBooster {
   /*!
    * \brief dump the model in text format
    * \param fmap feature map that may help give interpretations of feature
-   * \param option extra option of the dumo model
+   * \param option extra option for the model dump
    * \return a vector of dump for boosters
    */
   virtual std::vector<std::string> DumpModel(const utils::FeatMap& fmap, int option) = 0;
diff --git a/src/gbm/gbtree-inl.hpp b/src/gbm/gbtree-inl.hpp
index c06dc51a1..65fe7e9da 100644
--- a/src/gbm/gbtree-inl.hpp
+++ b/src/gbm/gbtree-inl.hpp
@@ -31,7 +31,7 @@ class GBTree : public IGradBooster {
     using namespace std;
     if (!strncmp(name, "bst:", 4)) {
       cfg.push_back(std::make_pair(std::string(name+4), std::string(val)));
-      // set into updaters, if already intialized
+      // set into updaters, if already initialized
       for (size_t i = 0; i < updaters.size(); ++i) {
         updaters[i]->SetParam(name+4, val);
       }
@@ -85,7 +85,7 @@ class GBTree : public IGradBooster {
       fo.Write(BeginPtr(pred_counter), pred_counter.size() * sizeof(unsigned));
     }
   }
-  // initialize the predic buffer
+  // initialize the predict buffer
   virtual void InitModel(void) {
     pred_buffer.clear(); pred_counter.clear();
     pred_buffer.resize(mparam.PredBufferSize(), 0.0f);
@@ -446,7 +446,7 @@ class GBTree : public IGradBooster {
     int num_roots;
     /*! \brief number of features to be used by trees */
     int num_feature;
-    /*! \brief size of predicton buffer allocated used for buffering */
+    /*! \brief size of prediction buffer allocated, used for buffering */
     int64_t num_pbuffer;
     /*!
      * \brief how many output group a single instance can produce
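
As the prediction docs above stress, the batch interface should be preferred
over the online one. A rough caller-side contrast (signatures abbreviated to
the parameters documented above, so treat this purely as a sketch):

    std::vector<float> preds;
    // batch: scores every row of the feature matrix in one call
    gbm->Predict(p_fmat, buffer_offset, info, &preds);
    // online: scores a single sparse instance; NOT threadsafe
    gbm->Predict(inst, &preds, /*ntree_limit=*/0);
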
diff --git a/src/io/io.h b/src/io/io.h
index 267bb0bff..6ceff2698 100644
--- a/src/io/io.h
+++ b/src/io/io.h
@@ -22,7 +22,7 @@ typedef learner::DMatrix DataMatrix;
  * \param silent whether print message during loading
  * \param savebuffer whether temporal buffer the file if the file is in text format
  * \param loadsplit whether we only load a split of input files
- *        such that each worker node get a split of the data
+ *        such that each worker node gets a split of the data
  * \param cache_file name of cache_file, used by external memory version
  *        can be NULL, if cache_file is specified, this will be the temporal
  *        space that can be re-used to store intermediate data
@@ -38,7 +38,7 @@ DataMatrix* LoadDataMatrix(const char *fname,
  * note: the saved dmatrix format may not be in exactly same as input
  *       SaveDMatrix will choose the best way to materialize the dmatrix.
  * \param dmat the dmatrix to be saved
- * \param fname file name to be savd
+ * \param fname file name to be saved
  * \param silent whether print message during saving
  */
 void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent = false);
diff --git a/src/io/libsvm_parser.h b/src/io/libsvm_parser.h
index 92eeaf35d..43b8d6b90 100644
--- a/src/io/libsvm_parser.h
+++ b/src/io/libsvm_parser.h
@@ -31,7 +31,7 @@ struct LibSVMPage : public SparsePage {
 /*!
  * \brief libsvm parser that parses the input lines
  * and returns rows in input data
- * factry that was used by threadbuffer template
+ * factory that was used by threadbuffer template
  */
 class LibSVMPageFactory {
  public:
diff --git a/src/io/page_fmatrix-inl.hpp b/src/io/page_fmatrix-inl.hpp
index 2fa5c83bd..d2b71e50f 100644
--- a/src/io/page_fmatrix-inl.hpp
+++ b/src/io/page_fmatrix-inl.hpp
@@ -200,7 +200,7 @@ class FMatrixPage : public IFMatrix {
   virtual bool HaveColAccess(void) const {
     return col_size_.size() != 0;
   }
-  /*! \brief get number of colmuns */
+  /*! \brief get number of columns */
   virtual size_t NumCol(void) const {
     utils::Check(this->HaveColAccess(), "NumCol:need column access");
     return col_size_.size();
@@ -246,7 +246,7 @@ class FMatrixPage : public IFMatrix {
     return &col_iter_;
   }
   /*!
-   * \brief colmun based iterator
+   * \brief column based iterator
   */
  virtual utils::IIterator<ColBatch> *ColIterator(const std::vector<bst_uint> &fset) {
    size_t ncol = this->NumCol();
@@ -290,8 +290,10 @@ class FMatrixPage : public IFMatrix {
    fo->Write(col_size_);
  }
  /*!
-   * \brief intialize column data
+   * \brief initialize column data
+   * \param enabled the list of enabled columns
    * \param pkeep probability to keep a row
+   * \param max_row_perbatch maximum row per batch
    */
  inline void InitColData(const std::vector<bool> &enabled,
                          float pkeep, size_t max_row_perbatch) {
@@ -319,7 +321,7 @@ class FMatrixPage : public IFMatrix {
      bytes_write += spage;
      double tnow = rabit::utils::GetTime();
      double tdiff = tnow - tstart;
-      utils::Printf("Writting to %s in %g MB/s, %lu MB written\n",
+      utils::Printf("Writing to %s in %g MB/s, %lu MB written\n",
                    col_data_name_.c_str(),
                    (bytes_write >> 20UL) / tdiff,
                    (bytes_write >> 20UL));
diff --git a/src/io/simple_dmatrix-inl.hpp b/src/io/simple_dmatrix-inl.hpp
index 190cbdcdf..063b01665 100644
--- a/src/io/simple_dmatrix-inl.hpp
+++ b/src/io/simple_dmatrix-inl.hpp
@@ -51,7 +51,7 @@ class DMatrixSimple : public DataMatrix {
  inline void CopyFrom(const DataMatrix &src) {
    this->Clear();
    this->info = src.info;
-    // clone data content in thos matrix
+    // clone data contents from src matrix
    utils::IIterator<RowBatch> *iter = src.fmat()->RowIterator();
    iter->BeforeFirst();
    while (iter->Next()) {
@@ -313,7 +313,7 @@ class DMatrixSimple : public DataMatrix {
  private:
    // whether is at first
    bool at_first_;
-    // pointer to parient
+    // pointer to parent
    DMatrixSimple *parent_;
    // temporal space for batch
    RowBatch batch_;
diff --git a/src/io/simple_fmatrix-inl.hpp b/src/io/simple_fmatrix-inl.hpp
index 0e0da4461..e467263fa 100644
--- a/src/io/simple_fmatrix-inl.hpp
+++ b/src/io/simple_fmatrix-inl.hpp
@@ -40,7 +40,7 @@ class FMatrixS : public IFMatrix {
  virtual bool HaveColAccess(void) const {
    return col_size_.size() != 0;
  }
-  /*! \brief get number of colmuns */
+  /*! \brief get number of columns */
  virtual size_t NumCol(void) const {
    utils::Check(this->HaveColAccess(), "NumCol:need column access");
    return col_size_.size();
@@ -83,7 +83,7 @@ class FMatrixS : public IFMatrix {
    return &col_iter_;
  }
  /*!
-   * \brief colmun based iterator
+   * \brief column based iterator
   */
  virtual utils::IIterator<ColBatch> *ColIterator(const std::vector<bst_uint> &fset) {
    size_t ncol = this->NumCol();
@@ -112,7 +112,7 @@ class FMatrixS : public IFMatrix {

 protected:
  /*!
-   * \brief intialize column data
+   * \brief initialize column data
   * \param enabled the list of enabled columns
   * \param pkeep probability to keep a row
   * \param max_row_perbatch maximum row per batch
diff --git a/src/io/sparse_batch_page.h b/src/io/sparse_batch_page.h
index 24546f785..96810c0fb 100644
--- a/src/io/sparse_batch_page.h
+++ b/src/io/sparse_batch_page.h
@@ -33,8 +33,7 @@ class SparsePage {
    return offset.size() - 1;
  }
  /*!
-   * \brief load the by providing a list of interested segments
-   *  only the interested segments are loaded
+   * \brief load only the segments we are interested in
   * \param fi the input stream of the file
   * \param sorted_index_set sorted index of segments we are interested in
   * \return true of the loading as successful, false if end of file was reached
diff --git a/src/learner/dmatrix.h b/src/learner/dmatrix.h
index 3fbc579de..52828c3be 100644
--- a/src/learner/dmatrix.h
+++ b/src/learner/dmatrix.h
@@ -35,7 +35,7 @@ struct MetaInfo {
  std::vector<float> weights;
  /*!
   * \brief initialized margins,
-   * if specified, xgboost will start from this init margin
+   * if specified, xgboost will start from this initial margin
   *  can be used to specify initial prediction to boost from
   */
  std::vector<float> base_margin;
diff --git a/src/learner/evaluation-inl.hpp b/src/learner/evaluation-inl.hpp
index 2b69a43a8..d28702728 100644
--- a/src/learner/evaluation-inl.hpp
+++ b/src/learner/evaluation-inl.hpp
@@ -21,7 +21,7 @@ namespace xgboost {
 namespace learner {
 /*!
- * \brief base class of elementwise evaluation
+ * \brief base class of element-wise evaluation
  * \tparam Derived the name of subclass
  */
 template<typename Derived>
 struct EvalEWiseBase : public IEvaluator {
@@ -57,7 +57,7 @@ struct EvalEWiseBase : public IEvaluator {
   */
  inline static float EvalRow(float label, float pred);
  /*!
-   * \brief to be overide by subclas, final trasnformation
+   * \brief to be overridden by subclass, final transformation
   * \param esum the sum statistics returned by EvalRow
   * \param wsum sum of weight
   */
@@ -109,7 +109,7 @@ struct EvalError : public EvalEWiseBase<EvalError> {
  }
};

-/*! \brief loglikelihood of poission distribution */
+/*! \brief log-likelihood of Poisson distribution */
 struct EvalPoissionNegLogLik : public EvalEWiseBase<EvalPoissionNegLogLik> {
  virtual const char *Name(void) const {
    return "poisson-nloglik";
  }
@@ -174,7 +174,7 @@ struct EvalMClassBase : public IEvaluator {
                              const float *pred,
                              size_t nclass);
  /*!
-   * \brief to be overide by subclas, final trasnformation
+   * \brief to be overridden by subclass, final transformation
   * \param esum the sum statistics returned by EvalRow
   * \param wsum sum of weight
   */
@@ -367,7 +367,7 @@ struct EvalPrecisionRatio : public IEvaluator{
  std::string name_;
};

-/*! \brief Area under curve, for both classification and rank */
+/*! \brief Area Under Curve, for both classification and rank */
 struct EvalAuc : public IEvaluator {
  virtual float Eval(const std::vector<float> &preds,
                     const MetaInfo &info,
@@ -382,7 +382,7 @@ struct EvalAuc : public IEvaluator {
    utils::Check(gptr.back() == info.labels.size(),
                 "EvalAuc: group structure must match number of prediction");
    const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
-    // sum statictis
+    // sum statistics
    double sum_auc = 0.0f;
    #pragma omp parallel reduction(+:sum_auc)
    {
@@ -404,13 +404,16 @@ struct EvalAuc : public IEvaluator {
        // keep bucketing predictions in same bucket
        if (j != 0 && rec[j].first != rec[j - 1].first) {
          sum_pospair += buf_neg * (sum_npos + buf_pos *0.5);
-          sum_npos += buf_pos; sum_nneg += buf_neg;
+          sum_npos += buf_pos;
+          sum_nneg += buf_neg;
          buf_neg = buf_pos = 0.0f;
        }
-        buf_pos += ctr * wt; buf_neg += (1.0f - ctr) * wt;
+        buf_pos += ctr * wt;
+        buf_neg += (1.0f - ctr) * wt;
      }
      sum_pospair += buf_neg * (sum_npos + buf_pos *0.5);
-      sum_npos += buf_pos; sum_nneg += buf_neg;
+      sum_npos += buf_pos;
+      sum_nneg += buf_neg;
      // check weird conditions
      utils::Check(sum_npos > 0.0 && sum_nneg > 0.0,
                   "AUC: the dataset only contains pos or neg samples");
@@ -443,7 +446,8 @@ struct EvalRankList : public IEvaluator {
    utils::Check(preds.size() == info.labels.size(),
                 "label size predict size not match");
    // quick consistency when group is not available
-    std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(preds.size());
+    std::vector<unsigned> tgptr(2, 0);
+    tgptr[1] = static_cast<unsigned>(preds.size());
    const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ?
        tgptr : info.group_ptr;
    utils::Assert(gptr.size() != 0, "must specify group when constructing rank file");
    utils::Assert(gptr.back() == preds.size(),
@@ -468,7 +472,7 @@ struct EvalRankList : public IEvaluator {
      float dat[2];
      dat[0] = static_cast<float>(sum_metric);
      dat[1] = static_cast<float>(ngroup);
-      // approximately estimate auc using mean
+      // approximately estimate the metric using mean
      rabit::Allreduce<rabit::op::Sum>(dat, 2);
      return dat[0] / dat[1];
    } else {
@@ -500,14 +504,14 @@ struct EvalRankList : public IEvaluator {
  bool minus_;
};

-/*! \brief Precison at N, for both classification and rank */
+/*! \brief Precision at N, for both classification and rank */
 struct EvalPrecision : public EvalRankList{
 public:
  explicit EvalPrecision(const char *name) : EvalRankList(name) {}

 protected:
  virtual float EvalMetric(std::vector< std::pair<float, unsigned> > &rec) const {
-    // calculate Preicsion
+    // calculate Precision
    std::sort(rec.begin(), rec.end(), CmpFirst);
    unsigned nhit = 0;
    for (size_t j = 0; j < rec.size() && j < this->topn_; ++j) {
@@ -517,7 +521,7 @@ struct EvalPrecision : public EvalRankList{
  }
};

-/*! \brief NDCG */
+/*! \brief NDCG: Normalized Discounted Cumulative Gain at N */
 struct EvalNDCG : public EvalRankList{
 public:
  explicit EvalNDCG(const char *name) : EvalRankList(name) {}
@@ -549,7 +553,7 @@ struct EvalNDCG : public EvalRankList{
  }
};

-/*! \brief Precison at N, for both classification and rank */
+/*! \brief Mean Average Precision at N, for both classification and rank */
 struct EvalMAP : public EvalRankList {
 public:
  explicit EvalMAP(const char *name) : EvalRankList(name) {}
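
The rewrapped accumulation in EvalAuc is easier to follow in a plain,
single-threaded, unweighted form. The sketch below re-implements the same
bucketed pairwise counting on its own (illustrative only, not code from this
patch):

    #include <algorithm>
    #include <functional>
    #include <utility>
    #include <vector>

    // AUC = fraction of (positive, negative) pairs ranked correctly, where
    // ties within one prediction bucket count 0.5, as with buf_pos/buf_neg above.
    double SimpleAuc(std::vector< std::pair<float, int> > rec) {  // (pred, 0/1 label)
      // sort descending by prediction, like CmpFirst in helper_utils.h
      std::sort(rec.begin(), rec.end(), std::greater< std::pair<float, int> >());
      double sum_pospair = 0.0, sum_npos = 0.0, sum_nneg = 0.0;
      double buf_pos = 0.0, buf_neg = 0.0;
      for (size_t j = 0; j < rec.size(); ++j) {
        if (j != 0 && rec[j].first != rec[j - 1].first) {
          sum_pospair += buf_neg * (sum_npos + buf_pos * 0.5);
          sum_npos += buf_pos; sum_nneg += buf_neg;
          buf_neg = buf_pos = 0.0;
        }
        buf_pos += rec[j].second;      // positives in the current bucket
        buf_neg += 1 - rec[j].second;  // negatives in the current bucket
      }
      sum_pospair += buf_neg * (sum_npos + buf_pos * 0.5);
      return sum_pospair / (sum_npos * sum_nneg);  // assumes both classes present
    }
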
diff --git a/src/learner/helper_utils.h b/src/learner/helper_utils.h
index 7ca7ba59c..0db1b46f3 100644
--- a/src/learner/helper_utils.h
+++ b/src/learner/helper_utils.h
@@ -45,7 +45,7 @@ inline static int FindMaxIndex(const std::vector<float>& rec) {
  return FindMaxIndex(BeginPtr(rec), rec.size());
}

-// perform numerical safe logsum
+// perform numerically safe logsum
inline float LogSum(float x, float y) {
  if (x < y) {
    return y + std::log(std::exp(x - y) + 1.0f);
@@ -53,7 +53,7 @@ inline float LogSum(float x, float y) {
    return x + std::log(std::exp(y - x) + 1.0f);
  }
}
-// numerical safe logsum
+// numerically safe logsum
inline float LogSum(const float *rec, size_t size) {
  float mx = rec[0];
  for (size_t i = 1; i < size; ++i) {
@@ -66,11 +66,11 @@ inline float LogSum(const float *rec, size_t size) {
  return mx + std::log(sum);
}

+// comparator functions for sorting pairs in descending order
inline static bool CmpFirst(const std::pair<float, unsigned> &a,
                            const std::pair<float, unsigned> &b) {
  return a.first > b.first;
}
-
inline static bool CmpSecond(const std::pair<float, unsigned> &a,
                             const std::pair<float, unsigned> &b) {
  return a.second > b.second;
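
The "numerically safe" wording matters: LogSum implements the log-sum-exp
trick, shifting by the maximum so std::exp never sees a large positive
argument. A minimal standalone demonstration (not part of the patch):

    #include <cmath>
    #include <cstdio>

    int main() {
      float a = 1000.0f, b = 999.0f;
      // naive log(exp(a) + exp(b)): exp(1000.0f) overflows to inf
      float naive = std::log(std::exp(a) + std::exp(b));
      // LogSum's form: the exp argument is <= 0, so nothing overflows
      float stable = a + std::log(std::exp(b - a) + 1.0f);
      std::printf("naive=%f stable=%f\n", naive, stable);  // inf vs ~1000.31
      return 0;
    }
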
diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp
index f051992d3..0e8480663 100644
--- a/src/learner/learner-inl.hpp
+++ b/src/learner/learner-inl.hpp
@@ -22,8 +22,8 @@ namespace xgboost {
 /*! \brief namespace for learning algorithm */
 namespace learner {
 /*!
- * \brief learner that takes do gradient boosting on specific objective functions
- *  and do training and prediction
+ * \brief learner that performs gradient boosting for a specific objective function.
+ *  It does training and prediction.
 */
class BoostLearner : public rabit::Serializable {
 public:
@@ -258,7 +258,7 @@ class BoostLearner : public rabit::Serializable {
  }
  /*!
   * \brief check if data matrix is ready to be used by training,
-   *  if not intialize it
+   *  if not initialize it
   * \param p_train pointer to the matrix used by training
   */
  inline void CheckInit(DMatrix *p_train) {
@@ -283,7 +283,7 @@ class BoostLearner : public rabit::Serializable {
  /*!
   * \brief update the model for one iteration
   * \param iter current iteration number
-   * \param p_train pointer to the data matrix
+   * \param train reference to the data matrix
   */
  inline void UpdateOneIter(int iter, const DMatrix &train) {
    if (seed_per_iteration != 0 || rabit::IsDistributed()) {
@@ -342,6 +342,7 @@ class BoostLearner : public rabit::Serializable {
   * \param out_preds output vector that stores the prediction
   * \param ntree_limit limit number of trees used for boosted tree
   *   predictor, when it equals 0, this means we are using all the trees
+   * \param pred_leaf whether to only predict the leaf index of each tree in a boosted tree predictor
   */
  inline void Predict(const DMatrix &data,
                      bool output_margin,
@@ -358,7 +359,7 @@ class BoostLearner : public rabit::Serializable {
    }
  }
  /*!
-   * \brief online prediction funciton, predict score for one instance at a time
+   * \brief online prediction function, predict score for one instance at a time
   *  NOTE: use the batch prediction interface if possible, batch prediction is usually
   *        more efficient than online prediction
   *        This function is NOT threadsafe, make sure you only call from one thread
@@ -367,7 +368,6 @@ class BoostLearner : public rabit::Serializable {
   * \param output_margin whether to only predict margin value instead of transformed prediction
   * \param out_preds output vector to hold the predictions
   * \param ntree_limit limit the number of trees used in prediction
-   * \param root_index the root index
   * \sa Predict
   */
  inline void Predict(const SparseBatch::Inst &inst,
@@ -452,7 +452,7 @@ class BoostLearner : public rabit::Serializable {
    float base_score;
    /* \brief number of features */
    unsigned num_feature;
-    /* \brief number of class, if it is multi-class classification */
+    /* \brief number of classes, if it is multi-class classification */
    int num_class;
    /*! \brief whether the model itself is saved with pbuffer */
    int saved_with_pbuffer;
@@ -495,7 +495,7 @@ class BoostLearner : public rabit::Serializable {
  int updater_mode;
  // cached size of predict buffer
  size_t pred_buffer_size;
-  // maximum buffred row value
+  // maximum buffered row value
  float prob_buffer_row;
  // evaluation set
  EvalSet evaluator_;
@@ -505,13 +505,13 @@ class BoostLearner : public rabit::Serializable {
  gbm::IGradBooster *gbm_;
  // name of gbm model used for training
  std::string name_gbm_;
-  // objective fnction
+  // objective function
  IObjFunction *obj_;
  // name of objective function
  std::string name_obj_;
  // configurations
  std::vector< std::pair<std::string, std::string> > cfg_;
-  // temporal storages for prediciton
+  // temporal storages for prediction
  std::vector<float> preds_;
  // gradient pairs
  std::vector<bst_gpair> gpair_;
@@ -527,7 +527,7 @@ class BoostLearner : public rabit::Serializable {
    CacheEntry(const DMatrix *mat, size_t buffer_offset, size_t num_row)
        :mat_(mat), buffer_offset_(buffer_offset), num_row_(num_row) {}
  };
-  // find internal bufer offset for certain matrix, if not exist, return -1
+  // find internal buffer offset for certain matrix, if not exist, return -1
  inline int64_t FindBufferOffset(const DMatrix &mat) const {
    for (size_t i = 0; i < cache_.size(); ++i) {
      if (cache_[i].mat_ == &mat && mat.cache_learner_ptr_ == this) {
diff --git a/src/learner/objective-inl.hpp b/src/learner/objective-inl.hpp
index b6d388e3c..ce23b02fb 100644
--- a/src/learner/objective-inl.hpp
+++ b/src/learner/objective-inl.hpp
@@ -84,7 +84,7 @@ struct LossType {
   * \return second order gradient
   */
  inline float SecondOrderGradient(float predt, float label) const {
-    // cap second order gradient to postive value
+    // cap second order gradient to positive value
    const float eps = 1e-16f;
    switch (loss_type) {
      case kLinearSquare: return 1.0f;
diff --git a/src/learner/objective.h b/src/learner/objective.h
index 08b57f528..774286854 100644
--- a/src/learner/objective.h
+++ b/src/learner/objective.h
@@ -68,7 +68,7 @@ class IObjFunction{
 // factory function
 namespace xgboost {
 namespace learner {
-/*! \brief factory funciton to create objective function by name */
+/*! \brief factory function to create objective function by name */
inline IObjFunction* CreateObjFunction(const char *name) {
  using namespace std;
  if (!strcmp("reg:linear", name)) return new RegLossObj(LossType::kLinearSquare);
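
For orientation on the "cap second order gradient" comment: the logistic
branch follows the textbook derivatives, with the hessian clamped so it stays
strictly positive. A standalone sketch of those formulas (not the exact code):

    #include <algorithm>
    #include <cmath>

    // For p = sigmoid(margin) and label y: grad = p - y, hess = p * (1 - p).
    inline float Sigmoid(float x) { return 1.0f / (1.0f + std::exp(-x)); }
    inline float LogisticGrad(float p, float y) { return p - y; }
    inline float LogisticHess(float p) {
      const float eps = 1e-16f;              // same cap as in LossType
      return std::max(p * (1.0f - p), eps);  // keeps the update well-defined
    }
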
diff --git a/src/tree/model.h b/src/tree/model.h
index 6a22aa5f1..6f2479cc2 100644
--- a/src/tree/model.h
+++ b/src/tree/model.h
@@ -321,9 +321,9 @@ class TreeModel {
   */
  inline void SaveModel(utils::IStream &fo) const { // NOLINT(*)
    utils::Assert(param.num_nodes == static_cast<int>(nodes.size()),
-                  "Tree::SaveModel");
+                  "TreeModel::SaveModel");
    utils::Assert(param.num_nodes == static_cast<int>(stats.size()),
-                  "Tree::SaveModel");
+                  "TreeModel::SaveModel");
    fo.Write(&param, sizeof(Param));
    utils::Assert(param.num_nodes != 0, "invalid model");
    fo.Write(BeginPtr(nodes), sizeof(Node) * nodes.size());
@@ -462,7 +462,7 @@ class TreeModel {

/*! \brief node statistics used in regression tree */
struct RTreeNodeStat {
-  /*! \brief loss chg caused by current split */
+  /*! \brief loss change caused by current split */
  float loss_chg;
  /*! \brief sum of hessian values, used to measure coverage of data */
  float sum_hess;
@@ -485,7 +485,7 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat>{
 public:
  /*!
   * \brief dense feature vector that can be taken by RegTree
-   * to do tranverse efficiently
+   * to do traversal efficiently
   * and can be construct from sparse feature vector
   */
  struct FVec {
@@ -498,7 +498,7 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat>{
      int flag;
    };
    std::vector<Entry> data;
-    /*! \brief intialize the vector with size vector */
+    /*! \brief initialize the vector with size vector */
    inline void Init(size_t size) {
      Entry e; e.flag = -1;
      data.resize(size);
@@ -529,14 +529,14 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat>{
  };
  /*!
   * \brief get the leaf index
-   * \param feats dense feature vector, if the feature is missing the field is set to NaN
-   * \param root_gid starting root index of the instance
+   * \param feat dense feature vector, if the feature is missing the field is set to NaN
+   * \param root_id starting root index of the instance
   * \return the leaf index of the given feature
   */
-  inline int GetLeafIndex(const FVec&feat, unsigned root_id = 0) const {
+  inline int GetLeafIndex(const FVec &feat, unsigned root_id = 0) const {
    // start from groups that belongs to current data
    int pid = static_cast<int>(root_id);
-    // tranverse tree
+    // traverse tree
    while (!(*this)[ pid ].is_leaf()) {
      unsigned split_index = (*this)[pid].split_index();
      pid = this->GetNext(pid, feat.fvalue(split_index), feat.is_missing(split_index));
@@ -546,7 +546,7 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat>{
  /*!
   * \brief get the prediction of regression tree, only accepts dense feature vector
   * \param feats dense feature vector, if the feature is missing the field is set to NaN
-   * \param root_gid starting root index of the instance
+   * \param root_id starting root index of the instance
   * \return the leaf index of the given feature
   */
  inline float Predict(const FVec &feat, unsigned root_id = 0) const {
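
A quick sketch of how FVec and GetLeafIndex fit together at prediction time
(illustrative; FVec's Fill/Drop helpers are assumed from the same file, and
`tree`, `inst`, and `num_feature` are hypothetical):

    RegTree::FVec feat;
    feat.Init(num_feature);                 // every slot flagged as missing
    feat.Fill(inst);                        // copy the present features in
    int leaf = tree.GetLeafIndex(feat, 0);  // walk from root_id = 0
    float score = tree.Predict(feat);       // same walk, returns leaf value
    feat.Drop(inst);                        // reset slots so feat can be reused
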
diff --git a/src/tree/param.h b/src/tree/param.h
index c6060ffbf..364e3572d 100644
--- a/src/tree/param.h
+++ b/src/tree/param.h
@@ -32,7 +32,7 @@ struct TrainParam{
  // default direction choice
  int default_direction;
  // maximum delta update we can add in weight estimation
-  // this parameter can be used to stablize update
+  // this parameter can be used to stabilize update
  // default=0 means no constraint on weight delta
  float max_delta_step;
  // whether we want to do subsample
@@ -51,7 +51,7 @@ struct TrainParam{
  int size_leaf_vector;
  // option for parallelization
  int parallel_option;
-  // option to open cacheline optimizaton
+  // option to open cacheline optimization
  int cache_opt;
  // number of threads to be used for tree construction,
  // if OpenMP is enabled, if equals 0, use system default
@@ -132,7 +132,7 @@ struct TrainParam{
      }
    }
  }
-  // calculate cost of loss function with four stati
+  // calculate cost of loss function with four statistics
  inline double CalcGain(double sum_grad, double sum_hess,
                         double test_grad, double test_hess) const {
    double w = CalcWeight(sum_grad, sum_hess);
@@ -167,7 +167,7 @@ struct TrainParam{
  inline bool need_backward_search(float col_density, bool indicator) const {
    return this->default_direction != 2;
  }
-  /*! \brief given the loss change, whether we need to invode prunning */
+  /*! \brief given the loss change, whether we need to invoke pruning */
  inline bool need_prune(double loss_chg, int depth) const {
    return loss_chg < this->min_split_loss;
  }
@@ -235,7 +235,7 @@ struct GradStats {
    const bst_gpair &b = gpair[ridx];
    this->Add(b.grad, b.hess);
  }
-  /*! \brief caculate leaf weight */
+  /*! \brief calculate leaf weight */
  inline double CalcWeight(const TrainParam &param) const {
    return param.CalcWeight(sum_grad, sum_hess);
  }
@@ -362,10 +362,10 @@ struct SplitEntry{
  /*! \brief constructor */
  SplitEntry(void) : loss_chg(0.0f), sindex(0), split_value(0.0f) {}
  /*!
-   * \brief decides whether a we can replace current entry with the statistics given
-   *  This function gives better priority to lower index when loss_chg equals
-   *  not the best way, but helps to give consistent result during multi-thread execution
-   * \param loss_chg the loss reduction get through the split
+   * \brief decides whether we can replace current entry with the given statistics
+   *  This function gives better priority to lower index when loss_chg == new_loss_chg.
+   *  Not the best way, but helps to give consistent result during multi-thread execution.
+   * \param new_loss_chg the loss reduction gained through the split
   * \param split_index the feature index where the split is on
   */
  inline bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const {
@@ -392,9 +392,9 @@ struct SplitEntry{
  }
  /*!
   * \brief update the split entry, replace it if e is better
-   * \param loss_chg loss reduction of new candidate
+   * \param new_loss_chg loss reduction of new candidate
   * \param split_index feature index to split on
-   * \param split_value the split point
+   * \param new_split_value the split point
   * \param default_left whether the missing value goes to left
   * \return whether the proposed split is better and can replace current split
   */
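
For context on what CalcWeight/CalcGain compute: with G and H the summed
gradient and hessian statistics of a node, the standard second-order formulas
(ignoring max_delta_step and the L1 term for brevity; a sketch, not the exact
code) are:

    // w* = -G / (H + lambda),  gain(G, H) = G^2 / (H + lambda)
    double CalcWeight(double sum_grad, double sum_hess, double reg_lambda) {
      return -sum_grad / (sum_hess + reg_lambda);
    }
    double CalcGain(double sum_grad, double sum_hess, double reg_lambda) {
      return (sum_grad * sum_grad) / (sum_hess + reg_lambda);
    }
    // A split is kept when gain(GL, HL) + gain(GR, HR) - gain(GL+GR, HL+HR)
    // is at least min_split_loss; cf. need_prune above.
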
diff --git a/src/tree/updater.h b/src/tree/updater.h
index 1cf74a699..ff4da5e98 100644
--- a/src/tree/updater.h
+++ b/src/tree/updater.h
@@ -26,11 +26,11 @@ class IUpdater {
   */
  virtual void SetParam(const char *name, const char *val) = 0;
  /*!
-   * \brief peform update to the tree models
+   * \brief perform update to the tree models
   * \param gpair the gradient pair statistics of the data
   * \param p_fmat feature matrix that provide access to features
   * \param info extra side information that may be need, such as root index
-   * \param trees pointer to the trees to be updated, upater will change the content of the tree
+   * \param trees references the trees to be updated, updater will change the content of trees
   *  note: all the trees in the vector are updated, with the same statistics,
   *     but maybe different random seeds, usually one tree is passed in at a time,
   *     there can be multiple trees when we train random forest style model
@@ -53,7 +53,7 @@ class IUpdater {
  virtual ~IUpdater(void) {}
};
/*!
- * \brief create a updater based on name
+ * \brief create an updater based on name
 * \param name name of updater
 * \return return the updater instance
 */
diff --git a/src/tree/updater_colmaker-inl.hpp b/src/tree/updater_colmaker-inl.hpp
index e3070d495..1f89f7ed4 100644
--- a/src/tree/updater_colmaker-inl.hpp
+++ b/src/tree/updater_colmaker-inl.hpp
@@ -17,7 +17,7 @@
 namespace xgboost {
 namespace tree {
-/*! \brief colunwise update to construct a tree */
+/*! \brief column-wise update to construct a tree */
template<typename TStats>
class ColMaker: public IUpdater {
 public:
diff --git a/src/tree/updater_prune-inl.hpp b/src/tree/updater_prune-inl.hpp
index dc99e94e4..2b90646be 100644
--- a/src/tree/updater_prune-inl.hpp
+++ b/src/tree/updater_prune-inl.hpp
@@ -14,7 +14,7 @@
 namespace xgboost {
 namespace tree {
-/*! \brief pruner that prunes a tree after growing finishs */
+/*! \brief pruner that prunes a tree after growing finishes */
class TreePruner: public IUpdater {
 public:
  virtual ~TreePruner(void) {}
@@ -56,7 +56,7 @@ class TreePruner: public IUpdater {
      return npruned;
    }
  }
-  /*! \brief do prunning of a tree */
+  /*! \brief do pruning of a tree */
  inline void DoPrune(RegTree &tree) { // NOLINT(*)
    int npruned = 0;
    // initialize auxiliary statistics
@@ -69,7 +69,7 @@ class TreePruner: public IUpdater {
      }
    }
    if (silent == 0) {
-      utils::Printf("tree prunning end, %d roots, %d extra nodes, %d pruned nodes ,max_depth=%d\n",
+      utils::Printf("tree pruning end, %d roots, %d extra nodes, %d pruned nodes, max_depth=%d\n",
                    tree.param.num_roots, tree.num_extra_nodes(),
                    npruned, tree.MaxDepth());
    }
  }
diff --git a/src/utils/base64-inl.h b/src/utils/base64-inl.h
index 49cd65254..be99e07b7 100644
--- a/src/utils/base64-inl.h
+++ b/src/utils/base64-inl.h
@@ -91,7 +91,7 @@ class Base64InStream: public IStream {
   *  call this function before actually start read
   */
  inline void InitPosition(void) {
-    // get a charater
+    // get a character
    do {
      tmp_ch = reader_.GetChar();
    } while (isspace(tmp_ch));
@@ -223,7 +223,7 @@ class Base64OutStream: public IStream {
  }
  /*!
   * \brief finish writing of all current base64 stream, do some post processing
-   * \param endch charater to put to end of stream, if it is EOF, then nothing will be done
+   * \param endch character to put to end of stream, if it is EOF, then nothing will be done
   */
  inline void Finish(char endch = EOF) {
    using base64::EncodeTable;
diff --git a/src/utils/fmap.h b/src/utils/fmap.h
index 218a61aa4..cc06b7021 100644
--- a/src/utils/fmap.h
+++ b/src/utils/fmap.h
@@ -58,7 +58,7 @@ class FeatMap {
  }
  /*! \brief return type of specific feature */
  const Type& type(size_t idx) const {
-    utils::Assert(idx < names_.size(), "utils::FMap::name feature index exceed bound");
+    utils::Assert(idx < names_.size(), "utils::FMap::type feature index exceed bound");
    return types_[idx];
  }
diff --git a/src/utils/iterator.h b/src/utils/iterator.h
index 5d986b2e4..73068dbbf 100644
--- a/src/utils/iterator.h
+++ b/src/utils/iterator.h
@@ -23,7 +23,7 @@ class IIterator {
   * \param val value of parameter
   */
  virtual void SetParam(const char *name, const char *val) {}
-  /*! \brief initalize the iterator so that we can use the iterator */
+  /*! \brief initialize the iterator so that we can use the iterator */
  virtual void Init(void) {}
  /*! \brief set before first of the item */
  virtual void BeforeFirst(void) = 0;
diff --git a/src/utils/quantile.h b/src/utils/quantile.h
index adcd0222d..d1c029f65 100644
--- a/src/utils/quantile.h
+++ b/src/utils/quantile.h
@@ -214,7 +214,7 @@ struct WQSummary {
  /*!
   * \brief set current summary to be merged summary of sa and sb
   * \param sa first input summary to be merged
-   * \param sb second input summar to be merged
+   * \param sb second input summary to be merged
   */
  inline void SetCombine(const WQSummary &sa,
                         const WQSummary &sb) {
@@ -329,7 +329,7 @@ struct WQSummary {
  }
};

-/*! \brief try to do efficient prunning */
+/*! \brief try to do efficient pruning */
template<typename DType, typename RType>
struct WXQSummary : public WQSummary<DType, RType> {
  // redefine entry type
@@ -364,7 +364,7 @@ struct WXQSummary : public WQSummary<DType, RType> {
    RType mrange = 0;
    {
      // first scan, grab all the big chunk
-      // moviing block index
+      // moving block index
      size_t bid = 0;
      for (size_t i = 1; i < src.size; ++i) {
        if (CheckLarge(src.data[i], chunk)) {
@@ -574,7 +574,7 @@ struct GKSummary {
};

/*!
- * \brief template for all quantle sketch algorithm
+ * \brief template for all quantile sketch algorithms
 *  that uses merge/prune scheme
 * \tparam DType type of data content
 * \tparam RType type of rank
@@ -605,7 +605,7 @@ class QuantileSketchTemplate {
  }
  /*!
   * \brief set the space to be merge of all Summary arrays
-   * \param begin begining position in th summary array
+   * \param begin beginning position in the summary array
   * \param end ending position in the Summary array
   */
  inline void SetMerge(const Summary *begin,
                       const Summary *end) {
@@ -664,7 +664,7 @@ class QuantileSketchTemplate {
  }
};
/*!
- * \brief intialize the quantile sketch, given the performance specification
+ * \brief initialize the quantile sketch, given the performance specification
 * \param maxn maximum number of data points can be feed into sketch
 * \param eps accuracy level of summary
 */
@@ -688,7 +688,7 @@ class QuantileSketchTemplate {
}
/*!
 * \brief add an element to a sketch
- * \param x the elemented added to the sketch
+ * \param x the element added to the sketch
 */
inline void Push(DType x, RType w = 1) {
  if (w == static_cast<RType>(0)) return;
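
To make the Init/Push documentation above concrete, a caller might look
roughly like this (WXQuantileSketch is assumed to be the WXQSummary-backed
instantiation in this header; `values`/`weights` are hypothetical):

    // Build an eps-approximate weighted quantile summary of streamed data.
    utils::WXQuantileSketch<bst_float, bst_float> sketch;
    sketch.Init(num_points, 0.05);           // eps = accuracy level of summary
    for (size_t i = 0; i < num_points; ++i) {
      sketch.Push(values[i], weights[i]);    // weighted element
    }
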
diff --git a/src/utils/random.h b/src/utils/random.h
index 7d52c2ae7..8e3255cf3 100644
--- a/src/utils/random.h
+++ b/src/utils/random.h
@@ -27,7 +27,7 @@ inline void Seed(unsigned seed) {
inline double Uniform(void) {
  return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX)+1.0); // NOLINT(*)
}
-/*! \brief return a real numer uniform in (0,1) */
+/*! \brief return a real number uniform in (0,1) */
inline double NextDouble2(void) {
  return (static_cast<double>(rand()) + 1.0) / (static_cast<double>(RAND_MAX)+2.0); // NOLINT(*)
}
diff --git a/src/utils/thread_buffer.h b/src/utils/thread_buffer.h
index bc4fb9f5e..8acb8ffd0 100644
--- a/src/utils/thread_buffer.h
+++ b/src/utils/thread_buffer.h
@@ -21,8 +21,8 @@ namespace utils {
#if !defined(XGBOOST_STRICT_CXX98_)
/*!
 * \brief buffered loading iterator that uses multithread
- * this template method will assume the following paramters
- * \tparam Elem elememt type to be buffered
+ * this template method will assume the following parameters
+ * \tparam Elem element type to be buffered
 * \tparam ElemFactory factory type to implement in order to use thread buffer
 */
template<typename Elem, typename ElemFactory>
class ThreadBuffer {
@@ -45,7 +45,7 @@ class ThreadBuffer {
  /*!
   * \brief initalize the buffered iterator
   * \param param a initialize parameter that will pass to factory, ignore it if not necessary
-   * \return false if the initlization can't be done, e.g. buffer file hasn't been created
+   * \return false if the initialization can't be done, e.g. buffer file hasn't been created
   */
  inline bool Init(void) {
    if (!factory.Init()) return false;
@@ -61,7 +61,7 @@ class ThreadBuffer {
  inline void BeforeFirst(void) {
    // wait till last loader end loading
    loading_end.Wait();
-    // critcal zone
+    // critical zone
    current_buf = 1;
    factory.BeforeFirst();
    // reset terminate limit
diff --git a/src/utils/utils.h b/src/utils/utils.h
index 7a8f18390..4d06d3c61 100644
--- a/src/utils/utils.h
+++ b/src/utils/utils.h
@@ -62,7 +62,7 @@ const int kPrintBuffer = 1 << 12;

#ifndef XGBOOST_CUSTOMIZE_MSG_
/*!
- * \brief handling of Assert error, caused by in-apropriate input
+ * \brief handling of Assert error, caused by inappropriate input
 * \param msg error message
 */
inline void HandleAssertError(const char *msg) {
  exit(-1);
}
/*!
- * \brief handling of Check error, caused by in-apropriate input
+ * \brief handling of Check error, caused by inappropriate input
 * \param msg error message
 */
inline void HandleCheckError(const char *msg) {
@@ -157,7 +157,7 @@ inline std::FILE *FopenCheck(const char *fname, const char *flag) {
  return fp;
}
}  // namespace utils
-// easy utils that can be directly acessed in xgboost
+// easy utils that can be directly accessed in xgboost
/*! \brief get the beginning address of a vector */
template<typename T>
inline T *BeginPtr(std::vector<T> &vec) {  // NOLINT(*)