From 0162bb7034f224dfcb3ecd290b2c7d2ad316fe86 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 3 Jul 2015 18:31:52 -0700 Subject: [PATCH] lint half way --- src/gbm/gblinear-inl.hpp | 17 +-- src/gbm/gbm.cpp | 1 + src/gbm/gbm.h | 27 ++-- src/gbm/gbtree-inl.hpp | 46 +++---- src/io/io.h | 9 +- src/tree/model.h | 80 ++++++------ src/tree/param.h | 49 ++++---- src/tree/updater.cpp | 1 + src/tree/updater.h | 20 +-- src/tree/updater_basemaker-inl.hpp | 77 +++++++----- src/tree/updater_colmaker-inl.hpp | 141 +++++++++++++-------- src/tree/updater_distcol-inl.hpp | 32 +++-- src/tree/updater_histmaker-inl.hpp | 92 +++++++------- src/tree/updater_prune-inl.hpp | 15 ++- src/tree/updater_refresh-inl.hpp | 12 +- src/tree/updater_skmaker-inl.hpp | 59 +++++---- src/tree/updater_sync-inl.hpp | 13 +- src/utils/config.h | 32 ++--- src/utils/group_data.h | 20 +-- src/utils/io.h | 26 ++-- src/utils/thread.h | 195 +++++++++++++++++++++-------- 21 files changed, 573 insertions(+), 391 deletions(-) diff --git a/src/gbm/gblinear-inl.hpp b/src/gbm/gblinear-inl.hpp index 3d2f36f5f..17d90e556 100644 --- a/src/gbm/gblinear-inl.hpp +++ b/src/gbm/gblinear-inl.hpp @@ -1,11 +1,13 @@ -#ifndef XGBOOST_GBM_GBLINEAR_INL_HPP_ -#define XGBOOST_GBM_GBLINEAR_INL_HPP_ /*! + * Copyright by Contributors * \file gblinear-inl.hpp * \brief Implementation of Linear booster, with L1/L2 regularization: Elastic Net * the update rule is parallel coordinate descent (shotgun) * \author Tianqi Chen */ +#ifndef XGBOOST_GBM_GBLINEAR_INL_HPP_ +#define XGBOOST_GBM_GBLINEAR_INL_HPP_ + #include #include #include @@ -33,10 +35,10 @@ class GBLinear : public IGradBooster { model.param.SetParam(name, val); } } - virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) { + virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) { // NOLINT(*) model.LoadModel(fi); } - virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const { + virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const { // NOLINT(*) model.SaveModel(fo); } virtual void InitModel(void) { @@ -92,7 +94,8 @@ class GBLinear : public IGradBooster { sum_hess += p.hess * v * v; } float &w = model[fid][gid]; - bst_float dw = static_cast(param.learning_rate * param.CalcDelta(sum_grad, sum_hess, w)); + bst_float dw = static_cast(param.learning_rate * + param.CalcDelta(sum_grad, sum_hess, w)); w += dw; // update grad value for (bst_uint j = 0; j < col.length; ++j) { @@ -258,12 +261,12 @@ class GBLinear : public IGradBooster { std::fill(weight.begin(), weight.end(), 0.0f); } // save the model to file - inline void SaveModel(utils::IStream &fo) const { + inline void SaveModel(utils::IStream &fo) const { // NOLINT(*) fo.Write(¶m, sizeof(Param)); fo.Write(weight); } // load model from file - inline void LoadModel(utils::IStream &fi) { + inline void LoadModel(utils::IStream &fi) { // NOLINT(*) utils::Assert(fi.Read(¶m, sizeof(Param)) != 0, "Load LinearBooster"); fi.Read(&weight); } diff --git a/src/gbm/gbm.cpp b/src/gbm/gbm.cpp index fe8d778e4..13ad44c57 100644 --- a/src/gbm/gbm.cpp +++ b/src/gbm/gbm.cpp @@ -1,3 +1,4 @@ +// Copyright by Contributors #define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_DEPRECATE #define NOMINMAX diff --git a/src/gbm/gbm.h b/src/gbm/gbm.h index f07d277ac..60b7474e1 100644 --- a/src/gbm/gbm.h +++ b/src/gbm/gbm.h @@ -1,11 +1,14 @@ -#ifndef XGBOOST_GBM_GBM_H_ -#define XGBOOST_GBM_GBM_H_ /*! 
+ * Copyright by Contributors * \file gbm.h * \brief interface of gradient booster, that learns through gradient statistics * \author Tianqi Chen */ +#ifndef XGBOOST_GBM_GBM_H_ +#define XGBOOST_GBM_GBM_H_ + #include +#include #include "../data.h" #include "../utils/io.h" #include "../utils/fmap.h" @@ -13,7 +16,7 @@ namespace xgboost { /*! \brief namespace for gradient booster */ namespace gbm { -/*! +/*! * \brief interface of gradient boosting model */ class IGradBooster { @@ -29,26 +32,26 @@ class IGradBooster { * \param fi input stream * \param with_pbuffer whether the incoming data contains pbuffer */ - virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) = 0; + virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) = 0; // NOLINT(*) /*! * \brief save model to stream * \param fo output stream * \param with_pbuffer whether save out pbuffer */ - virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const = 0; + virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const = 0; // NOLINT(*) /*! * \brief initialize the model */ virtual void InitModel(void) = 0; - /*! + /*! * \brief reset the predict buffer * this will invalidate all the previous cached results * and recalculate from scratch */ virtual void ResetPredBuffer(size_t num_pbuffer) {} - /*! + /*! * \brief whether the model allow lazy checkpoint - * return true if model is only updated in DoBoost + * return true if model is only updated in DoBoost * after all Allreduce calls */ virtual bool AllowLazyCheckPoint(void) const { @@ -76,20 +79,20 @@ class IGradBooster { * the size of buffer is set by convention using IGradBooster.SetParam("num_pbuffer","size") * \param info extra side information that may be needed for prediction * \param out_preds output vector to hold the predictions - * \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means + * \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means * we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear */ virtual void Predict(IFMatrix *p_fmat, int64_t buffer_offset, const BoosterInfo &info, std::vector *out_preds, - unsigned ntree_limit = 0) = 0; + unsigned ntree_limit = 0) = 0; /*! * \brief online prediction funciton, predict score for one instance at a time * NOTE: use the batch prediction interface if possible, batch prediction is usually * more efficient than online prediction * This function is NOT threadsafe, make sure you only call from one thread - * + * * \param inst the instance you want to predict * \param out_preds output vector to hold the predictions * \param ntree_limit limit the number of trees used in prediction @@ -106,7 +109,7 @@ class IGradBooster { * \param p_fmat feature matrix * \param info extra side information that may be needed for prediction * \param out_preds output vector to hold the predictions - * \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means + * \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means * we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear */ virtual void PredictLeaf(IFMatrix *p_fmat, diff --git a/src/gbm/gbtree-inl.hpp b/src/gbm/gbtree-inl.hpp index c868c302a..9335ef8e7 100644 --- a/src/gbm/gbtree-inl.hpp +++ b/src/gbm/gbtree-inl.hpp @@ -1,13 +1,16 @@ -#ifndef XGBOOST_GBM_GBTREE_INL_HPP_ -#define XGBOOST_GBM_GBTREE_INL_HPP_ /*! 
+ * Copyright by Contributors * \file gbtree-inl.hpp * \brief gradient boosted tree implementation * \author Tianqi Chen */ +#ifndef XGBOOST_GBM_GBTREE_INL_HPP_ +#define XGBOOST_GBM_GBTREE_INL_HPP_ + #include #include #include +#include #include "./gbm.h" #include "../utils/omp.h" #include "../tree/updater.h" @@ -39,7 +42,7 @@ class GBTree : public IGradBooster { tparam.SetParam(name, val); if (trees.size() == 0) mparam.SetParam(name, val); } - virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) { + virtual void LoadModel(utils::IStream &fi, bool with_pbuffer) { // NOLINT(*) this->Clear(); utils::Check(fi.Read(&mparam, sizeof(ModelParam)) != 0, "GBTree: invalid model file"); @@ -62,10 +65,10 @@ class GBTree : public IGradBooster { "GBTree: invalid model file"); } } - virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const { + virtual void SaveModel(utils::IStream &fo, bool with_pbuffer) const { // NOLINT(*) utils::Assert(mparam.num_trees == static_cast(trees.size()), "GBTree"); if (with_pbuffer) { - fo.Write(&mparam, sizeof(ModelParam)); + fo.Write(&mparam, sizeof(ModelParam)); } else { ModelParam p = mparam; p.num_pbuffer = 0; @@ -129,7 +132,7 @@ class GBTree : public IGradBooster { int64_t buffer_offset, const BoosterInfo &info, std::vector *out_preds, - unsigned ntree_limit = 0) { + unsigned ntree_limit = 0) { int nthread; #pragma omp parallel { @@ -160,12 +163,12 @@ class GBTree : public IGradBooster { this->Pred(batch[i], buffer_offset < 0 ? -1 : buffer_offset + ridx, gid, info.GetRoot(ridx), &feats, - &preds[ridx * mparam.num_output_group + gid], stride, + &preds[ridx * mparam.num_output_group + gid], stride, ntree_limit); } } } - } + } virtual void Predict(const SparseBatch::Inst &inst, std::vector *out_preds, unsigned ntree_limit, @@ -178,10 +181,10 @@ class GBTree : public IGradBooster { // loop over output groups for (int gid = 0; gid < mparam.num_output_group; ++gid) { this->Pred(inst, -1, gid, root_index, &thread_temp[0], - &(*out_preds)[gid], mparam.num_output_group, + &(*out_preds)[gid], mparam.num_output_group, ntree_limit); } - } + } virtual void PredictLeaf(IFMatrix *p_fmat, const BoosterInfo &info, std::vector *out_preds, @@ -196,7 +199,6 @@ class GBTree : public IGradBooster { thread_temp[i].Init(mparam.num_feature); } this->PredPath(p_fmat, info, out_preds, ntree_limit); - } virtual std::vector DumpModel(const utils::FeatMap& fmap, int option) { std::vector dump; @@ -260,7 +262,7 @@ class GBTree : public IGradBooster { // update the trees for (size_t i = 0; i < updaters.size(); ++i) { updaters[i]->Update(gpair, p_fmat, info, new_trees); - } + } // optimization, update buffer, if possible // this is only under distributed column mode // for safety check of lazy checkpoint @@ -287,7 +289,7 @@ class GBTree : public IGradBooster { } // update buffer by pre-cached position inline void UpdateBufferByPosition(IFMatrix *p_fmat, - int64_t buffer_offset, + int64_t buffer_offset, int bst_group, const tree::RegTree &new_tree, const int* leaf_position) { @@ -313,11 +315,11 @@ class GBTree : public IGradBooster { int bst_group, unsigned root_index, tree::RegTree::FVec *p_feats, - float *out_pred, size_t stride, + float *out_pred, size_t stride, unsigned ntree_limit) { size_t itop = 0; float psum = 0.0f; - // sum of leaf vector + // sum of leaf vector std::vector vec_psum(mparam.size_leaf_vector, 0.0f); const int64_t bid = mparam.BufferOffset(buffer_index, bst_group); // number of valid trees @@ -339,7 +341,7 @@ class GBTree : public IGradBooster { for (int j = 
0; j < mparam.size_leaf_vector; ++j) { vec_psum[j] += trees[i]->leafvec(tid)[j]; } - if(--treeleft == 0) break; + if (--treeleft == 0) break; } } p_feats->Drop(inst); @@ -365,7 +367,7 @@ class GBTree : public IGradBooster { // number of valid trees if (ntree_limit == 0 || ntree_limit > trees.size()) { ntree_limit = static_cast(trees.size()); - } + } std::vector &preds = *out_preds; preds.resize(info.num_row * ntree_limit); // start collecting the prediction @@ -389,7 +391,7 @@ class GBTree : public IGradBooster { } } } - + // --- data structure --- /*! \brief training parameters */ struct TrainParam { @@ -442,10 +444,10 @@ class GBTree : public IGradBooster { int num_feature; /*! \brief size of predicton buffer allocated used for buffering */ int64_t num_pbuffer; - /*! + /*! * \brief how many output group a single instance can produce * this affects the behavior of number of output we have: - * suppose we have n instance and k group, output will be k*n + * suppose we have n instance and k group, output will be k*n */ int num_output_group; /*! \brief size of leaf vector needed in tree */ @@ -478,8 +480,8 @@ class GBTree : public IGradBooster { inline size_t PredBufferSize(void) const { return num_output_group * num_pbuffer * (size_leaf_vector + 1); } - /*! - * \brief get the buffer offset given a buffer index and group id + /*! + * \brief get the buffer offset given a buffer index and group id * \return calculated buffer offset */ inline int64_t BufferOffset(int64_t buffer_index, int bst_group) const { diff --git a/src/io/io.h b/src/io/io.h index ed075977c..267bb0bff 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -1,11 +1,13 @@ -#ifndef XGBOOST_IO_IO_H_ -#define XGBOOST_IO_IO_H_ /*! + * Copyright 2014 by Contributors * \file io.h * \brief handles input data format of xgboost * I/O module handles a specific DMatrix format * \author Tianqi Chen */ +#ifndef XGBOOST_IO_IO_H_ +#define XGBOOST_IO_IO_H_ + #include "../data.h" #include "../learner/dmatrix.h" @@ -32,7 +34,7 @@ DataMatrix* LoadDataMatrix(const char *fname, bool loadsplit, const char *cache_file = NULL); /*! - * \brief save DataMatrix into stream, + * \brief save DataMatrix into stream, * note: the saved dmatrix format may not be in exactly same as input * SaveDMatrix will choose the best way to materialize the dmatrix. * \param dmat the dmatrix to be saved @@ -40,7 +42,6 @@ DataMatrix* LoadDataMatrix(const char *fname, * \param silent whether print message during saving */ void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent = false); - } // namespace io } // namespace xgboost #endif // XGBOOST_IO_IO_H_ diff --git a/src/tree/model.h b/src/tree/model.h index 4eea34911..6a22aa5f1 100644 --- a/src/tree/model.h +++ b/src/tree/model.h @@ -1,10 +1,12 @@ -#ifndef XGBOOST_TREE_MODEL_H_ -#define XGBOOST_TREE_MODEL_H_ /*! + * Copyright 2014 by Contributors * \file model.h * \brief model structure for tree * \author Tianqi Chen */ +#ifndef XGBOOST_TREE_MODEL_H_ +#define XGBOOST_TREE_MODEL_H_ + #include #include #include @@ -19,7 +21,7 @@ namespace xgboost { namespace tree { /*! - * \brief template class of TreeModel + * \brief template class of TreeModel * \tparam TSplitCond data type to indicate split condition * \tparam TNodeStat auxiliary statistics of node to help tree building */ @@ -42,7 +44,7 @@ class TreeModel { int max_depth; /*! \brief number of features used for tree construction */ int num_feature; - /*! + /*! 
* \brief leaf vector size, used for vector tree * used to store more than one dimensional information in tree */ @@ -55,8 +57,8 @@ class TreeModel { size_leaf_vector = 0; std::memset(reserved, 0, sizeof(reserved)); } - /*! - * \brief set parameters from outside + /*! + * \brief set parameters from outside * \param name name of the parameter * \param val value of the parameter */ @@ -70,7 +72,7 @@ class TreeModel { /*! \brief tree node */ class Node { public: - Node(void) : sindex_(0) {} + Node(void) : sindex_(0) {} /*! \brief index of left child */ inline int cleft(void) const { return this->cleft_; @@ -119,15 +121,15 @@ class TreeModel { inline bool is_root(void) const { return parent_ == -1; } - /*! - * \brief set the right child + /*! + * \brief set the right child * \param nide node id to right child */ inline void set_right_child(int nid) { this->cright_ = nid; } - /*! - * \brief set split condition of current node + /*! + * \brief set split condition of current node * \param split_index feature index to split * \param split_cond split condition * \param default_left the default direction when feature is unknown @@ -138,10 +140,10 @@ class TreeModel { this->sindex_ = split_index; (this->info_).split_cond = split_cond; } - /*! + /*! * \brief set the leaf value of the node * \param value leaf value - * \param right right index, could be used to store + * \param right right index, could be used to store * additional information */ inline void set_leaf(float value, int right = -1) { @@ -153,12 +155,12 @@ class TreeModel { inline void mark_delete(void) { this->sindex_ = std::numeric_limits::max(); } - + private: friend class TreeModel; - /*! - * \brief in leaf node, we have weights, in non-leaf nodes, - * we have split condition + /*! + * \brief in leaf node, we have weights, in non-leaf nodes, + * we have split condition */ union Info{ float leaf_value; @@ -203,7 +205,7 @@ class TreeModel { "number of nodes in the tree exceed 2^31"); nodes.resize(param.num_nodes); stats.resize(param.num_nodes); - leaf_vector.resize(param.num_nodes * param.size_leaf_vector); + leaf_vector.resize(param.num_nodes * param.size_leaf_vector); return nd; } // delete a tree node, keep the parent field to allow trace back @@ -215,7 +217,7 @@ class TreeModel { } public: - /*! + /*! * \brief change a non leaf node to a leaf node, delete its children * \param rid node id of the node * \param new leaf value @@ -229,7 +231,7 @@ class TreeModel { this->DeleteNode(nodes[rid].cright()); nodes[rid].set_leaf(value); } - /*! + /*! * \brief collapse a non leaf node to a leaf node, delete its children * \param rid node id of the node * \param new leaf value @@ -273,7 +275,7 @@ class TreeModel { return &leaf_vector[nid * param.size_leaf_vector]; } /*! \brief get leaf vector given nid */ - inline const bst_float* leafvec(int nid) const{ + inline const bst_float* leafvec(int nid) const { if (leaf_vector.size() == 0) return NULL; return &leaf_vector[nid * param.size_leaf_vector]; } @@ -288,15 +290,15 @@ class TreeModel { nodes[i].set_parent(-1); } } - /*! + /*! 
* \brief load model from stream * \param fi input stream */ - inline void LoadModel(utils::IStream &fi) { + inline void LoadModel(utils::IStream &fi) { // NOLINT(*) utils::Check(fi.Read(¶m, sizeof(Param)) > 0, "TreeModel: wrong format"); nodes.resize(param.num_nodes); stats.resize(param.num_nodes); - utils::Assert(param.num_nodes != 0, "invalid model"); + utils::Assert(param.num_nodes != 0, "invalid model"); utils::Check(fi.Read(BeginPtr(nodes), sizeof(Node) * nodes.size()) > 0, "TreeModel: wrong format"); utils::Check(fi.Read(BeginPtr(stats), sizeof(NodeStat) * stats.size()) > 0, @@ -313,22 +315,22 @@ class TreeModel { "number of deleted nodes do not match, num_deleted=%d, dnsize=%lu, num_nodes=%d", param.num_deleted, deleted_nodes.size(), param.num_nodes); } - /*! + /*! * \brief save model to stream * \param fo output stream */ - inline void SaveModel(utils::IStream &fo) const { + inline void SaveModel(utils::IStream &fo) const { // NOLINT(*) utils::Assert(param.num_nodes == static_cast(nodes.size()), "Tree::SaveModel"); utils::Assert(param.num_nodes == static_cast(stats.size()), "Tree::SaveModel"); fo.Write(¶m, sizeof(Param)); - utils::Assert(param.num_nodes != 0, "invalid model"); + utils::Assert(param.num_nodes != 0, "invalid model"); fo.Write(BeginPtr(nodes), sizeof(Node) * nodes.size()); fo.Write(BeginPtr(stats), sizeof(NodeStat) * nodes.size()); if (param.size_leaf_vector != 0) fo.Write(leaf_vector); } - /*! + /*! * \brief add child nodes to node * \param nid node id to add childs */ @@ -340,8 +342,8 @@ class TreeModel { nodes[nodes[nid].cleft() ].set_parent(nid, true); nodes[nodes[nid].cright()].set_parent(nid, false); } - /*! - * \brief only add a right child to a leaf node + /*! + * \brief only add a right child to a leaf node * \param node id to add right child */ inline void AddRightChild(int nid) { @@ -385,7 +387,7 @@ class TreeModel { inline int num_extra_nodes(void) const { return param.num_nodes - param.num_roots - param.num_deleted; } - /*! + /*! * \brief dump model to text string * \param fmap feature map of feature types * \param with_stats whether dump out statistics as well @@ -400,7 +402,7 @@ class TreeModel { } private: - void Dump(int nid, std::stringstream &fo, + void Dump(int nid, std::stringstream &fo, // NOLINT(*) const utils::FeatMap& fmap, int depth, bool with_stats) { for (int i = 0; i < depth; ++i) { fo << '\t'; @@ -469,7 +471,7 @@ struct RTreeNodeStat { /*! \brief number of child that is leaf node known up to now */ int leaf_child_cnt; /*! \brief print information of current stats to fo */ - inline void Print(std::stringstream &fo, bool is_leaf) const { + inline void Print(std::stringstream &fo, bool is_leaf) const { // NOLINT(*) if (!is_leaf) { fo << ",gain=" << loss_chg << ",cover=" << sum_hess; } else { @@ -481,13 +483,13 @@ struct RTreeNodeStat { /*! \brief define regression tree to be the most common tree model */ class RegTree: public TreeModel{ public: - /*! + /*! * \brief dense feature vector that can be taken by RegTree * to do tranverse efficiently * and can be construct from sparse feature vector */ struct FVec { - /*! + /*! * \brief a union value of value and flag * when flag == -1, this indicate the value is missing */ @@ -510,7 +512,7 @@ class RegTree: public TreeModel{ } } /*! 
\brief drop the trace after fill, must be called after fill */ - inline void Drop(const RowBatch::Inst &inst) { + inline void Drop(const RowBatch::Inst &inst) { for (bst_uint i = 0; i < inst.length; ++i) { if (inst[i].index >= data.size()) continue; data[inst[i].index].flag = -1; @@ -526,10 +528,10 @@ class RegTree: public TreeModel{ } }; /*! - * \brief get the leaf index + * \brief get the leaf index * \param feats dense feature vector, if the feature is missing the field is set to NaN * \param root_gid starting root index of the instance - * \return the leaf index of the given feature + * \return the leaf index of the given feature */ inline int GetLeafIndex(const FVec&feat, unsigned root_id = 0) const { // start from groups that belongs to current data @@ -545,7 +547,7 @@ class RegTree: public TreeModel{ * \brief get the prediction of regression tree, only accepts dense feature vector * \param feats dense feature vector, if the feature is missing the field is set to NaN * \param root_gid starting root index of the instance - * \return the leaf index of the given feature + * \return the leaf index of the given feature */ inline float Predict(const FVec &feat, unsigned root_id = 0) const { int pid = this->GetLeafIndex(feat, root_id); diff --git a/src/tree/param.h b/src/tree/param.h index 20ba1e6c0..f06365a17 100644 --- a/src/tree/param.h +++ b/src/tree/param.h @@ -1,10 +1,13 @@ -#ifndef XGBOOST_TREE_PARAM_H_ -#define XGBOOST_TREE_PARAM_H_ /*! + * Copyright 2014 by Contributors * \file param.h * \brief training parameters, statistics used to support tree construction * \author Tianqi Chen */ +#ifndef XGBOOST_TREE_PARAM_H_ +#define XGBOOST_TREE_PARAM_H_ + +#include #include #include "../data.h" @@ -27,7 +30,7 @@ struct TrainParam{ // L1 regularization factor float reg_alpha; // default direction choice - int default_direction; + int default_direction; // maximum delta update we can add in weight estimation // this parameter can be used to stablize update // default=0 means no constraint on weight delta @@ -45,7 +48,7 @@ struct TrainParam{ // accuracy of sketch float sketch_ratio; // leaf vector size - int size_leaf_vector; + int size_leaf_vector; // option for parallelization int parallel_option; // option to open cacheline optimizaton @@ -74,11 +77,11 @@ struct TrainParam{ sketch_ratio = 2.0f; cache_opt = 1; } - /*! - * \brief set parameters from outside + /*! + * \brief set parameters from outside * \param name name of the parameter * \param val value of the parameter - */ + */ inline void SetParam(const char *name, const char *val) { using namespace std; // sync-names @@ -116,7 +119,7 @@ struct TrainParam{ if (reg_alpha == 0.0f) { return Sqr(sum_grad) / (sum_hess + reg_lambda); } else { - return Sqr(ThresholdL1(sum_grad, reg_alpha)) / (sum_hess + reg_lambda); + return Sqr(ThresholdL1(sum_grad, reg_alpha)) / (sum_hess + reg_lambda); } } else { double w = CalcWeight(sum_grad, sum_hess); @@ -213,7 +216,7 @@ struct GradStats { inline static void CheckInfo(const BoosterInfo &info) { } /*! - * \brief accumulate statistics + * \brief accumulate statistics * \param p the gradient pair */ inline void Add(bst_gpair p) { @@ -222,7 +225,7 @@ struct GradStats { /*! 
* \brief accumulate statistics, more complicated version * \param gpair the vector storing the gradient statistics - * \param info the additional information + * \param info the additional information * \param ridx instance index of this instance */ inline void Add(const std::vector &gpair, @@ -244,7 +247,7 @@ struct GradStats { this->Add(b.sum_grad, b.sum_hess); } /*! \brief same as add, reduce is used in All Reduce */ - inline static void Reduce(GradStats &a, const GradStats &b) { + inline static void Reduce(GradStats &a, const GradStats &b) { // NOLINT(*) a.Add(b); } /*! \brief set current value to a - b */ @@ -257,8 +260,8 @@ struct GradStats { return sum_hess == 0.0; } /*! \brief set leaf vector value based on statistics */ - inline void SetLeafVec(const TrainParam ¶m, bst_float *vec) const{ - } + inline void SetLeafVec(const TrainParam ¶m, bst_float *vec) const { + } // constructor to allow inheritance GradStats(void) {} /*! \brief add statistics to the data */ @@ -311,7 +314,7 @@ struct CVGradStats : public GradStats { ret += param.CalcGain(train[i].sum_grad, train[i].sum_hess, vsize * valid[i].sum_grad, - vsize * valid[i].sum_hess); + vsize * valid[i].sum_hess); } return ret / vsize; } @@ -324,7 +327,7 @@ struct CVGradStats : public GradStats { } } /*! \brief same as add, reduce is used in All Reduce */ - inline static void Reduce(CVGradStats &a, const CVGradStats &b) { + inline static void Reduce(CVGradStats &a, const CVGradStats &b) { // NOLINT(*) a.Add(b); } /*! \brief set current value to a - b */ @@ -344,8 +347,8 @@ struct CVGradStats : public GradStats { } }; -/*! - * \brief statistics that is helpful to store +/*! + * \brief statistics that is helpful to store * and represent a split solution for the tree */ struct SplitEntry{ @@ -357,12 +360,12 @@ struct SplitEntry{ float split_value; /*! \brief constructor */ SplitEntry(void) : loss_chg(0.0f), sindex(0), split_value(0.0f) {} - /*! - * \brief decides whether a we can replace current entry with the statistics given + /*! + * \brief decides whether a we can replace current entry with the statistics given * This function gives better priority to lower index when loss_chg equals * not the best way, but helps to give consistent result during multi-thread execution * \param loss_chg the loss reduction get through the split - * \param split_index the feature index where the split is on + * \param split_index the feature index where the split is on */ inline bool NeedReplace(bst_float new_loss_chg, unsigned split_index) const { if (this->split_index() <= split_index) { @@ -371,7 +374,7 @@ struct SplitEntry{ return !(this->loss_chg > new_loss_chg); } } - /*! + /*! * \brief update the split entry, replace it if e is better * \param e candidate split solution * \return whether the proposed split is better and can replace current split @@ -386,7 +389,7 @@ struct SplitEntry{ return false; } } - /*! + /*! * \brief update the split entry, replace it if e is better * \param loss_chg loss reduction of new candidate * \param split_index feature index to split on @@ -407,7 +410,7 @@ struct SplitEntry{ } } /*! 
\brief same as update, used by AllReduce*/ - inline static void Reduce(SplitEntry &dst, const SplitEntry &src) { + inline static void Reduce(SplitEntry &dst, const SplitEntry &src) { // NOLINT(*) dst.Update(src); } /*!\return feature index to split on */ diff --git a/src/tree/updater.cpp b/src/tree/updater.cpp index 5d2e99820..eb2e06925 100644 --- a/src/tree/updater.cpp +++ b/src/tree/updater.cpp @@ -1,3 +1,4 @@ +// Copyright 2014 by Contributors #define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_DEPRECATE #define NOMINMAX diff --git a/src/tree/updater.h b/src/tree/updater.h index 4ced21e5e..1cf74a699 100644 --- a/src/tree/updater.h +++ b/src/tree/updater.h @@ -1,10 +1,12 @@ -#ifndef XGBOOST_TREE_UPDATER_H_ -#define XGBOOST_TREE_UPDATER_H_ /*! + * Copyright 2014 by Contributors * \file updater.h * \brief interface to update the tree * \author Tianqi Chen */ +#ifndef XGBOOST_TREE_UPDATER_H_ +#define XGBOOST_TREE_UPDATER_H_ + #include #include "../data.h" @@ -12,7 +14,7 @@ namespace xgboost { namespace tree { -/*! +/*! * \brief interface of tree update module, that performs update of a tree */ class IUpdater { @@ -21,7 +23,7 @@ class IUpdater { * \brief set parameters from outside * \param name name of the parameter * \param val value of the parameter - */ + */ virtual void SetParam(const char *name, const char *val) = 0; /*! * \brief peform update to the tree models @@ -29,8 +31,8 @@ class IUpdater { * \param p_fmat feature matrix that provide access to features * \param info extra side information that may be need, such as root index * \param trees pointer to the trees to be updated, upater will change the content of the tree - * note: all the trees in the vector are updated, with the same statistics, - * but maybe different random seeds, usually one tree is passed in at a time, + * note: all the trees in the vector are updated, with the same statistics, + * but maybe different random seeds, usually one tree is passed in at a time, * there can be multiple trees when we train random forest style model */ virtual void Update(const std::vector &gpair, @@ -38,7 +40,7 @@ class IUpdater { const BoosterInfo &info, const std::vector &trees) = 0; - /*! + /*! * \brief this is simply a function for optimizing performance * this function asks the updater to return the leaf position of each instance in the p_fmat, * if it is cached in the updater, if it is not available, return NULL @@ -50,8 +52,8 @@ class IUpdater { // destructor virtual ~IUpdater(void) {} }; -/*! - * \brief create a updater based on name +/*! + * \brief create a updater based on name * \param name name of updater * \return return the updater instance */ diff --git a/src/tree/updater_basemaker-inl.hpp b/src/tree/updater_basemaker-inl.hpp index f144ae199..6204c47b7 100644 --- a/src/tree/updater_basemaker-inl.hpp +++ b/src/tree/updater_basemaker-inl.hpp @@ -1,12 +1,14 @@ -#ifndef XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_ -#define XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_ /*! + * Copyright 2014 by Contributors * \file updater_basemaker-inl.hpp * \brief implement a common tree constructor * \author Tianqi Chen */ +#ifndef XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_ +#define XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_ #include #include +#include #include #include "../sync/sync.h" #include "../utils/random.h" @@ -14,7 +16,7 @@ namespace xgboost { namespace tree { -/*! +/*! 
* \brief base tree maker class that defines common operation * needed in tree making */ @@ -26,7 +28,7 @@ class BaseMaker: public IUpdater { virtual void SetParam(const char *name, const char *val) { param.SetParam(name, val); } - + protected: // helper to collect and query feature meta information struct FMetaHelper { @@ -60,8 +62,11 @@ class BaseMaker: public IUpdater { bst_float a = fminmax[fid * 2]; bst_float b = fminmax[fid * 2 + 1]; if (a == -std::numeric_limits::max()) return 0; - if (-a == b) return 1; - else return 2; + if (-a == b) { + return 1; + } else { + return 2; + } } inline bst_float MaxValue(bst_uint fid) const { return fminmax[fid *2 + 1]; @@ -70,7 +75,7 @@ class BaseMaker: public IUpdater { std::vector &findex = *p_findex; findex.clear(); for (size_t i = 0; i < fminmax.size(); i += 2) { - const bst_uint fid = static_cast(i / 2); + const bst_uint fid = static_cast(i / 2); if (this->Type(fid) != 0) findex.push_back(fid); } unsigned n = static_cast(p * findex.size()); @@ -86,7 +91,7 @@ class BaseMaker: public IUpdater { rabit::Broadcast(&s_cache, 0); fs.Read(&findex); } - + private: std::vector fminmax; }; @@ -116,7 +121,7 @@ class BaseMaker: public IUpdater { } return nthread; } - // ------class member helpers--------- + // ------class member helpers--------- /*! \brief initialize temp data structure */ inline void InitData(const std::vector &gpair, const IFMatrix &fmat, @@ -124,7 +129,8 @@ class BaseMaker: public IUpdater { const RegTree &tree) { utils::Assert(tree.param.num_nodes == tree.param.num_roots, "TreeMaker: can only grow new tree"); - {// setup position + { + // setup position position.resize(gpair.size()); if (root_index.size() == 0) { std::fill(position.begin(), position.end(), 0); @@ -147,7 +153,8 @@ class BaseMaker: public IUpdater { } } } - {// expand query + { + // expand query qexpand.reserve(256); qexpand.clear(); for (int i = 0; i < tree.param.num_roots; ++i) { qexpand.push_back(i); @@ -170,7 +177,7 @@ class BaseMaker: public IUpdater { this->UpdateNode2WorkIndex(tree); } // return decoded position - inline int DecodePosition(bst_uint ridx) const{ + inline int DecodePosition(bst_uint ridx) const { const int pid = position[ridx]; return pid < 0 ? ~pid : pid; } @@ -182,23 +189,24 @@ class BaseMaker: public IUpdater { position[ridx] = nid; } } - /*! + /*! 
* \brief this is helper function uses column based data structure, * reset the positions to the lastest one * \param nodes the set of nodes that contains the split to be used * \param p_fmat feature matrix needed for tree construction * \param tree the regression tree structure */ - inline void ResetPositionCol(const std::vector &nodes, IFMatrix *p_fmat, const RegTree &tree) { + inline void ResetPositionCol(const std::vector &nodes, + IFMatrix *p_fmat, const RegTree &tree) { // set the positions in the nondefault this->SetNonDefaultPositionCol(nodes, p_fmat, tree); // set rest of instances to default position const std::vector &rowset = p_fmat->buffered_rowset(); // set default direct nodes to default - // for leaf nodes that are not fresh, mark then to ~nid, + // for leaf nodes that are not fresh, mark then to ~nid, // so that they are ignored in future statistics collection const bst_omp_uint ndata = static_cast(rowset.size()); - + #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < ndata; ++i) { const bst_uint ridx = rowset[i]; @@ -237,7 +245,7 @@ class BaseMaker: public IUpdater { } std::sort(fsplits.begin(), fsplits.end()); fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin()); - + utils::IIterator *iter = p_fmat->ColIterator(fsplits); while (iter->Next()) { const ColBatch &batch = iter->Value(); @@ -252,7 +260,7 @@ class BaseMaker: public IUpdater { const int nid = this->DecodePosition(ridx); // go back to parent, correct those who are not default if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) { - if(fvalue < tree[nid].split_cond()) { + if (fvalue < tree[nid].split_cond()) { this->SetEncodePosition(ridx, tree[nid].cleft()); } else { this->SetEncodePosition(ridx, tree[nid].cright()); @@ -324,7 +332,7 @@ class BaseMaker: public IUpdater { sketch->temp.size = 0; } /*! 
- * \brief push a new element to sketch + * \brief push a new element to sketch * \param fvalue feature value, comes in sorted ascending order * \param w weight * \param max_size @@ -337,31 +345,32 @@ class BaseMaker: public IUpdater { return; } if (last_fvalue != fvalue) { - double rmax = rmin + wmin; + double rmax = rmin + wmin; if (rmax >= next_goal && sketch->temp.size != max_size) { - if (sketch->temp.size == 0 || last_fvalue > sketch->temp.data[sketch->temp.size-1].value) { + if (sketch->temp.size == 0 || + last_fvalue > sketch->temp.data[sketch->temp.size-1].value) { // push to sketch sketch->temp.data[sketch->temp.size] = utils::WXQuantileSketch:: Entry(static_cast(rmin), - static_cast(rmax), - static_cast(wmin), last_fvalue); + static_cast(rmax), + static_cast(wmin), last_fvalue); utils::Assert(sketch->temp.size < max_size, "invalid maximum size max_size=%u, stemp.size=%lu\n", max_size, sketch->temp.size); ++sketch->temp.size; } if (sketch->temp.size == max_size) { - next_goal = sum_total * 2.0f + 1e-5f; - } else{ + next_goal = sum_total * 2.0f + 1e-5f; + } else { next_goal = static_cast(sketch->temp.size * sum_total / max_size); } } else { - if (rmax >= next_goal) { - rabit::TrackerPrintf("INFO: rmax=%g, sum_total=%g, next_goal=%g, size=%lu\n", - rmax, sum_total, next_goal, sketch->temp.size); - } - } + if (rmax >= next_goal) { + rabit::TrackerPrintf("INFO: rmax=%g, sum_total=%g, next_goal=%g, size=%lu\n", + rmax, sum_total, next_goal, sketch->temp.size); + } + } rmin = rmax; wmin = w; last_fvalue = fvalue; @@ -375,13 +384,13 @@ class BaseMaker: public IUpdater { if (sketch->temp.size == 0 || last_fvalue > sketch->temp.data[sketch->temp.size-1].value) { utils::Assert(sketch->temp.size <= max_size, "Finalize: invalid maximum size, max_size=%u, stemp.size=%lu", - sketch->temp.size, max_size ); + sketch->temp.size, max_size); // push to sketch sketch->temp.data[sketch->temp.size] = utils::WXQuantileSketch:: Entry(static_cast(rmin), - static_cast(rmax), - static_cast(wmin), last_fvalue); + static_cast(rmax), + static_cast(wmin), last_fvalue); ++sketch->temp.size; } sketch->PushTemp(); @@ -415,4 +424,4 @@ class BaseMaker: public IUpdater { }; } // namespace tree } // namespace xgboost -#endif // XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_ +#endif // XGBOOST_TREE_UPDATER_BASEMAKER_INL_HPP_ diff --git a/src/tree/updater_colmaker-inl.hpp b/src/tree/updater_colmaker-inl.hpp index db3581aac..e3070d495 100644 --- a/src/tree/updater_colmaker-inl.hpp +++ b/src/tree/updater_colmaker-inl.hpp @@ -1,10 +1,12 @@ -#ifndef XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_ -#define XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_ /*! 
+ * Copyright 2014 by Contributors * \file updater_colmaker-inl.hpp * \brief use columnwise update to construct a tree * \author Tianqi Chen */ +#ifndef XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_ +#define XGBOOST_TREE_UPDATER_COLMAKER_INL_HPP_ + #include #include #include @@ -114,10 +116,13 @@ class ColMaker: public IUpdater { // initialize temp data structure inline void InitData(const std::vector &gpair, const IFMatrix &fmat, - const std::vector &root_index, const RegTree &tree) { - utils::Assert(tree.param.num_nodes == tree.param.num_roots, "ColMaker: can only grow new tree"); + const std::vector &root_index, + const RegTree &tree) { + utils::Assert(tree.param.num_nodes == tree.param.num_roots, + "ColMaker: can only grow new tree"); const std::vector &rowset = fmat.buffered_rowset(); - {// setup position + { + // setup position position.resize(gpair.size()); if (root_index.size() == 0) { for (size_t i = 0; i < rowset.size(); ++i) { @@ -127,7 +132,8 @@ class ColMaker: public IUpdater { for (size_t i = 0; i < rowset.size(); ++i) { const bst_uint ridx = rowset[i]; position[ridx] = root_index[ridx]; - utils::Assert(root_index[ridx] < (unsigned)tree.param.num_roots, "root index exceed setting"); + utils::Assert(root_index[ridx] < (unsigned)tree.param.num_roots, + "root index exceed setting"); } } // mark delete for the deleted datas @@ -154,11 +160,12 @@ class ColMaker: public IUpdater { } unsigned n = static_cast(param.colsample_bytree * feat_index.size()); random::Shuffle(feat_index); - //utils::Check(n > 0, "colsample_bytree is too small that no feature can be included"); - utils::Check(n > 0, "colsample_bytree=%g is too small that no feature can be included", param.colsample_bytree); + utils::Check(n > 0, "colsample_bytree=%g is too small that no feature can be included", + param.colsample_bytree); feat_index.resize(n); } - {// setup temp space for each thread + { + // setup temp space for each thread #pragma omp parallel { this->nthread = omp_get_num_threads(); @@ -171,20 +178,25 @@ class ColMaker: public IUpdater { } snode.reserve(256); } - {// expand query + { + // expand query qexpand_.reserve(256); qexpand_.clear(); for (int i = 0; i < tree.param.num_roots; ++i) { qexpand_.push_back(i); } } } - /*! \brief initialize the base_weight, root_gain, and NodeEntry for all the new nodes in qexpand */ + /*! 
+ * \brief initialize the base_weight, root_gain, + * and NodeEntry for all the new nodes in qexpand + */ inline void InitNewNode(const std::vector &qexpand, const std::vector &gpair, const IFMatrix &fmat, const BoosterInfo &info, const RegTree &tree) { - {// setup statistics space for each tree node + { + // setup statistics space for each tree node for (size_t i = 0; i < stemp.size(); ++i) { stemp[i].resize(tree.param.num_nodes, ThreadEntry(param)); } @@ -226,7 +238,7 @@ class ColMaker: public IUpdater { } // use new nodes for qexpand qexpand = newnodes; - } + } // parallel find the best split of current fid // this function does not support nested functions inline void ParallelFindSplit(const ColBatch::Inst &col, @@ -280,26 +292,30 @@ class ColMaker: public IUpdater { ThreadEntry &e = stemp[tid][nid]; float fsplit; if (tid != 0) { - if(std::abs(stemp[tid - 1][nid].last_fvalue - e.first_fvalue) > rt_2eps) { + if (std::abs(stemp[tid - 1][nid].last_fvalue - e.first_fvalue) > rt_2eps) { fsplit = (stemp[tid - 1][nid].last_fvalue - e.first_fvalue) * 0.5f; } else { continue; } } else { fsplit = e.first_fvalue - rt_eps; - } + } if (need_forward && tid != 0) { c.SetSubstract(snode[nid].stats, e.stats); - if (c.sum_hess >= param.min_child_weight && e.stats.sum_hess >= param.min_child_weight) { - bst_float loss_chg = static_cast(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); + if (c.sum_hess >= param.min_child_weight && + e.stats.sum_hess >= param.min_child_weight) { + bst_float loss_chg = static_cast(e.stats.CalcGain(param) + + c.CalcGain(param) - snode[nid].root_gain); e.best.Update(loss_chg, fid, fsplit, false); } } if (need_backward) { tmp.SetSubstract(sum, e.stats); c.SetSubstract(snode[nid].stats, tmp); - if (c.sum_hess >= param.min_child_weight && tmp.sum_hess >= param.min_child_weight) { - bst_float loss_chg = static_cast(tmp.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); + if (c.sum_hess >= param.min_child_weight && + tmp.sum_hess >= param.min_child_weight) { + bst_float loss_chg = static_cast(tmp.CalcGain(param) + + c.CalcGain(param) - snode[nid].root_gain); e.best.Update(loss_chg, fid, fsplit, true); } } @@ -308,8 +324,10 @@ class ColMaker: public IUpdater { tmp = sum; ThreadEntry &e = stemp[nthread-1][nid]; c.SetSubstract(snode[nid].stats, tmp); - if (c.sum_hess >= param.min_child_weight && tmp.sum_hess >= param.min_child_weight) { - bst_float loss_chg = static_cast(tmp.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); + if (c.sum_hess >= param.min_child_weight && + tmp.sum_hess >= param.min_child_weight) { + bst_float loss_chg = static_cast(tmp.CalcGain(param) + + c.CalcGain(param) - snode[nid].root_gain); e.best.Update(loss_chg, fid, e.last_fvalue + rt_eps, true); } } @@ -335,25 +353,31 @@ class ColMaker: public IUpdater { e.first_fvalue = fvalue; } else { // forward default right - if (std::abs(fvalue - e.first_fvalue) > rt_2eps){ - if (need_forward) { + if (std::abs(fvalue - e.first_fvalue) > rt_2eps) { + if (need_forward) { c.SetSubstract(snode[nid].stats, e.stats); - if (c.sum_hess >= param.min_child_weight && e.stats.sum_hess >= param.min_child_weight) { - bst_float loss_chg = static_cast(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); + if (c.sum_hess >= param.min_child_weight && + e.stats.sum_hess >= param.min_child_weight) { + bst_float loss_chg = static_cast(e.stats.CalcGain(param) + + c.CalcGain(param) - + snode[nid].root_gain); e.best.Update(loss_chg, fid, (fvalue + e.first_fvalue) * 0.5f, false); } } if 
(need_backward) { cright.SetSubstract(e.stats_extra, e.stats); c.SetSubstract(snode[nid].stats, cright); - if (c.sum_hess >= param.min_child_weight && cright.sum_hess >= param.min_child_weight) { - bst_float loss_chg = static_cast(cright.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); + if (c.sum_hess >= param.min_child_weight && + cright.sum_hess >= param.min_child_weight) { + bst_float loss_chg = static_cast(cright.CalcGain(param) + + c.CalcGain(param) - + snode[nid].root_gain); e.best.Update(loss_chg, fid, (fvalue + e.first_fvalue) * 0.5f, true); } } - } + } e.stats.Add(gpair, info, ridx); - e.first_fvalue = fvalue; + e.first_fvalue = fvalue; } } } @@ -361,7 +385,7 @@ class ColMaker: public IUpdater { // update enumeration solution inline void UpdateEnumeration(int nid, bst_gpair gstats, float fvalue, int d_step, bst_uint fid, - TStats &c, std::vector &temp) { + TStats &c, std::vector &temp) { // NOLINT(*) // get the statistics of nid ThreadEntry &e = temp[nid]; // test if first hit, this is fine, because we set 0 during init @@ -370,10 +394,12 @@ class ColMaker: public IUpdater { e.last_fvalue = fvalue; } else { // try to find a split - if (std::abs(fvalue - e.last_fvalue) > rt_2eps && e.stats.sum_hess >= param.min_child_weight) { + if (std::abs(fvalue - e.last_fvalue) > rt_2eps && + e.stats.sum_hess >= param.min_child_weight) { c.SetSubstract(snode[nid].stats, e.stats); if (c.sum_hess >= param.min_child_weight) { - bst_float loss_chg = static_cast(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); + bst_float loss_chg = static_cast(e.stats.CalcGain(param) + + c.CalcGain(param) - snode[nid].root_gain); e.best.Update(loss_chg, fid, (fvalue + e.last_fvalue) * 0.5f, d_step == -1); } } @@ -388,7 +414,7 @@ class ColMaker: public IUpdater { int d_step, bst_uint fid, const std::vector &gpair, - std::vector &temp) { + std::vector &temp) { // NOLINT(*) const std::vector &qexpand = qexpand_; // clear all the temp statistics for (size_t j = 0; j < qexpand.size(); ++j) { @@ -423,7 +449,7 @@ class ColMaker: public IUpdater { this->UpdateEnumeration(nid, buf_gpair[i], p->fvalue, d_step, fid, c, temp); - } + } } // finish up the ending piece for (it = align_end, i = 0; it != end; ++i, it += d_step) { @@ -436,14 +462,15 @@ class ColMaker: public IUpdater { this->UpdateEnumeration(nid, buf_gpair[i], it->fvalue, d_step, fid, c, temp); - } + } // finish updating all statistics, check if it is possible to include all sum statistics for (size_t i = 0; i < qexpand.size(); ++i) { const int nid = qexpand[i]; ThreadEntry &e = temp[nid]; c.SetSubstract(snode[nid].stats, e.stats); if (e.stats.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) { - bst_float loss_chg = static_cast(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); + bst_float loss_chg = static_cast(e.stats.CalcGain(param) + + c.CalcGain(param) - snode[nid].root_gain); const float gap = std::abs(e.last_fvalue) + rt_eps; const float delta = d_step == +1 ? 
gap: -gap; e.best.Update(loss_chg, fid, e.last_fvalue + delta, d_step == -1); @@ -458,7 +485,7 @@ class ColMaker: public IUpdater { bst_uint fid, const std::vector &gpair, const BoosterInfo &info, - std::vector &temp) { + std::vector &temp) { // NOLINT(*) // use cacheline aware optimization if (TStats::kSimpleStats != 0 && param.cache_opt != 0) { EnumerateSplitCacheOpt(begin, end, d_step, fid, gpair, temp); @@ -471,7 +498,7 @@ class ColMaker: public IUpdater { } // left statistics TStats c(param); - for(const ColBatch::Entry *it = begin; it != end; it += d_step) { + for (const ColBatch::Entry *it = begin; it != end; it += d_step) { const bst_uint ridx = it->index; const int nid = position[ridx]; if (nid < 0) continue; @@ -485,10 +512,12 @@ class ColMaker: public IUpdater { e.last_fvalue = fvalue; } else { // try to find a split - if (std::abs(fvalue - e.last_fvalue) > rt_2eps && e.stats.sum_hess >= param.min_child_weight) { + if (std::abs(fvalue - e.last_fvalue) > rt_2eps && + e.stats.sum_hess >= param.min_child_weight) { c.SetSubstract(snode[nid].stats, e.stats); if (c.sum_hess >= param.min_child_weight) { - bst_float loss_chg = static_cast(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); + bst_float loss_chg = static_cast(e.stats.CalcGain(param) + + c.CalcGain(param) - snode[nid].root_gain); e.best.Update(loss_chg, fid, (fvalue + e.last_fvalue) * 0.5f, d_step == -1); } } @@ -503,7 +532,8 @@ class ColMaker: public IUpdater { ThreadEntry &e = temp[nid]; c.SetSubstract(snode[nid].stats, e.stats); if (e.stats.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) { - bst_float loss_chg = static_cast(e.stats.CalcGain(param) + c.CalcGain(param) - snode[nid].root_gain); + bst_float loss_chg = static_cast(e.stats.CalcGain(param) + + c.CalcGain(param) - snode[nid].root_gain); const float gap = std::abs(e.last_fvalue) + rt_eps; const float delta = d_step == +1 ? 
gap: -gap; e.best.Update(loss_chg, fid, e.last_fvalue + delta, d_step == -1); @@ -511,14 +541,14 @@ class ColMaker: public IUpdater { } } - // update the solution candidate + // update the solution candidate virtual void UpdateSolution(const ColBatch &batch, const std::vector &gpair, const IFMatrix &fmat, const BoosterInfo &info) { // start enumeration const bst_omp_uint nsize = static_cast(batch.size); - #if defined(_OPENMP) + #if defined(_OPENMP) const int batch_size = std::max(static_cast(nsize / this->nthread / 32), 1); #endif int poption = param.parallel_option; @@ -533,11 +563,11 @@ class ColMaker: public IUpdater { const ColBatch::Inst c = batch[i]; const bool ind = c.length != 0 && c.data[0].fvalue == c.data[c.length - 1].fvalue; if (param.need_forward_search(fmat.GetColDensity(fid), ind)) { - this->EnumerateSplit(c.data, c.data + c.length, +1, + this->EnumerateSplit(c.data, c.data + c.length, +1, fid, gpair, info, stemp[tid]); } if (param.need_backward_search(fmat.GetColDensity(fid), ind)) { - this->EnumerateSplit(c.data + c.length - 1, c.data - 1, -1, + this->EnumerateSplit(c.data + c.length - 1, c.data - 1, -1, fid, gpair, info, stemp[tid]); } } @@ -546,7 +576,7 @@ class ColMaker: public IUpdater { this->ParallelFindSplit(batch[i], batch.col_index[i], fmat, gpair, info); } - } + } } // find splits at current level, do split per level inline void FindSplit(int depth, @@ -571,7 +601,7 @@ class ColMaker: public IUpdater { // get the best result, we can synchronize the solution for (size_t i = 0; i < qexpand.size(); ++i) { const int nid = qexpand[i]; - NodeEntry &e = snode[nid]; + NodeEntry &e = snode[nid]; // now we know the solution in snode[nid], set split if (e.best.loss_chg > rt_eps) { p_tree->AddChilds(nid); @@ -582,19 +612,20 @@ class ColMaker: public IUpdater { } else { (*p_tree)[nid].set_leaf(e.weight * param.learning_rate); } - } + } } // reset position of each data points after split is created in the tree - inline void ResetPosition(const std::vector &qexpand, IFMatrix *p_fmat, const RegTree &tree) { + inline void ResetPosition(const std::vector &qexpand, + IFMatrix *p_fmat, const RegTree &tree) { // set the positions in the nondefault - this->SetNonDefaultPosition(qexpand, p_fmat, tree); + this->SetNonDefaultPosition(qexpand, p_fmat, tree); // set rest of instances to default position const std::vector &rowset = p_fmat->buffered_rowset(); // set default direct nodes to default - // for leaf nodes that are not fresh, mark then to ~nid, + // for leaf nodes that are not fresh, mark then to ~nid, // so that they are ignored in future statistics collection const bst_omp_uint ndata = static_cast(rowset.size()); - + #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < ndata; ++i) { const bst_uint ridx = rowset[i]; @@ -655,7 +686,7 @@ class ColMaker: public IUpdater { const float fvalue = col[j].fvalue; // go back to parent, correct those who are not default if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) { - if(fvalue < tree[nid].split_cond()) { + if (fvalue < tree[nid].split_cond()) { this->SetEncodePosition(ridx, tree[nid].cleft()); } else { this->SetEncodePosition(ridx, tree[nid].cright()); @@ -667,7 +698,7 @@ class ColMaker: public IUpdater { } // utils to get/set position, with encoded format // return decoded position - inline int DecodePosition(bst_uint ridx) const{ + inline int DecodePosition(bst_uint ridx) const { const int pid = position[ridx]; return pid < 0 ? 
~pid : pid; } @@ -679,7 +710,7 @@ class ColMaker: public IUpdater { position[ridx] = nid; } } - //--data fields-- + // --data fields-- const TrainParam ¶m; // number of omp thread used during training int nthread; diff --git a/src/tree/updater_distcol-inl.hpp b/src/tree/updater_distcol-inl.hpp index c989f4e47..e3d3f8b59 100644 --- a/src/tree/updater_distcol-inl.hpp +++ b/src/tree/updater_distcol-inl.hpp @@ -1,11 +1,15 @@ -#ifndef XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_ -#define XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_ /*! + * Copyright 2014 by Contributors * \file updater_distcol-inl.hpp - * \brief beta distributed version that takes a sub-column + * \brief beta distributed version that takes a sub-column * and construct a tree * \author Tianqi Chen */ +#ifndef XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_ +#define XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_ + +#include +#include #include "../sync/sync.h" #include "../utils/bitmap.h" #include "../utils/io.h" @@ -27,7 +31,7 @@ class DistColMaker : public ColMaker { virtual void Update(const std::vector &gpair, IFMatrix *p_fmat, const BoosterInfo &info, - const std::vector &trees) { + const std::vector &trees) { TStats::CheckInfo(info); utils::Check(trees.size() == 1, "DistColMaker: only support one tree at a time"); // build the tree @@ -39,11 +43,12 @@ class DistColMaker : public ColMaker { } virtual const int* GetLeafPosition(void) const { return builder.GetLeafPosition(); - } + } + private: struct Builder : public ColMaker::Builder { public: - Builder(const TrainParam ¶m) + explicit Builder(const TrainParam ¶m) : ColMaker::Builder(param) { } inline void UpdatePosition(IFMatrix *p_fmat, const RegTree &tree) { @@ -63,7 +68,8 @@ class DistColMaker : public ColMaker { virtual const int* GetLeafPosition(void) const { return BeginPtr(this->position); } - protected: + + protected: virtual void SetNonDefaultPosition(const std::vector &qexpand, IFMatrix *p_fmat, const RegTree &tree) { // step 2, classify the non-default data into right places @@ -87,7 +93,7 @@ class DistColMaker : public ColMaker { #pragma omp parallel for schedule(static) for (bst_omp_uint j = 0; j < ndata; ++j) { boolmap[j] = 0; - } + } } utils::IIterator *iter = p_fmat->ColIterator(fsplits); while (iter->Next()) { @@ -111,7 +117,7 @@ class DistColMaker : public ColMaker { } } } - + bitmap.InitFromBool(boolmap); // communicate bitmap rabit::Allreduce(BeginPtr(bitmap.data), bitmap.data.size()); @@ -142,7 +148,7 @@ class DistColMaker : public ColMaker { } vec.push_back(this->snode[nid].best); } - // TODO, lazy version + // TODO(tqchen) lazy version // communicate best solution reducer.Allreduce(BeginPtr(vec), vec.size()); // assign solution back @@ -151,7 +157,7 @@ class DistColMaker : public ColMaker { this->snode[nid].best = vec[i]; } } - + private: utils::BitMap bitmap; std::vector boolmap; @@ -162,8 +168,8 @@ class DistColMaker : public ColMaker { // training parameter TrainParam param; // pointer to the builder - Builder builder; + Builder builder; }; } // namespace tree } // namespace xgboost -#endif +#endif // XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_ diff --git a/src/tree/updater_histmaker-inl.hpp b/src/tree/updater_histmaker-inl.hpp index f739f23f3..d86204e4b 100644 --- a/src/tree/updater_histmaker-inl.hpp +++ b/src/tree/updater_histmaker-inl.hpp @@ -1,10 +1,12 @@ -#ifndef XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_ -#define XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_ /*! 
+ * Copyright 2014 by Contributors * \file updater_histmaker-inl.hpp * \brief use histogram counting to construct a tree * \author Tianqi Chen */ +#ifndef XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_ +#define XGBOOST_TREE_UPDATER_HISTMAKER_INL_HPP_ + #include #include #include "../sync/sync.h" @@ -38,7 +40,7 @@ class HistMaker: public BaseMaker { struct HistUnit { /*! \brief cutting point of histogram, contains maximum point */ const bst_float *cut; - /*! \brief content of statistics data */ + /*! \brief content of statistics data */ TStats *data; /*! \brief size of histogram */ unsigned size; @@ -48,13 +50,13 @@ class HistMaker: public BaseMaker { HistUnit(const bst_float *cut, TStats *data, unsigned size) : cut(cut), data(data), size(size) {} /*! \brief add a histogram to data */ - inline void Add(bst_float fv, + inline void Add(bst_float fv, const std::vector &gpair, const BoosterInfo &info, const bst_uint ridx) { unsigned i = std::upper_bound(cut, cut + size, fv) - cut; utils::Assert(size != 0, "try insert into size=0"); - utils::Assert(i < size, + utils::Assert(i < size, "maximum value must be in cut, fv = %g, cutmax=%g", fv, cut[size-1]); data[i].Add(gpair, info, ridx); } @@ -74,7 +76,7 @@ class HistMaker: public BaseMaker { rptr[fid+1] - rptr[fid]); } }; - // thread workspace + // thread workspace struct ThreadWSpace { /*! \brief actual unit pointer */ std::vector rptr; @@ -92,7 +94,7 @@ class HistMaker: public BaseMaker { } hset[tid].rptr = BeginPtr(rptr); hset[tid].cut = BeginPtr(cut); - hset[tid].data.resize(cut.size(), TStats(param)); + hset[tid].data.resize(cut.size(), TStats(param)); } } // aggregate all statistics to hset[0] @@ -147,7 +149,7 @@ class HistMaker: public BaseMaker { } // this function does two jobs // (1) reset the position in array position, to be the latest leaf id - // (2) propose a set of candidate cuts and set wspace.rptr wspace.cut correctly + // (2) propose a set of candidate cuts and set wspace.rptr wspace.cut correctly virtual void ResetPosAndPropose(const std::vector &gpair, IFMatrix *p_fmat, const BoosterInfo &info, @@ -171,8 +173,9 @@ class HistMaker: public BaseMaker { const BoosterInfo &info, const std::vector &fset, const RegTree &tree) = 0; + private: - inline void EnumerateSplit(const HistUnit &hist, + inline void EnumerateSplit(const HistUnit &hist, const TStats &node_sum, bst_uint fid, SplitEntry *best, @@ -187,7 +190,7 @@ class HistMaker: public BaseMaker { c.SetSubstract(node_sum, s); if (c.sum_hess >= param.min_child_weight) { double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; - if (best->Update((float)loss_chg, fid, hist.cut[i], false)) { + if (best->Update(static_cast(loss_chg), fid, hist.cut[i], false)) { *left_sum = s; } } @@ -200,7 +203,7 @@ class HistMaker: public BaseMaker { c.SetSubstract(node_sum, s); if (c.sum_hess >= param.min_child_weight) { double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; - if (best->Update((float)loss_chg, fid, hist.cut[i-1], true)) { + if (best->Update(static_cast(loss_chg), fid, hist.cut[i-1], true)) { *left_sum = c; } } @@ -216,22 +219,22 @@ class HistMaker: public BaseMaker { const size_t num_feature = fset.size(); // get the best split condition for each node std::vector sol(qexpand.size()); - std::vector left_sum(qexpand.size()); + std::vector left_sum(qexpand.size()); bst_omp_uint nexpand = static_cast(qexpand.size()); #pragma omp parallel for schedule(dynamic, 1) - for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) { + for (bst_omp_uint wid = 0; wid < nexpand; ++wid) { 
const int nid = qexpand[wid]; utils::Assert(node2workindex[nid] == static_cast(wid), "node2workindex inconsistent"); SplitEntry &best = sol[wid]; TStats &node_sum = wspace.hset[0][num_feature + wid * (num_feature + 1)].data[0]; - for (size_t i = 0; i < fset.size(); ++ i) { + for (size_t i = 0; i < fset.size(); ++i) { EnumerateSplit(this->wspace.hset[0][i + wid * (num_feature+1)], node_sum, fset[i], &best, &left_sum[wid]); } } // get the best result, we can synchronize the solution - for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) { + for (bst_omp_uint wid = 0; wid < nexpand; ++wid) { const int nid = qexpand[wid]; const SplitEntry &best = sol[wid]; const TStats &node_sum = wspace.hset[0][num_feature + wid * (num_feature + 1)].data[0]; @@ -244,7 +247,7 @@ class HistMaker: public BaseMaker { (*p_tree)[nid].set_split(best.split_index(), best.split_value, best.default_left()); // mark right child as 0, to indicate fresh leaf - (*p_tree)[(*p_tree)[nid].cleft()].set_leaf(0.0f, 0); + (*p_tree)[(*p_tree)[nid].cleft()].set_leaf(0.0f, 0); (*p_tree)[(*p_tree)[nid].cright()].set_leaf(0.0f, 0); // right side sum TStats right_sum; @@ -256,11 +259,11 @@ class HistMaker: public BaseMaker { } } } - + inline void SetStats(RegTree *p_tree, int nid, const TStats &node_sum) { p_tree->stat(nid).base_weight = static_cast(node_sum.CalcWeight(param)); p_tree->stat(nid).sum_hess = static_cast(node_sum.sum_hess); - node_sum.SetLeafVec(param, p_tree->leafvec(nid)); + node_sum.SetLeafVec(param, p_tree->leafvec(nid)); } }; @@ -270,7 +273,7 @@ class CQHistMaker: public HistMaker { struct HistEntry { typename HistMaker::HistUnit hist; unsigned istart; - /*! + /*! * \brief add a histogram to data, * do linear scan, start from istart */ @@ -282,7 +285,7 @@ class CQHistMaker: public HistMaker { utils::Assert(istart != hist.size, "the bound variable must be max"); hist.data[istart].Add(gpair, info, ridx); } - /*! + /*! 
* \brief add a histogram to data, * do linear scan, start from istart */ @@ -302,7 +305,7 @@ class CQHistMaker: public HistMaker { feat_helper.InitByCol(p_fmat, tree); feat_helper.SampleCol(this->param.colsample_bytree, p_fset); } - // code to create histogram + // code to create histogram virtual void CreateHist(const std::vector &gpair, IFMatrix *p_fmat, const BoosterInfo &info, @@ -313,7 +316,7 @@ class CQHistMaker: public HistMaker { std::fill(feat2workindex.begin(), feat2workindex.end(), -1); for (size_t i = 0; i < fset.size(); ++i) { feat2workindex[fset[i]] = static_cast(i); - } + } // start to work this->wspace.Init(this->param, 1); // if it is C++11, use lazy evaluation for Allreduce, @@ -350,11 +353,11 @@ class CQHistMaker: public HistMaker { // sync the histogram // if it is C++11, use lazy evaluation for Allreduce #if __cplusplus >= 201103L - this->histred.Allreduce(BeginPtr(this->wspace.hset[0].data), + this->histred.Allreduce(BeginPtr(this->wspace.hset[0].data), this->wspace.hset[0].data.size(), lazy_get_hist); #else - this->histred.Allreduce(BeginPtr(this->wspace.hset[0].data), this->wspace.hset[0].data.size()); -#endif + this->histred.Allreduce(BeginPtr(this->wspace.hset[0].data), this->wspace.hset[0].data.size()); +#endif } virtual void ResetPositionAfterSplit(IFMatrix *p_fmat, const RegTree &tree) { @@ -374,11 +377,11 @@ class CQHistMaker: public HistMaker { feat2workindex[fset[i]] = static_cast(freal_set.size()); freal_set.push_back(fset[i]); } else { - feat2workindex[fset[i]] = -2; + feat2workindex[fset[i]] = -2; } } this->GetNodeStats(gpair, *p_fmat, tree, info, - &thread_stats, &node_stats); + &thread_stats, &node_stats); sketchs.resize(this->qexpand.size() * freal_set.size()); for (size_t i = 0; i < sketchs.size(); ++i) { sketchs[i].Init(info.num_row, this->param.sketch_eps); @@ -394,7 +397,8 @@ class CQHistMaker: public HistMaker { #if __cplusplus >= 201103L auto lazy_get_summary = [&]() #endif - {// get smmary + { + // get smmary thread_sketch.resize(this->get_nthread()); // number of rows in const size_t nrows = p_fmat->buffered_rowset().size(); @@ -457,9 +461,9 @@ class CQHistMaker: public HistMaker { this->wspace.rptr.push_back(static_cast(this->wspace.cut.size())); } else { utils::Assert(offset == -2, "BUG in mark"); - bst_float cpt = feat_helper.MaxValue(fset[i]); + bst_float cpt = feat_helper.MaxValue(fset[i]); this->wspace.cut.push_back(cpt + fabs(cpt) + rt_eps); - this->wspace.rptr.push_back(static_cast(this->wspace.cut.size())); + this->wspace.rptr.push_back(static_cast(this->wspace.cut.size())); } } // reserve last value for global statistics @@ -470,7 +474,7 @@ class CQHistMaker: public HistMaker { (fset.size() + 1) * this->qexpand.size() + 1, "cut space inconsistent"); } - + private: inline void UpdateHistCol(const std::vector &gpair, const ColBatch::Inst &c, @@ -554,9 +558,9 @@ class CQHistMaker: public HistMaker { } } else { for (size_t i = 0; i < this->qexpand.size(); ++i) { - const unsigned nid = this->qexpand[i]; + const unsigned nid = this->qexpand[i]; sbuilder[nid].sum_total = static_cast(nstats[nid].sum_hess); - } + } } // if only one value, no need to do second pass if (c[0].fvalue == c[c.length-1].fvalue) { @@ -589,7 +593,7 @@ class CQHistMaker: public HistMaker { if (nid >= 0) { sbuilder[nid].Push(c[j + i].fvalue, buf_hess[i], max_size); } - } + } } for (bst_uint j = align_length; j < c.length; ++j) { const bst_uint ridx = c[j].index; @@ -617,7 +621,7 @@ class CQHistMaker: public HistMaker { // temp space to map feature id to working index 
std::vector feat2workindex; // set of index from fset that are real - std::vector freal_set; + std::vector freal_set; // thread temp data std::vector< std::vector > thread_sketch; // used to hold statistics @@ -631,18 +635,18 @@ class CQHistMaker: public HistMaker { // reducer for summary rabit::SerializeReducer sreducer; // per node, per feature sketch - std::vector< utils::WXQuantileSketch > sketchs; + std::vector< utils::WXQuantileSketch > sketchs; }; template -class QuantileHistMaker: public HistMaker { +class QuantileHistMaker: public HistMaker { protected: typedef utils::WXQuantileSketch WXQSketch; virtual void ResetPosAndPropose(const std::vector &gpair, IFMatrix *p_fmat, const BoosterInfo &info, const std::vector &fset, - const RegTree &tree) { + const RegTree &tree) { // initialize the data structure int nthread = BaseMaker::get_nthread(); sketchs.resize(this->qexpand.size() * tree.param.num_feature); @@ -658,7 +662,7 @@ class QuantileHistMaker: public HistMaker { utils::ParallelGroupBuilder builder(&col_ptr, &col_data, &thread_col_ptr); builder.InitBudget(tree.param.num_feature, nthread); - const bst_omp_uint nbatch = static_cast(batch.size); + const bst_omp_uint nbatch = static_cast(batch.size); #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < nbatch; ++i) { RowBatch::Inst inst = batch[i]; @@ -667,11 +671,11 @@ class QuantileHistMaker: public HistMaker { if (nid >= 0) { if (!tree[nid].is_leaf()) { this->position[ridx] = nid = HistMaker::NextLevel(inst, tree, nid); - } + } if (this->node2workindex[nid] < 0) { this->position[ridx] = ~nid; - } else{ - for (bst_uint j = 0; j < inst.length; ++j) { + } else { + for (bst_uint j = 0; j < inst.length; ++j) { builder.AddBudget(inst[j].index, omp_get_thread_num()); } } @@ -712,8 +716,8 @@ class QuantileHistMaker: public HistMaker { summary_array[i].Reserve(max_size); summary_array[i].SetPrune(out, max_size); } - - size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size); + + size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size); sreducer.Allreduce(BeginPtr(summary_array), nbytes, summary_array.size()); // now we get the final result of sketch, setup the cut this->wspace.cut.clear(); diff --git a/src/tree/updater_prune-inl.hpp b/src/tree/updater_prune-inl.hpp index e7e5f9f0b..dc99e94e4 100644 --- a/src/tree/updater_prune-inl.hpp +++ b/src/tree/updater_prune-inl.hpp @@ -1,10 +1,12 @@ -#ifndef XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_ -#define XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_ /*! + * Copyright 2014 by Contributors * \file updater_prune-inl.hpp - * \brief prune a tree given the statistics + * \brief prune a tree given the statistics * \author Tianqi Chen */ +#ifndef XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_ +#define XGBOOST_TREE_UPDATER_PRUNE_INL_HPP_ + #include #include "./param.h" #include "./updater.h" @@ -37,9 +39,10 @@ class TreePruner: public IUpdater { param.learning_rate = lr; syncher.Update(gpair, p_fmat, info, trees); } + private: // try to prune off current leaf - inline int TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) { + inline int TryPruneLeaf(RegTree &tree, int nid, int depth, int npruned) { // NOLINT(*) if (tree[nid].is_root()) return npruned; int pid = tree[nid].parent(); RegTree::NodeStat &s = tree.stat(pid); @@ -51,10 +54,10 @@ class TreePruner: public IUpdater { return this->TryPruneLeaf(tree, pid, depth - 1, npruned+2); } else { return npruned; - } + } } /*! 
\brief do prunning of a tree */ - inline void DoPrune(RegTree &tree) { + inline void DoPrune(RegTree &tree) { // NOLINT(*) int npruned = 0; // initialize auxiliary statistics for (int nid = 0; nid < tree.param.num_nodes; ++nid) { diff --git a/src/tree/updater_refresh-inl.hpp b/src/tree/updater_refresh-inl.hpp index 8613c8ea6..b6c5ee89e 100644 --- a/src/tree/updater_refresh-inl.hpp +++ b/src/tree/updater_refresh-inl.hpp @@ -1,10 +1,12 @@ -#ifndef XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_ -#define XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_ /*! + * Copyright 2014 by Contributors * \file updater_refresh-inl.hpp * \brief refresh the statistics and leaf value on the tree on the dataset * \author Tianqi Chen */ +#ifndef XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_ +#define XGBOOST_TREE_UPDATER_REFRESH_INL_HPP_ + #include #include #include "../sync/sync.h" @@ -27,7 +29,7 @@ class TreeRefresher: public IUpdater { virtual void Update(const std::vector &gpair, IFMatrix *p_fmat, const BoosterInfo &info, - const std::vector &trees) { + const std::vector &trees) { if (trees.size() == 0) return; // number of threads // thread temporal space @@ -100,7 +102,7 @@ class TreeRefresher: public IUpdater { float lr = param.learning_rate; param.learning_rate = lr / trees.size(); int offset = 0; - for (size_t i = 0; i < trees.size(); ++i) { + for (size_t i = 0; i < trees.size(); ++i) { for (int rid = 0; rid < trees[i]->param.num_roots; ++rid) { this->Refresh(BeginPtr(stemp[0]) + offset, rid, trees[i]); } @@ -147,7 +149,7 @@ class TreeRefresher: public IUpdater { // training parameter TrainParam param; // reducer - rabit::Reducer reducer; + rabit::Reducer reducer; }; } // namespace tree diff --git a/src/tree/updater_skmaker-inl.hpp b/src/tree/updater_skmaker-inl.hpp index 6bc2fc39a..ade22011b 100644 --- a/src/tree/updater_skmaker-inl.hpp +++ b/src/tree/updater_skmaker-inl.hpp @@ -1,11 +1,13 @@ -#ifndef XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_ -#define XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_ /*! + * Copyright 2014 by Contributors * \file updater_skmaker-inl.hpp * \brief use approximation sketch to construct a tree, a refresh is needed to make the statistics exactly correct * \author Tianqi Chen */ +#ifndef XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_ +#define XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_ + #include #include #include "../sync/sync.h" @@ -30,7 +32,7 @@ class SketchMaker: public BaseMaker { } param.learning_rate = lr; } - + protected: inline void Update(const std::vector &gpair, IFMatrix *p_fmat, @@ -79,9 +81,9 @@ class SketchMaker: public BaseMaker { double pos_grad; /*! \brief sum of all negative gradient */ double neg_grad; - /*! \brief sum of hessian statistics */ + /*! \brief sum of hessian statistics */ double sum_hess; - explicit SKStats(void) {} + SKStats(void) {} // constructor explicit SKStats(const TrainParam ¶m) { this->Clear(); @@ -123,7 +125,7 @@ class SketchMaker: public BaseMaker { sum_hess += b.sum_hess; } /*! \brief same as add, reduce is used in All Reduce */ - inline static void Reduce(SKStats &a, const SKStats &b) { + inline static void Reduce(SKStats &a, const SKStats &b) { // NOLINT(*) a.Add(b); } /*! 
\brief set leaf vector value based on statistics */ @@ -139,7 +141,7 @@ class SketchMaker: public BaseMaker { sketchs[i].Init(info.num_row, this->param.sketch_eps); } thread_sketch.resize(this->get_nthread()); - // number of rows in + // number of rows in const size_t nrows = p_fmat->buffered_rowset().size(); // start accumulating statistics utils::IIterator *iter = p_fmat->ColIterator(); @@ -156,7 +158,7 @@ class SketchMaker: public BaseMaker { batch[i].length == nrows, &thread_sketch[omp_get_thread_num()]); } - } + } // setup maximum size unsigned max_size = param.max_sketch_size(); // synchronize sketch @@ -167,8 +169,8 @@ class SketchMaker: public BaseMaker { summary_array[i].Reserve(max_size); summary_array[i].SetPrune(out, max_size); } - size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size); - sketch_reducer.Allreduce(BeginPtr(summary_array), nbytes, summary_array.size()); + size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_size); + sketch_reducer.Allreduce(BeginPtr(summary_array), nbytes, summary_array.size()); } // update sketch information in column fid inline void UpdateSketchCol(const std::vector &gpair, @@ -209,7 +211,7 @@ class SketchMaker: public BaseMaker { const unsigned nid = this->qexpand[i]; sbuilder[3 * nid + 0].sum_total = static_cast(nstats[nid].pos_grad); sbuilder[3 * nid + 1].sum_total = static_cast(nstats[nid].neg_grad); - sbuilder[3 * nid + 2].sum_total = static_cast(nstats[nid].sum_hess); + sbuilder[3 * nid + 2].sum_total = static_cast(nstats[nid].sum_hess); } } // if only one value, no need to do second pass @@ -217,7 +219,9 @@ class SketchMaker: public BaseMaker { for (size_t i = 0; i < this->qexpand.size(); ++i) { const int nid = this->qexpand[i]; for (int k = 0; k < 3; ++k) { - sbuilder[3 * nid + k].sketch->Push(c[0].fvalue, static_cast(sbuilder[3 * nid + k].sum_total)); + sbuilder[3 * nid + k].sketch->Push(c[0].fvalue, + static_cast( + sbuilder[3 * nid + k].sum_total)); } } return; @@ -250,7 +254,7 @@ class SketchMaker: public BaseMaker { sbuilder[3 * nid + k].Finalize(max_size); } } - } + } inline void SyncNodeStats(void) { utils::Assert(qexpand.size() != 0, "qexpand must not be empty"); std::vector tmp(qexpand.size()); @@ -272,12 +276,12 @@ class SketchMaker: public BaseMaker { std::vector sol(qexpand.size()); bst_omp_uint nexpand = static_cast(qexpand.size()); #pragma omp parallel for schedule(dynamic, 1) - for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) { + for (bst_omp_uint wid = 0; wid < nexpand; ++wid) { const int nid = qexpand[wid]; utils::Assert(node2workindex[nid] == static_cast(wid), "node2workindex inconsistent"); SplitEntry &best = sol[wid]; - for (bst_uint fid = 0; fid < num_feature; ++ fid) { + for (bst_uint fid = 0; fid < num_feature; ++fid) { unsigned base = (wid * p_tree->param.num_feature + fid) * 3; EnumerateSplit(summary_array[base + 0], summary_array[base + 1], @@ -286,7 +290,7 @@ class SketchMaker: public BaseMaker { } } // get the best result, we can synchronize the solution - for (bst_omp_uint wid = 0; wid < nexpand; ++ wid) { + for (bst_omp_uint wid = 0; wid < nexpand; ++wid) { const int nid = qexpand[wid]; const SplitEntry &best = sol[wid]; // set up the values @@ -337,7 +341,7 @@ class SketchMaker: public BaseMaker { feat_sum.neg_grad = neg_grad.data[neg_grad.size - 1].rmax; feat_sum.sum_hess = sum_hess.data[sum_hess.size - 1].rmax; size_t ipos = 0, ineg = 0, ihess = 0; - for (size_t i = 1; i < fsplits.size(); ++i) { + for (size_t i = 1; i < fsplits.size(); ++i) { WXQSketch::Entry pos = 
pos_grad.Query(fsplits[i], ipos); WXQSketch::Entry neg = neg_grad.Query(fsplits[i], ineg); WXQSketch::Entry hess = sum_hess.Query(fsplits[i], ihess); @@ -345,11 +349,11 @@ class SketchMaker: public BaseMaker { s.pos_grad = 0.5f * (pos.rmin + pos.rmax - pos.wmin); s.neg_grad = 0.5f * (neg.rmin + neg.rmax - neg.wmin); s.sum_hess = 0.5f * (hess.rmin + hess.rmax - hess.wmin); - c.SetSubstract(node_sum, s); + c.SetSubstract(node_sum, s); // forward if (s.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) { - double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; + double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; best->Update(static_cast(loss_chg), fid, fsplits[i], false); } // backward @@ -357,22 +361,23 @@ class SketchMaker: public BaseMaker { s.SetSubstract(node_sum, c); if (s.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) { - double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; + double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; best->Update(static_cast(loss_chg), fid, fsplits[i], true); - } + } } - {// all including + { + // all including SKStats s = feat_sum, c; c.SetSubstract(node_sum, s); if (s.sum_hess >= param.min_child_weight && c.sum_hess >= param.min_child_weight) { bst_float cpt = fsplits.back(); - double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; + double loss_chg = s.CalcGain(param) + c.CalcGain(param) - root_gain; best->Update(static_cast(loss_chg), fid, cpt + fabsf(cpt) + 1.0f, false); } } } - + // thread temp data // used to hold temporal sketch std::vector< std::vector > thread_sketch; @@ -389,6 +394,6 @@ class SketchMaker: public BaseMaker { // per node, per feature sketch std::vector< utils::WXQuantileSketch > sketchs; }; -} // tree -} // xgboost -#endif +} // namespace tree +} // namespace xgboost +#endif // XGBOOST_TREE_UPDATER_SKMAKER_INL_HPP_ diff --git a/src/tree/updater_sync-inl.hpp b/src/tree/updater_sync-inl.hpp index 2aa534aa8..e76d1f76d 100644 --- a/src/tree/updater_sync-inl.hpp +++ b/src/tree/updater_sync-inl.hpp @@ -1,18 +1,21 @@ -#ifndef XGBOOST_TREE_UPDATER_SYNC_INL_HPP_ -#define XGBOOST_TREE_UPDATER_SYNC_INL_HPP_ /*! + * Copyright 2014 by Contributors * \file updater_sync-inl.hpp * \brief synchronize the tree in all distributed nodes * \author Tianqi Chen */ +#ifndef XGBOOST_TREE_UPDATER_SYNC_INL_HPP_ +#define XGBOOST_TREE_UPDATER_SYNC_INL_HPP_ + #include +#include #include #include "../sync/sync.h" #include "./updater.h" namespace xgboost { namespace tree { -/*! +/*! * \brief syncher that synchronize the tree in all distributed nodes * can implement various strategies, so far it is always set to node 0's tree */ @@ -28,7 +31,7 @@ class TreeSyncher: public IUpdater { const std::vector &trees) { this->SyncTrees(trees); } - + private: // synchronize the trees in different nodes, take tree from rank 0 inline void SyncTrees(const std::vector &trees) { @@ -43,7 +46,7 @@ class TreeSyncher: public IUpdater { } fs.Seek(0); rabit::Broadcast(&s_model, 0); - for (size_t i = 0; i < trees.size(); ++i) { + for (size_t i = 0; i < trees.size(); ++i) { trees[i]->LoadModel(fs); } } diff --git a/src/utils/config.h b/src/utils/config.h index 19f4980cf..43d7bc8bd 100644 --- a/src/utils/config.h +++ b/src/utils/config.h @@ -1,10 +1,12 @@ -#ifndef XGBOOST_UTILS_CONFIG_H_ -#define XGBOOST_UTILS_CONFIG_H_ /*! 
+ * Copyright 2014 by Contributors * \file config.h * \brief helper class to load in configures from file * \author Tianqi Chen */ +#ifndef XGBOOST_UTILS_CONFIG_H_ +#define XGBOOST_UTILS_CONFIG_H_ + #include #include #include @@ -14,26 +16,26 @@ namespace xgboost { namespace utils { -/*! +/*! * \brief base implementation of config reader */ class ConfigReaderBase { public: - /*! + /*! * \brief get current name, called after Next returns true - * \return current parameter name + * \return current parameter name */ inline const char *name(void) const { return s_name.c_str(); } - /*! + /*! * \brief get current value, called after Next returns true - * \return current parameter value + * \return current parameter value */ inline const char *val(void) const { return s_val.c_str(); } - /*! + /*! * \brief move iterator to next position * \return true if there is value in next position */ @@ -55,7 +57,7 @@ class ConfigReaderBase { protected: /*! * \brief to be implemented by subclass, - * get next token, return EOF if end of file + * get next token, return EOF if end of file */ virtual char GetChar(void) = 0; /*! \brief to be implemented by child, check if end of stream */ @@ -144,9 +146,9 @@ class ConfigReaderBase { */ class ConfigStreamReader: public ConfigReaderBase { public: - /*! - * \brief constructor - * \param istream input stream + /*! + * \brief constructor + * \param istream input stream */ explicit ConfigStreamReader(std::istream &fin) : fin(fin) {} @@ -163,13 +165,13 @@ class ConfigStreamReader: public ConfigReaderBase { std::istream &fin; }; -/*! +/*! * \brief an iterator that iterates over a configure file and gets the configures */ class ConfigIterator: public ConfigStreamReader { public: - /*! - * \brief constructor + /*! + * \brief constructor * \param fname name of configure file */ explicit ConfigIterator(const char *fname) : ConfigStreamReader(fi) { diff --git a/src/utils/group_data.h b/src/utils/group_data.h index 6e12a39ff..29d391aa8 100644 --- a/src/utils/group_data.h +++ b/src/utils/group_data.h @@ -1,6 +1,5 @@ -#ifndef XGBOOST_UTILS_GROUP_DATA_H_ -#define XGBOOST_UTILS_GROUP_DATA_H_ /*! + * Copyright 2014 by Contributors * \file group_data.h * \brief this file defines utils to group data by integer keys * Input: given input sequence (key,value), (k1,v1), (k2,v2) @@ -12,6 +11,11 @@ * The major algorithm is a two pass linear scan algorithm that requires two pass scan over the data * \author Tianqi Chen */ +#ifndef XGBOOST_UTILS_GROUP_DATA_H_ +#define XGBOOST_UTILS_GROUP_DATA_H_ + +#include + namespace xgboost { namespace utils { /*! @@ -32,10 +36,10 @@ struct ParallelGroupBuilder { std::vector< std::vector > *p_thread_rptr) : rptr(*p_rptr), data(*p_data), thread_rptr(*p_thread_rptr) { } - + public: /*! - * \brief step 1: initialize the helper, with hint of number keys + * \brief step 1: initialize the helper, with hint of number keys * and thread used in the construction * \param nkeys number of keys in the matrix, can be smaller than expected * \param nthread number of thread that will be used in construction @@ -56,7 +60,7 @@ struct ParallelGroupBuilder { inline void AddBudget(size_t key, int threadid, SizeType nelem = 1) { std::vector &trptr = thread_rptr[threadid]; if (trptr.size() < key + 1) { - trptr.resize(key + 1, 0); + trptr.resize(key + 1, 0); } trptr[key] += nelem; } @@ -84,13 +88,13 @@ struct ParallelGroupBuilder { data.resize(start); } /*! 
- * \brief step 4: add data to the allocated space, + * \brief step 4: add data to the allocated space, * the calls to this function should be exactly match previous call to AddBudget * - * \param key the key of + * \param key the key of * \param threadid the id of thread that calls this function */ - inline void Push(size_t key, ValueType value, int threadid) { + inline void Push(size_t key, ValueType value, int threadid) { SizeType &rp = thread_rptr[threadid][key]; data[rp++] = value; } diff --git a/src/utils/io.h b/src/utils/io.h index d96d16e2a..5b366e51c 100644 --- a/src/utils/io.h +++ b/src/utils/io.h @@ -1,16 +1,19 @@ -#ifndef XGBOOST_UTILS_IO_H -#define XGBOOST_UTILS_IO_H +/*! + * Copyright 2014 by Contributors + * \file io.h + * \brief general stream interface for serialization, I/O + * \author Tianqi Chen + */ + +#ifndef XGBOOST_UTILS_IO_H_ +#define XGBOOST_UTILS_IO_H_ #include #include #include #include #include "./utils.h" #include "../sync/sync.h" -/*! - * \file io.h - * \brief general stream interface for serialization, I/O - * \author Tianqi Chen - */ + namespace xgboost { namespace utils { // reuse the definitions of streams @@ -23,7 +26,7 @@ typedef rabit::utils::MemoryBufferStream MemoryBufferStream; class FileStream : public ISeekStream { public: explicit FileStream(std::FILE *fp) : fp(fp) {} - explicit FileStream(void) { + FileStream(void) { this->fp = NULL; } virtual size_t Read(void *ptr, size_t size) { @@ -33,7 +36,7 @@ class FileStream : public ISeekStream { std::fwrite(ptr, size, 1, fp); } virtual void Seek(size_t pos) { - std::fseek(fp, static_cast(pos), SEEK_SET); + std::fseek(fp, static_cast(pos), SEEK_SET); // NOLINT(*) } virtual size_t Tell(void) { return std::ftell(fp); @@ -42,7 +45,7 @@ class FileStream : public ISeekStream { return std::feof(fp) != 0; } inline void Close(void) { - if (fp != NULL){ + if (fp != NULL) { std::fclose(fp); fp = NULL; } } @@ -52,6 +55,5 @@ class FileStream : public ISeekStream { }; } // namespace utils } // namespace xgboost - #include "./base64-inl.h" -#endif +#endif // XGBOOST_UTILS_IO_H_ diff --git a/src/utils/thread.h b/src/utils/thread.h index ef6335a74..78b488cff 100644 --- a/src/utils/thread.h +++ b/src/utils/thread.h @@ -1,16 +1,17 @@ -#ifndef XGBOOST_UTILS_THREAD_H -#define XGBOOST_UTILS_THREAD_H /*! + * Copyright by Contributors * \file thread.h - * \brief this header include the minimum necessary resource for multi-threading + * \brief this header include the minimum necessary resource + * for multi-threading that can be compiled in windows, linux, mac * \author Tianqi Chen - * Acknowledgement: this file is adapted from SVDFeature project, by same author. - * The MAC support part of this code is provided by Artemy Kolchinsky */ +#ifndef XGBOOST_UTILS_THREAD_H_ // NOLINT(*) +#define XGBOOST_UTILS_THREAD_H_ // NOLINT(*) + #ifdef _MSC_VER -#include "utils.h" #include #include +#include "../xgboost/utils.h" namespace xgboost { namespace utils { /*! 
\brief simple semaphore used for synchronization */ @@ -18,29 +19,80 @@ class Semaphore { public : inline void Init(int init_val) { sem = CreateSemaphore(NULL, init_val, 10, NULL); - utils::Assert(sem != NULL, "create Semaphore error"); + utils::Check(sem != NULL, "create Semaphore error"); } inline void Destroy(void) { CloseHandle(sem); } inline void Wait(void) { - utils::Assert(WaitForSingleObject(sem, INFINITE) == WAIT_OBJECT_0, "WaitForSingleObject error"); + utils::Check(WaitForSingleObject(sem, INFINITE) == WAIT_OBJECT_0, "WaitForSingleObject error"); } inline void Post(void) { - utils::Assert(ReleaseSemaphore(sem, 1, NULL) != 0, "ReleaseSemaphore error"); + utils::Check(ReleaseSemaphore(sem, 1, NULL) != 0, "ReleaseSemaphore error"); } + private: HANDLE sem; }; + +/*! \brief mutex under windows */ +class Mutex { + public: + inline void Init(void) { + utils::Check(InitializeCriticalSectionAndSpinCount(&mutex, 0x00000400) != 0, + "Mutex::Init fail"); + } + inline void Lock(void) { + EnterCriticalSection(&mutex); + } + inline void Unlock(void) { + LeaveCriticalSection(&mutex); + } + inline void Destroy(void) { + DeleteCriticalSection(&mutex); + } + + private: + friend class ConditionVariable; + CRITICAL_SECTION mutex; +}; + +// conditional variable that uses pthread +class ConditionVariable { + public: + // initialize conditional variable + inline void Init(void) { + InitializeConditionVariable(&cond); + } + // destroy the thread + inline void Destroy(void) { + // DeleteConditionVariable(&cond); + } + // wait on the conditional variable + inline void Wait(Mutex *mutex) { + utils::Check(SleepConditionVariableCS(&cond, &(mutex->mutex), INFINITE) != 0, + "ConditionVariable:Wait fail"); + } + inline void Broadcast(void) { + WakeAllConditionVariable(&cond); + } + inline void Signal(void) { + WakeConditionVariable(&cond); + } + + private: + CONDITION_VARIABLE cond; +}; + /*! 
\brief simple thread that wraps windows thread */ class Thread { private: HANDLE thread_handle; - unsigned thread_id; + unsigned thread_id; public: - inline void Start(unsigned int __stdcall entry(void*), void *param) { + inline void Start(unsigned int __stdcall entry(void*p), void *param) { thread_handle = (HANDLE)_beginthreadex(NULL, 0, entry, param, 0, &thread_id); - } + } inline int Join(void) { WaitForSingleObject(thread_handle, INFINITE); return 0; @@ -54,39 +106,41 @@ inline void ThreadExit(void *status) { } // namespace utils } // namespace xgboost #else -// thread interface using g++ -extern "C" { +// thread interface using g++ #include #include -} +#include namespace xgboost { namespace utils { /*!\brief semaphore class */ class Semaphore { #ifdef __APPLE__ + private: sem_t* semPtr; - char sema_name[20]; + char sema_name[20]; + private: inline void GenRandomString(char *s, const int len) { - static const char alphanum[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" ; + static const char alphanum[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; for (int i = 0; i < len; ++i) { s[i] = alphanum[rand() % (sizeof(alphanum) - 1)]; } s[len] = 0; } + public: inline void Init(int init_val) { - sema_name[0]='/'; - sema_name[1]='s'; - sema_name[2]='e'; - sema_name[3]='/'; + sema_name[0] = '/'; + sema_name[1] = 's'; + sema_name[2] = 'e'; + sema_name[3] = '/'; GenRandomString(&sema_name[4], 16); - if((semPtr = sem_open(sema_name, O_CREAT, 0644, init_val)) == SEM_FAILED) { + if ((semPtr = sem_open(sema_name, O_CREAT, 0644, init_val)) == SEM_FAILED) { perror("sem_open"); exit(1); } - utils::Assert(semPtr != NULL, "create Semaphore error"); + utils::Check(semPtr != NULL, "create Semaphore error"); } inline void Destroy(void) { if (sem_close(semPtr) == -1) { @@ -103,53 +157,93 @@ class Semaphore { } inline void Post(void) { sem_post(semPtr); - } + } #else + private: sem_t sem; + public: inline void Init(int init_val) { - sem_init(&sem, 0, init_val); + if (sem_init(&sem, 0, init_val) != 0) { + utils::Error("Semaphore.Init:%s", strerror(errno)); + } } inline void Destroy(void) { - sem_destroy(&sem); + if (sem_destroy(&sem) != 0) { + utils::Error("Semaphore.Destroy:%s", strerror(errno)); + } } inline void Wait(void) { - sem_wait(&sem); + if (sem_wait(&sem) != 0) { + utils::Error("Semaphore.Wait:%s", strerror(errno)); + } } inline void Post(void) { - sem_post(&sem); + if (sem_post(&sem) != 0) { + utils::Error("Semaphore.Post:%s", strerror(errno)); + } } - #endif + #endif }; -// helper for c thread -// used to strictly call c++ function from pthread -struct ThreadContext { - void *(*entry)(void*); - void *param; -}; -extern "C" { - inline void *RunThreadContext(void *ctx_) { - ThreadContext *ctx = reinterpret_cast(ctx_); - void *ret = (*ctx->entry)(ctx->param); - delete ctx; - return ret; +// mutex that works with pthread +class Mutex { + public: + inline void Init(void) { + pthread_mutex_init(&mutex, NULL); } -} + inline void Lock(void) { + pthread_mutex_lock(&mutex); + } + inline void Unlock(void) { + pthread_mutex_unlock(&mutex); + } + inline void Destroy(void) { + pthread_mutex_destroy(&mutex); + } + + private: + friend class ConditionVariable; + pthread_mutex_t mutex; +}; + +// conditional variable that uses pthread +class ConditionVariable { + public: + // initialize conditional variable + inline void Init(void) { + pthread_cond_init(&cond, NULL); + } + // destroy the thread + inline void Destroy(void) { + pthread_cond_destroy(&cond); + } + // wait on the conditional variable + inline void Wait(Mutex 
*mutex) { + pthread_cond_wait(&cond, &(mutex->mutex)); + } + inline void Broadcast(void) { + pthread_cond_broadcast(&cond); + } + inline void Signal(void) { + pthread_cond_signal(&cond); + } + + private: + pthread_cond_t cond; +}; + /*!\brief simple thread class */ class Thread { private: - pthread_t thread; - + pthread_t thread; public : - inline void Start(void *entry(void*), void *param) { + inline void Start(void * entry(void*), void *param) { // NOLINT(*) pthread_attr_t attr; pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); - ThreadContext *ctx = new ThreadContext(); - ctx->entry = entry; ctx->param = param; - pthread_create(&thread, &attr, RunThreadContext, ctx); + pthread_create(&thread, &attr, entry, param); } inline int Join(void) { void *status; @@ -159,9 +253,8 @@ class Thread { inline void ThreadExit(void *status) { pthread_exit(status); } - } // namespace utils } // namespace xgboost #define XGBOOST_THREAD_PREFIX void * -#endif -#endif +#endif // Linux +#endif // XGBOOST_UTILS_THREAD_H_ NOLINT(*)
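The two-pass grouping utility in group_data.h is easiest to see in a small, single-threaded example. The sketch below follows the methods shown in the diff (InitBudget, AddBudget, InitStorage, Push); the include path and the exact template signature of ParallelGroupBuilder are assumptions made for illustration.

// Group (key, value) pairs into a CSR layout with the two-pass scheme:
// pass 1 counts entries per key (AddBudget), InitStorage turns the counts
// into begin offsets, pass 2 writes the values (Push).
#include <cstddef>
#include <utility>
#include <vector>
#include "group_data.h"  // assumed include path

void GroupByKey(const std::vector< std::pair<std::size_t, float> > &pairs,
                std::vector<std::size_t> *rptr,
                std::vector<float> *data) {
  std::vector< std::vector<std::size_t> > thread_rptr;
  xgboost::utils::ParallelGroupBuilder<float, std::size_t>
      builder(rptr, data, &thread_rptr);
  builder.InitBudget(0, 1);                  // key count is only a hint; one thread here
  for (std::size_t i = 0; i < pairs.size(); ++i) {
    builder.AddBudget(pairs[i].first, 0);    // pass 1: count values per key
  }
  builder.InitStorage();                     // counts -> CSR begin offsets
  for (std::size_t i = 0; i < pairs.size(); ++i) {
    builder.Push(pairs[i].first, pairs[i].second, 0);  // pass 2: fill values
  }
  // Afterwards data[(*rptr)[k] .. (*rptr)[k+1]) holds all values whose key is k.
}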
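The Mutex, ConditionVariable, Semaphore, and Thread wrappers in thread.h expose the same Init/Lock/Wait/Signal surface on both the Windows and pthread branches. A minimal producer/consumer handoff on the pthread branch might look like the sketch below; the include path is an assumption, and only the member functions visible above are used.

// One worker produces a value and signals a condition variable; the main
// thread waits under the mutex, guarding against spurious wakeups with a flag.
#include <cstddef>
#include "thread.h"  // assumed include path

struct Shared {
  xgboost::utils::Mutex lock;
  xgboost::utils::ConditionVariable cond;
  bool ready;
  int payload;
};

XGBOOST_THREAD_PREFIX Producer(void *arg) {
  Shared *s = static_cast<Shared*>(arg);
  s->lock.Lock();
  s->payload = 42;        // produce a value
  s->ready = true;
  s->cond.Signal();       // wake the waiting consumer
  s->lock.Unlock();
  return NULL;
}

int main(void) {
  Shared s;
  s.lock.Init();
  s.cond.Init();
  s.ready = false;
  xgboost::utils::Thread worker;
  worker.Start(Producer, &s);
  s.lock.Lock();
  while (!s.ready) {       // re-check the predicate after every wakeup
    s.cond.Wait(&s.lock);  // atomically unlock, sleep, re-lock
  }
  s.lock.Unlock();
  worker.Join();
  s.cond.Destroy();
  s.lock.Destroy();
  return s.payload == 42 ? 0 : 1;
}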