From 20043f63a668dc0fe58620a9737f45aff0d0c74a Mon Sep 17 00:00:00 2001
From: tqchen
Date: Fri, 1 Jan 2016 04:51:55 -0800
Subject: [PATCH] [TREE] Move colmaker

---
 dmlc-core                              |   2 +-
 include/xgboost/data.h                 |   2 +
 old_src/tree/updater_distcol-inl.hpp   | 175 -------------
 {old_src/utils => src/common}/bitmap.h |  17 +-
 src/common/random.h                    |   2 +-
 src/global.cc                          |   7 +
 {old_src => src}/tree/param.h          | 234 ++++++------------
 src/tree/tree_model.cc                 |   4 +
 .../tree/updater_colmaker.cc           |   0
 src/tree/updater_sync.cc               |   1 -
 10 files changed, 95 insertions(+), 349 deletions(-)
 delete mode 100644 old_src/tree/updater_distcol-inl.hpp
 rename {old_src/utils => src/common}/bitmap.h (85%)
 rename {old_src => src}/tree/param.h (57%)
 rename old_src/tree/updater_colmaker-inl.hpp => src/tree/updater_colmaker.cc (100%)

diff --git a/dmlc-core b/dmlc-core
index 98879773f..e5c8ed034 160000
--- a/dmlc-core
+++ b/dmlc-core
@@ -1 +1 @@
-Subproject commit 98879773f062e1c5b8a380e002c25672f2b48b13
+Subproject commit e5c8ed0342fbbdf7e38cafafb126f91bcca5ec72
diff --git a/include/xgboost/data.h b/include/xgboost/data.h
index e85537347..573c04e7c 100644
--- a/include/xgboost/data.h
+++ b/include/xgboost/data.h
@@ -201,6 +201,8 @@ class DMatrix {
  public:
   /*! \brief meta information of the dataset */
   virtual MetaInfo& info() = 0;
+  /*! \brief meta information of the dataset */
+  virtual const MetaInfo& info() const = 0;
   /*!
    * \brief get the row iterator, reset to beginning position
    * \note Only either RowIterator or column Iterator can be active.
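Note: the added const overload matters for read-only call sites, which previously
could not read metadata through a const DMatrix& without a const_cast. A standalone
sketch with toy stand-ins for DMatrix/MetaInfo (names mirror the patch; none of this
is xgboost code):

    #include <iostream>

    struct MetaInfo { unsigned long num_row = 0; };

    struct DMatrix {
      MetaInfo& info() { return info_; }
      const MetaInfo& info() const { return info_; }  // the overload this patch adds
     private:
      MetaInfo info_;
    };

    // Read-only consumers can now take const DMatrix&; without the const
    // overload this function would not compile.
    unsigned long RowCount(const DMatrix& dmat) { return dmat.info().num_row; }

    int main() {
      DMatrix dmat;
      dmat.info().num_row = 3;
      std::cout << RowCount(dmat) << std::endl;  // prints 3
      return 0;
    }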
diff --git a/old_src/tree/updater_distcol-inl.hpp b/old_src/tree/updater_distcol-inl.hpp
deleted file mode 100644
index e3d3f8b59..000000000
--- a/old_src/tree/updater_distcol-inl.hpp
+++ /dev/null
@@ -1,175 +0,0 @@
-/*!
- * Copyright 2014 by Contributors
- * \file updater_distcol-inl.hpp
- * \brief beta distributed version that takes a sub-column
- *        and construct a tree
- * \author Tianqi Chen
- */
-#ifndef XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_
-#define XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_
-
-#include <vector>
-#include <algorithm>
-#include "../sync/sync.h"
-#include "../utils/bitmap.h"
-#include "../utils/io.h"
-#include "./updater_colmaker-inl.hpp"
-#include "./updater_prune-inl.hpp"
-
-namespace xgboost {
-namespace tree {
-template<typename TStats>
-class DistColMaker : public ColMaker<TStats> {
- public:
-  DistColMaker(void) : builder(param) {}
-  virtual ~DistColMaker(void) {}
-  // set training parameter
-  virtual void SetParam(const char *name, const char *val) {
-    param.SetParam(name, val);
-    pruner.SetParam(name, val);
-  }
-  virtual void Update(const std::vector<bst_gpair> &gpair,
-                      IFMatrix *p_fmat,
-                      const BoosterInfo &info,
-                      const std::vector<RegTree*> &trees) {
-    TStats::CheckInfo(info);
-    utils::Check(trees.size() == 1, "DistColMaker: only support one tree at a time");
-    // build the tree
-    builder.Update(gpair, p_fmat, info, trees[0]);
-    // prune the tree, note that pruner will sync the tree
-    pruner.Update(gpair, p_fmat, info, trees);
-    // update position after the tree is pruned
-    builder.UpdatePosition(p_fmat, *trees[0]);
-  }
-  virtual const int* GetLeafPosition(void) const {
-    return builder.GetLeafPosition();
-  }
-
- private:
-  struct Builder : public ColMaker<TStats>::Builder {
-   public:
-    explicit Builder(const TrainParam &param)
-        : ColMaker<TStats>::Builder(param) {
-    }
-    inline void UpdatePosition(IFMatrix *p_fmat, const RegTree &tree) {
-      const std::vector<bst_uint> &rowset = p_fmat->buffered_rowset();
-      const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
-      #pragma omp parallel for schedule(static)
-      for (bst_omp_uint i = 0; i < ndata; ++i) {
-        const bst_uint ridx = rowset[i];
-        int nid = this->DecodePosition(ridx);
-        while (tree[nid].is_deleted()) {
-          nid = tree[nid].parent();
-          utils::Assert(nid >= 0, "distributed learning error");
-        }
-        this->position[ridx] = nid;
-      }
-    }
-    virtual const int* GetLeafPosition(void) const {
-      return BeginPtr(this->position);
-    }
-
-   protected:
-    virtual void SetNonDefaultPosition(const std::vector<int> &qexpand,
-                                       IFMatrix *p_fmat, const RegTree &tree) {
-      // step 2, classify the non-default data into right places
-      std::vector<unsigned> fsplits;
-      for (size_t i = 0; i < qexpand.size(); ++i) {
-        const int nid = qexpand[i];
-        if (!tree[nid].is_leaf()) {
-          fsplits.push_back(tree[nid].split_index());
-        }
-      }
-      // get the candidate split index
-      std::sort(fsplits.begin(), fsplits.end());
-      fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());
-      while (fsplits.size() != 0 && fsplits.back() >= p_fmat->NumCol()) {
-        fsplits.pop_back();
-      }
-      // bitmap is only word concurrent, set to bool first
-      {
-        bst_omp_uint ndata = static_cast<bst_omp_uint>(this->position.size());
-        boolmap.resize(ndata);
-        #pragma omp parallel for schedule(static)
-        for (bst_omp_uint j = 0; j < ndata; ++j) {
-          boolmap[j] = 0;
-        }
-      }
-      utils::IIterator<ColBatch> *iter = p_fmat->ColIterator(fsplits);
-      while (iter->Next()) {
-        const ColBatch &batch = iter->Value();
-        for (size_t i = 0; i < batch.size; ++i) {
-          ColBatch::Inst col = batch[i];
-          const bst_uint fid = batch.col_index[i];
-          const bst_omp_uint ndata = static_cast<bst_omp_uint>(col.length);
-          #pragma omp parallel for schedule(static)
-          for (bst_omp_uint j = 0; j < ndata; ++j) {
-            const bst_uint ridx = col[j].index;
-            const float fvalue = col[j].fvalue;
-            const int nid = this->DecodePosition(ridx);
-            if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) {
-              if (fvalue < tree[nid].split_cond()) {
-                if (!tree[nid].default_left()) boolmap[ridx] = 1;
-              } else {
-                if (tree[nid].default_left()) boolmap[ridx] = 1;
-              }
-            }
-          }
-        }
-      }
-
-      bitmap.InitFromBool(boolmap);
-      // communicate bitmap
-      rabit::Allreduce<rabit::op::BitOR>(BeginPtr(bitmap.data), bitmap.data.size());
-      const std::vector<bst_uint> &rowset = p_fmat->buffered_rowset();
-      // get the new position
-      const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
-      #pragma omp parallel for schedule(static)
-      for (bst_omp_uint i = 0; i < ndata; ++i) {
-        const bst_uint ridx = rowset[i];
-        const int nid = this->DecodePosition(ridx);
-        if (bitmap.Get(ridx)) {
-          utils::Assert(!tree[nid].is_leaf(), "inconsistent reduce information");
-          if (tree[nid].default_left()) {
-            this->SetEncodePosition(ridx, tree[nid].cright());
-          } else {
-            this->SetEncodePosition(ridx, tree[nid].cleft());
-          }
-        }
-      }
-    }
-    // synchronize the best solution of each node
-    virtual void SyncBestSolution(const std::vector<int> &qexpand) {
-      std::vector<SplitEntry> vec;
-      for (size_t i = 0; i < qexpand.size(); ++i) {
-        const int nid = qexpand[i];
-        for (int tid = 0; tid < this->nthread; ++tid) {
-          this->snode[nid].best.Update(this->stemp[tid][nid].best);
-        }
-        vec.push_back(this->snode[nid].best);
-      }
-      // TODO(tqchen) lazy version
-      // communicate best solution
-      reducer.Allreduce(BeginPtr(vec), vec.size());
-      // assign solution back
-      for (size_t i = 0; i < qexpand.size(); ++i) {
-        const int nid = qexpand[i];
-        this->snode[nid].best = vec[i];
-      }
-    }
-
-   private:
-    utils::BitMap bitmap;
-    std::vector<int> boolmap;
-    rabit::Reducer<SplitEntry, SplitEntry::Reduce> reducer;
-  };
-  // we directly introduce pruner here
-  TreePruner pruner;
-  // training parameter
-  TrainParam param;
-  // pointer to the builder
-  Builder builder;
-};
-}  // namespace tree
-}  // namespace xgboost
-#endif  // XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_
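Note: the deleted updater's position sync works because a bitwise OR over 32-bit
words is a valid Allreduce reduction: each worker marks its rows in a bool-sized
array (the bitmap is only word-level thread safe), packs the bits, and ORs the
words across workers. A standalone sketch of that pack-and-OR pattern
(illustrative; in the file above the cross-worker step is the
rabit::Allreduce<rabit::op::BitOR> call):

    #include <cstdint>
    #include <vector>

    int main() {
      const size_t nrow = 100;
      std::vector<int> boolmap(nrow, 0);  // per-row local decision, int-sized so
      boolmap[3] = 1;                     // parallel writes do not race on bits
      boolmap[64] = 1;                    // rows that go against the default direction

      // pack: one bit per row, 32 rows per word, so a word-level OR merges workers
      std::vector<uint32_t> bits((nrow + 31) / 32, 0U);
      for (size_t i = 0; i < nrow; ++i) {
        if (boolmap[i]) bits[i >> 5] |= (1U << (i & 31U));
      }
      // <-- each worker would OR here: rabit::Allreduce<rabit::op::BitOR>(bits.data(), bits.size());
      // query a single row's merged flag
      bool row3 = (bits[3 >> 5] >> (3 & 31U)) & 1U;
      return row3 ? 0 : 1;
    }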
diff --git a/old_src/utils/bitmap.h b/src/common/bitmap.h
similarity index 85%
rename from old_src/utils/bitmap.h
rename to src/common/bitmap.h
index eecccbda5..e8fd13109 100644
--- a/old_src/utils/bitmap.h
+++ b/src/common/bitmap.h
@@ -5,15 +5,14 @@
  * NOTE: bitmap is only threadsafe per word access, remember this when using bitmap
  * \author Tianqi Chen
  */
-#ifndef XGBOOST_UTILS_BITMAP_H_
-#define XGBOOST_UTILS_BITMAP_H_
+#ifndef XGBOOST_COMMON_BITMAP_H_
+#define XGBOOST_COMMON_BITMAP_H_
 
 #include <vector>
-#include "./utils.h"
-#include "./omp.h"
+#include <xgboost/base.h>
 
 namespace xgboost {
-namespace utils {
+namespace common {
 /*! \brief bit map that contains set of bit indicators */
 struct BitMap {
   /*! \brief internal data structure */
@@ -40,7 +39,7 @@ struct BitMap {
     data[i >> 5] |= (1 << (i & 31U));
   }
   /*! \brief initialize the value of bit map from vector of bool*/
-  inline void InitFromBool(const std::vector<int> &vec) {
+  inline void InitFromBool(const std::vector<int>& vec) {
     this->Resize(vec.size());
     // parallel over the full cases
     bst_omp_uint nsize = static_cast<bst_omp_uint>(vec.size() / 32);
@@ -59,10 +58,10 @@ struct BitMap {
     }
   }
   /*! \brief clear the bitmap, set all places to false */
-  inline void Clear(void) {
+  inline void Clear() {
     std::fill(data.begin(), data.end(), 0U);
  }
 };
-}  // namespace utils
+}  // namespace common
 }  // namespace xgboost
-#endif  // XGBOOST_UTILS_BITMAP_H_
+#endif  // XGBOOST_COMMON_BITMAP_H_
diff --git a/src/common/random.h b/src/common/random.h
index 16441a9c9..98e8332b4 100644
--- a/src/common/random.h
+++ b/src/common/random.h
@@ -19,7 +19,7 @@ typedef std::mt19937 RandomEngine;
 /*!
  * \brief global singleton of a random engine.
  *  Only use this engine when necessary, not thread-safe.
 */
-static RandomEngine* GlobalRandom();
+RandomEngine& GlobalRandom();  // NOLINT(*)
 }  // namespace common
 }  // namespace xgboost
diff --git a/src/global.cc b/src/global.cc
index 7b9a41cc2..5f6dd50c8 100644
--- a/src/global.cc
+++ b/src/global.cc
@@ -6,6 +6,7 @@
 #include <xgboost/objective.h>
 #include <xgboost/gbm.h>
 #include <xgboost/tree_updater.h>
+#include "./common/random.h"
 
 namespace dmlc {
 DMLC_REGISTRY_ENABLE(::xgboost::ObjFunctionReg);
@@ -52,5 +53,11 @@ TreeUpdater* TreeUpdater::Create(const char* name) {
   return (e->body)();
 }
 
+namespace common {
+RandomEngine& GlobalRandom() {
+  static RandomEngine inst;
+  return inst;
+}
+}
 }  // namespace xgboost
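Note: the random.h change replaces a broken declaration (a file-local static
function returning RandomEngine*, which every including translation unit would
have to define) with a single reference-returning accessor defined once in
global.cc, i.e. a function-local-static ("Meyers") singleton. A standalone sketch
of that pattern (RandomEngine is the std::mt19937 typedef from random.h):

    #include <iostream>
    #include <random>

    typedef std::mt19937 RandomEngine;

    RandomEngine& GlobalRandom() {
      static RandomEngine inst;  // constructed once, on first use
      return inst;
    }

    int main() {
      GlobalRandom().seed(42);   // all callers share one engine state
      std::uniform_int_distribution<int> dist(0, 9);
      std::cout << dist(GlobalRandom()) << std::endl;
      return 0;
    }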
diff --git a/old_src/tree/param.h b/src/tree/param.h
similarity index 57%
rename from old_src/tree/param.h
rename to src/tree/param.h
index 364e3572d..d1e8c6d7e 100644
--- a/old_src/tree/param.h
+++ b/src/tree/param.h
@@ -1,7 +1,7 @@
 /*!
  * Copyright 2014 by Contributors
  * \file param.h
- * \brief training parameters, statistics used to support tree construction
+ * \brief training parameters, statistics used to support tree construction.
  * \author Tianqi Chen
  */
 #ifndef XGBOOST_TREE_PARAM_H_
@@ -9,17 +9,16 @@
 
 #include <vector>
 #include <cstring>
-#include "../data.h"
 
 namespace xgboost {
 namespace tree {
 /*! \brief training parameters for regression tree */
-struct TrainParam{
+struct TrainParam : public dmlc::Parameter<TrainParam> {
   // learning step size for a time
-  float learning_rate;
+  float eta;
   // minimum loss change required for a split
-  float min_split_loss;
+  float gamma;
   // maximum depth of a tree
   int max_depth;
   //----- the rest parameters are less important ----
@@ -52,67 +51,55 @@
   // option for parallelization
   int parallel_option;
   // option to open cacheline optimization
-  int cache_opt;
+  bool cache_opt;
   // number of threads to be used for tree construction,
   // if OpenMP is enabled, if equals 0, use system default
   int nthread;
-  /*! \brief constructor */
-  TrainParam(void) {
-    learning_rate = 0.3f;
-    min_split_loss = 0.0f;
-    min_child_weight = 1.0f;
-    max_delta_step = 0.0f;
-    max_depth = 6;
-    reg_lambda = 1.0f;
-    reg_alpha = 0.0f;
-    default_direction = 0;
-    subsample = 1.0f;
-    colsample_bytree = 1.0f;
-    colsample_bylevel = 1.0f;
-    opt_dense_col = 1.0f;
-    nthread = 0;
-    size_leaf_vector = 0;
-    // enforce parallel option to 0 for now, investigate the other strategy
-    parallel_option = 0;
-    sketch_eps = 0.1f;
-    sketch_ratio = 2.0f;
-    cache_opt = 1;
-  }
-  /*!
-   * \brief set parameters from outside
-   * \param name name of the parameter
-   * \param val value of the parameter
-   */
-  inline void SetParam(const char *name, const char *val) {
-    using namespace std;
-    // sync-names
-    if (!strcmp(name, "gamma")) min_split_loss = static_cast<float>(atof(val));
-    if (!strcmp(name, "eta")) learning_rate = static_cast<float>(atof(val));
-    if (!strcmp(name, "lambda")) reg_lambda = static_cast<float>(atof(val));
-    if (!strcmp(name, "alpha")) reg_alpha = static_cast<float>(atof(val));
-    if (!strcmp(name, "learning_rate")) learning_rate = static_cast<float>(atof(val));
-    if (!strcmp(name, "min_child_weight")) min_child_weight = static_cast<float>(atof(val));
-    if (!strcmp(name, "min_split_loss")) min_split_loss = static_cast<float>(atof(val));
-    if (!strcmp(name, "max_delta_step")) max_delta_step = static_cast<float>(atof(val));
-    if (!strcmp(name, "reg_lambda")) reg_lambda = static_cast<float>(atof(val));
-    if (!strcmp(name, "reg_alpha")) reg_alpha = static_cast<float>(atof(val));
-    if (!strcmp(name, "subsample")) subsample = static_cast<float>(atof(val));
-    if (!strcmp(name, "colsample_bylevel")) colsample_bylevel = static_cast<float>(atof(val));
-    if (!strcmp(name, "colsample_bytree")) colsample_bytree = static_cast<float>(atof(val));
-    if (!strcmp(name, "sketch_eps")) sketch_eps = static_cast<float>(atof(val));
-    if (!strcmp(name, "sketch_ratio")) sketch_ratio = static_cast<float>(atof(val));
-    if (!strcmp(name, "opt_dense_col")) opt_dense_col = static_cast<float>(atof(val));
-    if (!strcmp(name, "size_leaf_vector")) size_leaf_vector = atoi(val);
-    if (!strcmp(name, "cache_opt")) cache_opt = atoi(val);
-    if (!strcmp(name, "max_depth")) max_depth = atoi(val);
-    if (!strcmp(name, "nthread")) nthread = atoi(val);
-    if (!strcmp(name, "parallel_option")) parallel_option = atoi(val);
-    if (!strcmp(name, "default_direction")) {
-      if (!strcmp(val, "learn")) default_direction = 0;
-      if (!strcmp(val, "left")) default_direction = 1;
-      if (!strcmp(val, "right")) default_direction = 2;
-    }
-  }
+
+  // declare the parameters
+  DMLC_DECLARE_PARAMETER(TrainParam) {
+    DMLC_DECLARE_FIELD(eta).set_lower_bound(0.0f).set_default(0.3f)
+        .describe("Learning rate(step size) of update.");
+    DMLC_DECLARE_FIELD(gamma).set_lower_bound(0.0f).set_default(0.0f)
+        .describe("Minimum loss reduction required to make a further partition.");
+    DMLC_DECLARE_FIELD(max_depth).set_lower_bound(0).set_default(6)
+        .describe("Maximum depth of the tree.");
+    DMLC_DECLARE_FIELD(min_child_weight).set_lower_bound(0.0f).set_default(1.0f)
+        .describe("Minimum sum of instance weight(hessian) needed in a child.");
+    DMLC_DECLARE_FIELD(reg_lambda).set_lower_bound(0.0f).set_default(1.0f)
+        .describe("L2 regularization on leaf weight");
+    DMLC_DECLARE_FIELD(reg_alpha).set_lower_bound(0.0f).set_default(0.0f)
+        .describe("L1 regularization on leaf weight");
+    DMLC_DECLARE_FIELD(default_direction)
+        .add_enum("learn", 0)
+        .add_enum("left", 1)
+        .add_enum("right", 2)
+        .describe("Default direction choice when encountering a missing value");
+    DMLC_DECLARE_FIELD(max_delta_step).set_lower_bound(0.0f).set_default(0.0f)
+        .describe("Maximum delta step we allow each tree's weight estimate to be. "\
+                  "If the value is set to 0, it means there is no constraint");
+    DMLC_DECLARE_FIELD(subsample).set_range(0.0f, 1.0f).set_default(1.0f)
+        .describe("Row subsample ratio of training instance.");
+    DMLC_DECLARE_FIELD(colsample_bylevel).set_range(0.0f, 1.0f).set_default(1.0f)
+        .describe("Subsample ratio of columns, resample on each level.");
+    DMLC_DECLARE_FIELD(colsample_bytree).set_range(0.0f, 1.0f).set_default(1.0f)
+        .describe("Subsample ratio of columns, resample on each tree construction.");
+    DMLC_DECLARE_FIELD(opt_dense_col).set_range(0.0f, 1.0f).set_default(1.0f)
+        .describe("EXP Param: speed optimization for dense column.");
+    DMLC_DECLARE_FIELD(sketch_eps).set_range(0.0f, 1.0f).set_default(0.1f)
+        .describe("EXP Param: Sketch accuracy of approximate algorithm.");
+    DMLC_DECLARE_FIELD(sketch_ratio).set_lower_bound(0.0f).set_default(2.0f)
+        .describe("EXP Param: Sketch accuracy related parameter of approximate algorithm.");
+    DMLC_DECLARE_FIELD(size_leaf_vector).set_lower_bound(0).set_default(0)
+        .describe("Size of leaf vectors, reserved for vector trees");
+    DMLC_DECLARE_FIELD(parallel_option).set_default(0)
+        .describe("Different types of parallelization algorithm.");
+    DMLC_DECLARE_FIELD(cache_opt).set_default(true)
+        .describe("EXP Param: Cache aware optimization.");
+    DMLC_DECLARE_FIELD(nthread).set_default(0)
+        .describe("Number of threads used for training.");
+  }
+
   // calculate the cost of loss function
   inline double CalcGain(double sum_grad, double sum_hess) const {
     if (sum_hess < min_child_weight) return 0.0;
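Note: the hand-rolled strcmp/atof parser removed above is replaced by dmlc-core's
declarative parameters, which give defaults, range checks, enum aliases, and
documentation from a single declaration. A standalone sketch of how such a
parameter struct is consumed (MyParam and its fields are illustrative, not part
of this patch):

    #include <dmlc/parameter.h>
    #include <iostream>
    #include <map>
    #include <string>

    struct MyParam : public dmlc::Parameter<MyParam> {
      float eta;
      int max_depth;
      DMLC_DECLARE_PARAMETER(MyParam) {
        DMLC_DECLARE_FIELD(eta).set_lower_bound(0.0f).set_default(0.3f)
            .describe("Learning rate.");
        DMLC_DECLARE_FIELD(max_depth).set_lower_bound(0).set_default(6)
            .describe("Maximum tree depth.");
      }
    };
    DMLC_REGISTER_PARAMETER(MyParam);  // exactly one translation unit does this

    int main() {
      MyParam param;
      std::map<std::string, std::string> kwargs{{"eta", "0.1"}};
      param.Init(kwargs);  // unset fields fall back to defaults; bad values throw
      std::cout << param.eta << " " << param.max_depth << std::endl;  // 0.1 6
      return 0;
    }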
the tree."); + DMLC_DECLARE_FIELD(min_child_weight).set_lower_bound(0.0f).set_default(1.0f) + .describe("Minimum sum of instance weight(hessian) needed in a child."); + DMLC_DECLARE_FIELD(reg_lambda).set_lower_bound(0.0f).set_default(1.0f) + .describe("L2 regularization on leaf weight"); + DMLC_DECLARE_FIELD(reg_alpha).set_lower_bound(0.0f).set_default(0.0f) + .describe("L1 regularization on leaf weight"); + DMLC_DECLARE_FIELD(default_direction) + .add_enum("learn", 0) + .add_enum("left", 1) + .add_enum("right", 2) + .describe("Default direction choice when encountering a missing value"); + DMLC_DECLARE_FIELD(max_delta_step).set_lower_bound(0.0f).set_default(0.0f) + .describe("Maximum delta step we allow each tree's weight estimate to be. "\ + "If the value is set to 0, it means there is no constraint"); + DMLC_DECLARE_FIELD(subsample).set_range(0.0f, 1.0f).set_default(1.0f) + .describe("Row subsample ratio of training instance."); + DMLC_DECLARE_FIELD(colsample_bylevel).set_range(0.0f, 1.0f).set_default(1.0f) + .describe("Subsample ratio of columns, resample on each level."); + DMLC_DECLARE_FIELD(colsample_bytree).set_range(0.0f, 1.0f).set_default(1.0f) + .describe("Subsample ratio of columns, resample on each tree construction."); + DMLC_DECLARE_FIELD(opt_dense_col).set_range(0.0f, 1.0f).set_default(1.0f) + .describe("EXP Param: speed optimization for dense column."); + DMLC_DECLARE_FIELD(sketch_eps).set_range(0.0f, 1.0f).set_default(0.1f) + .describe("EXP Param: Sketch accuracy of approximate algorithm."); + DMLC_DECLARE_FIELD(sketch_ratio).set_lower_bound(0.0f).set_default(2.0f) + .describe("EXP Param: Sketch accuracy related parameter of approximate algorithm."); + DMLC_DECLARE_FIELD(size_leaf_vector).set_lower_bound(0).set_default(0) + .describe("Size of leaf vectors, reserved for vector trees"); + DMLC_DECLARE_FIELD(parallel_option).set_default(0) + .describe("Different types of parallelization algorithm."); + DMLC_DECLARE_FIELD(cache_opt).set_default(true) + .describe("EXP Param: Cache aware optimization."); + DMLC_DECLARE_FIELD(nthread).set_default(0) + .describe("Number of threads used for training."); } + // calculate the cost of loss function inline double CalcGain(double sum_grad, double sum_hess) const { if (sum_hess < min_child_weight) return 0.0; @@ -169,16 +156,16 @@ struct TrainParam{ } /*! \brief given the loss change, whether we need to invoke pruning */ inline bool need_prune(double loss_chg, int depth) const { - return loss_chg < this->min_split_loss; + return loss_chg < this->gamma; } /*! \brief whether we can split with current hessian */ inline bool cannot_split(double sum_hess, int depth) const { return sum_hess < this->min_child_weight * 2.0; } /*! \brief maximum sketch size */ - inline unsigned max_sketch_size(void) const { + inline unsigned max_sketch_size() const { unsigned ret = static_cast(sketch_ratio / sketch_eps); - utils::Check(ret > 0, "sketch_ratio/sketch_eps must be bigger than 1"); + CHECK_GT(ret, 0); return ret; } @@ -206,15 +193,15 @@ struct GradStats { */ static const int kSimpleStats = 1; /*! \brief constructor, the object must be cleared during construction */ - explicit GradStats(const TrainParam ¶m) { + explicit GradStats(const TrainParam& param) { this->Clear(); } /*! \brief clear the statistics */ - inline void Clear(void) { + inline void Clear() { sum_grad = sum_hess = 0.0f; } /*! 
@@ -206,15 +193,15 @@ struct GradStats {
    */
   static const int kSimpleStats = 1;
   /*! \brief constructor, the object must be cleared during construction */
-  explicit GradStats(const TrainParam &param) {
+  explicit GradStats(const TrainParam& param) {
     this->Clear();
   }
   /*! \brief clear the statistics */
-  inline void Clear(void) {
+  inline void Clear() {
     sum_grad = sum_hess = 0.0f;
   }
   /*! \brief check if necessary information is ready */
-  inline static void CheckInfo(const BoosterInfo &info) {
+  inline static void CheckInfo(const MetaInfo& info) {
   }
   /*!
    * \brief accumulate statistics
@@ -229,130 +216,53 @@
    * \param info the additional information
    * \param ridx instance index of this instance
    */
-  inline void Add(const std::vector<bst_gpair> &gpair,
-                  const BoosterInfo &info,
+  inline void Add(const std::vector<bst_gpair>& gpair,
+                  const MetaInfo& info,
                   bst_uint ridx) {
-    const bst_gpair &b = gpair[ridx];
+    const bst_gpair& b = gpair[ridx];
     this->Add(b.grad, b.hess);
   }
   /*! \brief calculate leaf weight */
-  inline double CalcWeight(const TrainParam &param) const {
+  inline double CalcWeight(const TrainParam& param) const {
     return param.CalcWeight(sum_grad, sum_hess);
   }
   /*! \brief calculate gain of the solution */
-  inline double CalcGain(const TrainParam &param) const {
+  inline double CalcGain(const TrainParam& param) const {
     return param.CalcGain(sum_grad, sum_hess);
   }
   /*! \brief add statistics to the data */
-  inline void Add(const GradStats &b) {
+  inline void Add(const GradStats& b) {
     this->Add(b.sum_grad, b.sum_hess);
   }
   /*! \brief same as add, reduce is used in All Reduce */
-  inline static void Reduce(GradStats &a, const GradStats &b) { // NOLINT(*)
+  inline static void Reduce(GradStats& a, const GradStats& b) { // NOLINT(*)
     a.Add(b);
   }
   /*! \brief set current value to a - b */
-  inline void SetSubstract(const GradStats &a, const GradStats &b) {
+  inline void SetSubstract(const GradStats& a, const GradStats& b) {
     sum_grad = a.sum_grad - b.sum_grad;
     sum_hess = a.sum_hess - b.sum_hess;
   }
   /*! \return whether the statistics is not used yet */
-  inline bool Empty(void) const {
+  inline bool Empty() const {
     return sum_hess == 0.0;
   }
   /*! \brief set leaf vector value based on statistics */
-  inline void SetLeafVec(const TrainParam &param, bst_float *vec) const {
+  inline void SetLeafVec(const TrainParam& param, bst_float *vec) const {
   }
   // constructor to allow inheritance
-  GradStats(void) {}
+  GradStats() {}
   /*! \brief add statistics to the data */
   inline void Add(double grad, double hess) {
     sum_grad += grad; sum_hess += hess;
   }
 };
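Note: SetSubstract exists because the split search accumulates statistics only for
the left child while scanning a feature column; the right child is derived as
parent minus left instead of paying for a second scan. A standalone sketch with a
toy stand-in for GradStats:

    #include <cassert>

    struct Stats { double g, h; };

    int main() {
      Stats parent{10.0, 8.0};  // totals for the node, computed once
      Stats left{6.0, 5.0};     // accumulated while scanning a feature
      // what SetSubstract(parent, left) computes for the right child:
      Stats right{parent.g - left.g, parent.h - left.h};
      assert(right.g == 4.0 && right.h == 3.0);
      return 0;
    }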
-/*! \brief vectorized cv statistics */
-template<int vsize>
-struct CVGradStats : public GradStats {
-  // additional statistics
-  GradStats train[vsize], valid[vsize];
-  // constructor
-  explicit CVGradStats(const TrainParam &param) {
-    utils::Check(param.size_leaf_vector == vsize,
-                 "CVGradStats: vsize must match size_leaf_vector");
-    this->Clear();
-  }
-  /*! \brief check if necessary information is ready */
-  inline static void CheckInfo(const BoosterInfo &info) {
-    utils::Check(info.fold_index.size() != 0,
-                 "CVGradStats: require fold_index");
-  }
-  /*! \brief clear the statistics */
-  inline void Clear(void) {
-    GradStats::Clear();
-    for (unsigned i = 0; i < vsize; ++i) {
-      train[i].Clear(); valid[i].Clear();
-    }
-  }
-  inline void Add(const std::vector<bst_gpair> &gpair,
-                  const BoosterInfo &info,
-                  bst_uint ridx) {
-    GradStats::Add(gpair[ridx].grad, gpair[ridx].hess);
-    const size_t step = info.fold_index.size();
-    for (unsigned i = 0; i < vsize; ++i) {
-      const bst_gpair &b = gpair[(i + 1) * step + ridx];
-      if (info.fold_index[ridx] == i) {
-        valid[i].Add(b.grad, b.hess);
-      } else {
-        train[i].Add(b.grad, b.hess);
-      }
-    }
-  }
-  /*! \brief calculate gain of the solution */
-  inline double CalcGain(const TrainParam &param) const {
-    double ret = 0.0;
-    for (unsigned i = 0; i < vsize; ++i) {
-      ret += param.CalcGain(train[i].sum_grad,
-                            train[i].sum_hess,
-                            vsize * valid[i].sum_grad,
-                            vsize * valid[i].sum_hess);
-    }
-    return ret / vsize;
-  }
-  /*! \brief add statistics to the data */
-  inline void Add(const CVGradStats &b) {
-    GradStats::Add(b);
-    for (unsigned i = 0; i < vsize; ++i) {
-      train[i].Add(b.train[i]);
-      valid[i].Add(b.valid[i]);
-    }
-  }
-  /*! \brief same as add, reduce is used in All Reduce */
-  inline static void Reduce(CVGradStats &a, const CVGradStats &b) { // NOLINT(*)
-    a.Add(b);
-  }
-  /*! \brief set current value to a - b */
-  inline void SetSubstract(const CVGradStats &a, const CVGradStats &b) {
-    GradStats::SetSubstract(a, b);
-    for (int i = 0; i < vsize; ++i) {
-      train[i].SetSubstract(a.train[i], b.train[i]);
-      valid[i].SetSubstract(a.valid[i], b.valid[i]);
-    }
-  }
-  /*! \brief set leaf vector value based on statistics */
-  inline void SetLeafVec(const TrainParam &param, bst_float *vec) const {
-    for (int i = 0; i < vsize; ++i) {
-      vec[i] = param.learning_rate *
-               param.CalcWeight(train[i].sum_grad, train[i].sum_hess);
-    }
-  }
-};
-
 /*!
  * \brief statistics that is helpful to store
  *   and represent a split solution for the tree
  */
-struct SplitEntry{
+struct SplitEntry {
   /*! \brief loss change after split this node */
   bst_float loss_chg;
   /*! \brief split index */
@@ -360,7 +270,7 @@
   /*! \brief split value */
   float split_value;
   /*! \brief constructor */
-  SplitEntry(void) : loss_chg(0.0f), sindex(0), split_value(0.0f) {}
+  SplitEntry() : loss_chg(0.0f), sindex(0), split_value(0.0f) {}
   /*!
    * \brief decides whether we can replace current entry with the given statistics
    *   This function gives better priority to lower index when loss_chg == new_loss_chg.
@@ -380,7 +290,7 @@
    * \param e candidate split solution
    * \return whether the proposed split is better and can replace current split
    */
-  inline bool Update(const SplitEntry &e) {
+  inline bool Update(const SplitEntry& e) {
     if (this->NeedReplace(e.loss_chg, e.split_index())) {
       this->loss_chg = e.loss_chg;
       this->sindex = e.sindex;
       this->split_value = e.split_value;
       return true;
     } else {
       return false;
     }
   }
@@ -411,15 +321,15 @@
   inline bool Update(bst_float new_loss_chg, unsigned split_index,
                      float new_split_value, bool default_left) {
     if (this->NeedReplace(new_loss_chg, split_index)) {
       this->loss_chg = new_loss_chg;
       if (default_left) split_index |= (1U << 31);
       this->sindex = split_index;
       this->split_value = new_split_value;
       return true;
     } else {
       return false;
     }
   }
   /*! \brief same as update, used by AllReduce*/
-  inline static void Reduce(SplitEntry &dst, const SplitEntry &src) { // NOLINT(*)
+  inline static void Reduce(SplitEntry& dst, const SplitEntry& src) { // NOLINT(*)
     dst.Update(src);
   }
   /*!\return feature index to split on */
-  inline unsigned split_index(void) const {
+  inline unsigned split_index() const {
     return sindex & ((1U << 31) - 1U);
   }
   /*!\return whether missing value goes to left branch */
-  inline bool default_left(void) const {
+  inline bool default_left() const {
     return (sindex >> 31) != 0;
   }
 };
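Note: the "better priority to lower index" rule mentioned in the NeedReplace
comment makes the winner of SplitEntry::Reduce deterministic when several
features tie on loss_chg, so every worker converges to the same split after
Allreduce. An illustrative re-statement of that decision rule (not the verbatim
xgboost implementation):

    #include <iostream>

    struct Entry { float loss_chg; unsigned findex; };

    // Should cand replace cur? Higher gain wins; on a tie, lower feature index.
    bool Better(const Entry& cur, const Entry& cand) {
      if (cand.loss_chg != cur.loss_chg) return cand.loss_chg > cur.loss_chg;
      return cand.findex < cur.findex;  // deterministic tie-break
    }

    int main() {
      Entry cur{0.5f, 7}, cand{0.5f, 3};
      std::cout << std::boolalpha << Better(cur, cand) << std::endl;  // true
      return 0;
    }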
diff --git a/src/tree/tree_model.cc b/src/tree/tree_model.cc
index cb0d9d132..06fb0055b 100644
--- a/src/tree/tree_model.cc
+++ b/src/tree/tree_model.cc
@@ -5,11 +5,15 @@
  */
 #include <xgboost/tree_model.h>
 #include <sstream>
+#include "./param.h"
 
 namespace xgboost {
 // register tree parameter
 DMLC_REGISTER_PARAMETER(TreeParam);
 
+namespace tree {
+DMLC_REGISTER_PARAMETER(TrainParam);
+}
 // internal function to dump regression tree to text
 void DumpRegTree2Text(std::stringstream& fo,  // NOLINT(*)
                       const RegTree& tree,
diff --git a/old_src/tree/updater_colmaker-inl.hpp b/src/tree/updater_colmaker.cc
similarity index 100%
rename from old_src/tree/updater_colmaker-inl.hpp
rename to src/tree/updater_colmaker.cc
diff --git a/src/tree/updater_sync.cc b/src/tree/updater_sync.cc
index a620833f1..00a0b95bb 100644
--- a/src/tree/updater_sync.cc
+++ b/src/tree/updater_sync.cc
@@ -47,4 +47,3 @@ XGBOOST_REGISTER_TREE_UPDATER(TreeSyncher, "sync")
 });
 }  // namespace tree
 }  // namespace xgboost
-
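Note: XGBOOST_REGISTER_TREE_UPDATER above and TreeUpdater::Create in global.cc are
the two ends of a name-to-factory registry. A generic standalone sketch of the same
pattern (simplified and illustrative; dmlc-core's registry is the real mechanism):

    #include <functional>
    #include <iostream>
    #include <map>
    #include <memory>
    #include <string>

    struct Updater { virtual ~Updater() = default; virtual const char* Name() = 0; };

    // one global name -> factory table, function-local static for safe init order
    std::map<std::string, std::function<Updater*()>>& Registry() {
      static std::map<std::string, std::function<Updater*()>> r;
      return r;
    }

    struct Syncher : Updater {
      const char* Name() override { return "sync"; }
    };

    // file-scope registration object, like the expansion of the REGISTER macro
    static bool reg_sync = (Registry()["sync"] = [] { return new Syncher(); }, true);

    std::unique_ptr<Updater> Create(const std::string& name) {
      auto it = Registry().find(name);
      if (it == Registry().end()) return nullptr;  // the real Create errors out here
      return std::unique_ptr<Updater>(it->second());
    }

    int main() {
      auto up = Create("sync");
      std::cout << (up ? up->Name() : "unknown") << std::endl;  // sync
      return 0;
    }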