diff --git a/amalgamation/xgboost-all0.cc b/amalgamation/xgboost-all0.cc
index 26fbd1b7a..5f694a63a 100644
--- a/amalgamation/xgboost-all0.cc
+++ b/amalgamation/xgboost-all0.cc
@@ -48,7 +48,7 @@
 #include "../src/tree/tree_model.cc"
 #include "../src/tree/tree_updater.cc"
 #include "../src/tree/updater_colmaker.cc"
-#include "../src/tree/updater_fast_hist.cc"
+#include "../src/tree/updater_quantile_hist.cc"
 #include "../src/tree/updater_prune.cc"
 #include "../src/tree/updater_refresh.cc"
 #include "../src/tree/updater_sync.cc"
diff --git a/src/cli_main.cc b/src/cli_main.cc
index eb2719194..c5851169e 100644
--- a/src/cli_main.cc
+++ b/src/cli_main.cc
@@ -19,7 +19,6 @@
 #include
 #include
 #include
-#include "./common/sync.h"
 #include "./common/config.h"
diff --git a/src/common/hist_util.cc b/src/common/hist_util.cc
index 713f1f844..da6e4d770 100644
--- a/src/common/hist_util.cc
+++ b/src/common/hist_util.cc
@@ -4,10 +4,11 @@
  * \brief Utilities to store histograms
  * \author Philip Cho, Tianqi Chen
  */
+#include <rabit/rabit.h>
 #include
 #include
 #include
-#include "./sync.h"
+
 #include "./random.h"
 #include "./column_matrix.h"
 #include "./hist_util.h"
@@ -216,7 +217,7 @@ FindGroups(const std::vector<unsigned>& feature_list,
            const std::vector<size_t>& feature_nnz,
            const ColumnMatrix& colmat,
            size_t nrow,
-           const FastHistParam& param) {
+           const tree::TrainParam& param) {
   /* Goal: Bundle features together that has little or no "overlap", i.e.
            only a few data points should have nonzero values for member features.
@@ -278,7 +279,7 @@ FindGroups(const std::vector<unsigned>& feature_list,
 inline std::vector<std::vector<unsigned>>
 FastFeatureGrouping(const GHistIndexMatrix& gmat,
                     const ColumnMatrix& colmat,
-                    const FastHistParam& param) {
+                    const tree::TrainParam& param) {
   const size_t nrow = gmat.row_ptr.size() - 1;
   const size_t nfeature = gmat.cut.row_ptr.size() - 1;
@@ -332,7 +333,7 @@ FastFeatureGrouping(const GHistIndexMatrix& gmat,
 void GHistIndexBlockMatrix::Init(const GHistIndexMatrix& gmat,
                                  const ColumnMatrix& colmat,
-                                 const FastHistParam& param) {
+                                 const tree::TrainParam& param) {
   cut_ = &gmat.cut;
   const size_t nrow = gmat.row_ptr.size() - 1;
diff --git a/src/common/hist_util.h b/src/common/hist_util.h
index 1d5f3c8f1..ad83dd6c8 100644
--- a/src/common/hist_util.h
+++ b/src/common/hist_util.h
@@ -11,7 +11,6 @@
 #include
 #include
 #include "row_set.h"
-#include "../tree/fast_hist_param.h"
 #include "../tree/param.h"
 #include "./quantile.h"
@@ -19,8 +18,6 @@
 namespace xgboost {
 namespace common {

-using tree::FastHistParam;
-
 /*! \brief sums of gradient statistics corresponding to a histogram bin */
 struct GHistEntry {
   /*! \brief sum of first-order gradient statistics */
@@ -145,7 +142,7 @@ class GHistIndexBlockMatrix {
  public:
   void Init(const GHistIndexMatrix& gmat,
             const ColumnMatrix& colmat,
-            const FastHistParam& param);
+            const tree::TrainParam& param);

   inline GHistIndexBlock operator[](size_t i) const {
     return {blocks_[i].row_ptr_begin, blocks_[i].index_begin};
diff --git a/src/common/io.h b/src/common/io.h
index 6f792a589..29d68abec 100644
--- a/src/common/io.h
+++ b/src/common/io.h
@@ -9,9 +9,9 @@
 #define XGBOOST_COMMON_IO_H_

 #include
+#include <rabit/rabit.h>
 #include
 #include
-#include "./sync.h"

 namespace xgboost {
 namespace common {
diff --git a/src/common/sync.h b/src/common/sync.h
deleted file mode 100644
index 31b6471a3..000000000
--- a/src/common/sync.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/*!
- * Copyright 2014 by Contributors
- * \file sync.h
- * \brief the synchronization module of rabit
- *        redirects to rabit header
- * \author Tianqi Chen
- */
-#ifndef XGBOOST_COMMON_SYNC_H_
-#define XGBOOST_COMMON_SYNC_H_
-
-#include <rabit/rabit.h>
-
-#endif  // XGBOOST_COMMON_SYNC_H_
diff --git a/src/learner.cc b/src/learner.cc
index 719241043..f7e51151f 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -211,8 +211,8 @@ class LearnerImpl : public Learner {
       break;
     case TreeMethod::kHist:
       LOG(CONSOLE) << "Tree method is selected to be 'hist', which uses a "
-                      "single updater grow_fast_histmaker.";
-      cfg_["updater"] = "grow_fast_histmaker";
+                      "single updater grow_quantile_histmaker.";
+      cfg_["updater"] = "grow_quantile_histmaker";
       break;
     case TreeMethod::kGPUExact:
       this->AssertGPUSupport();
diff --git a/src/logging.cc b/src/logging.cc
index 6f5eb384b..3394e5c6e 100644
--- a/src/logging.cc
+++ b/src/logging.cc
@@ -4,9 +4,9 @@
  * \brief Implementation of loggers.
  * \author Tianqi Chen
  */
+#include <rabit/rabit.h>
 #include
 #include
-#include "./common/sync.h"

 #if !defined(XGBOOST_STRICT_R_MODE) || XGBOOST_STRICT_R_MODE == 0
 // Override logging mechanism for non-R interfaces
diff --git a/src/metric/elementwise_metric.cc b/src/metric/elementwise_metric.cc
index a9df69e11..6ca022e0e 100644
--- a/src/metric/elementwise_metric.cc
+++ b/src/metric/elementwise_metric.cc
@@ -4,11 +4,11 @@
  * \brief evaluation metrics for elementwise binary or regression.
  * \author Kailong Chen, Tianqi Chen
  */
+#include <rabit/rabit.h>
 #include
 #include
 #include
 #include "../common/math.h"
-#include "../common/sync.h"

 namespace xgboost {
 namespace metric {
diff --git a/src/metric/multiclass_metric.cc b/src/metric/multiclass_metric.cc
index c68ebc25b..be6279980 100644
--- a/src/metric/multiclass_metric.cc
+++ b/src/metric/multiclass_metric.cc
@@ -4,9 +4,9 @@
  * \brief evaluation metrics for multiclass classification.
  * \author Kailong Chen, Tianqi Chen
  */
+#include <rabit/rabit.h>
 #include
 #include
-#include "../common/sync.h"
 #include "../common/math.h"

 namespace xgboost {
diff --git a/src/metric/rank_metric.cc b/src/metric/rank_metric.cc
index 15cbfccd8..d1f6af909 100644
--- a/src/metric/rank_metric.cc
+++ b/src/metric/rank_metric.cc
@@ -4,10 +4,10 @@
  * \brief prediction rank based metrics.
  * \author Kailong Chen, Tianqi Chen
  */
+#include <rabit/rabit.h>
 #include
 #include
 #include
-#include "../common/sync.h"
 #include "../common/math.h"

 namespace xgboost {
diff --git a/src/tree/fast_hist_param.h b/src/tree/fast_hist_param.h
deleted file mode 100644
index 39d009ff6..000000000
--- a/src/tree/fast_hist_param.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*!
- * Copyright 2017 by Contributors
- * \file updater_fast_hist.h
- * \brief parameters for histogram-based training
- * \author Philip Cho, Tianqi Chen
- */
-#ifndef XGBOOST_TREE_FAST_HIST_PARAM_H_
-#define XGBOOST_TREE_FAST_HIST_PARAM_H_
-
-namespace xgboost {
-namespace tree {
-
-/*! \brief training parameters for histogram-based training */
-struct FastHistParam : public dmlc::Parameter<FastHistParam> {
-  int colmat_dtype;
-  // percentage threshold for treating a feature as sparse
-  // e.g. 0.2 indicates a feature with fewer than 20% nonzeros is considered sparse
-  double sparse_threshold;
-  // use feature grouping? (default yes)
-  int enable_feature_grouping;
-  // when grouping features, how many "conflicts" to allow.
-  //    conflict is when an instance has nonzero values for two or more features
-  //    default is 0, meaning features should be strictly complementary
-  double max_conflict_rate;
-  // when grouping features, how much effort to expend to prevent singleton groups
-  // we'll try to insert each feature into existing groups before creating a new group
-  // for that feature; to save time, only up to (max_search_group) of existing groups
-  // will be considered. If set to zero, ALL existing groups will be examined
-  unsigned max_search_group;
-
-  // declare the parameters
-  DMLC_DECLARE_PARAMETER(FastHistParam) {
-    DMLC_DECLARE_FIELD(sparse_threshold).set_range(0, 1.0).set_default(0.2)
-      .describe("percentage threshold for treating a feature as sparse");
-    DMLC_DECLARE_FIELD(enable_feature_grouping).set_lower_bound(0).set_default(0)
-      .describe("if >0, enable feature grouping to ameliorate work imbalance "
-                "among worker threads");
-    DMLC_DECLARE_FIELD(max_conflict_rate).set_range(0, 1.0).set_default(0)
-      .describe("when grouping features, how many \"conflicts\" to allow."
-                "conflict is when an instance has nonzero values for two or more features."
-                "default is 0, meaning features should be strictly complementary.");
-    DMLC_DECLARE_FIELD(max_search_group).set_lower_bound(0).set_default(100)
-      .describe("when grouping features, how much effort to expend to prevent "
-                "singleton groups. We'll try to insert each feature into existing "
-                "groups before creating a new group for that feature; to save time, "
-                "only up to (max_search_group) of existing groups will be "
-                "considered. If set to zero, ALL existing groups will be examined.");
-  }
-};
-
-}  // namespace tree
-}  // namespace xgboost
-
-#endif  // XGBOOST_TREE_FAST_HIST_PARAM_H_
diff --git a/src/tree/param.h b/src/tree/param.h
index 0de689eb1..3662193e5 100644
--- a/src/tree/param.h
+++ b/src/tree/param.h
@@ -81,6 +81,23 @@ struct TrainParam : public dmlc::Parameter<TrainParam> {
   int gpu_batch_nrows;
   // the criteria to use for ranking splits
   std::string split_evaluator;
+
+  // ------ From cpu quantile histogram -------.
+  // percentage threshold for treating a feature as sparse
+  // e.g. 0.2 indicates a feature with fewer than 20% nonzeros is considered sparse
+  double sparse_threshold;
+  // use feature grouping? (default yes)
+  int enable_feature_grouping;
+  // when grouping features, how many "conflicts" to allow.
+  //    conflict is when an instance has nonzero values for two or more features
+  //    default is 0, meaning features should be strictly complementary
+  double max_conflict_rate;
+  // when grouping features, how much effort to expend to prevent singleton groups
+  // we'll try to insert each feature into existing groups before creating a new group
+  // for that feature; to save time, only up to (max_search_group) of existing groups
+  // will be considered. If set to zero, ALL existing groups will be examined
+  unsigned max_search_group;
+
   // declare the parameters
   DMLC_DECLARE_PARAMETER(TrainParam) {
     DMLC_DECLARE_FIELD(learning_rate)
@@ -196,6 +213,24 @@ struct TrainParam : public dmlc::Parameter<TrainParam> {
     DMLC_DECLARE_FIELD(split_evaluator)
         .set_default("elastic_net,monotonic,interaction")
         .describe("The criteria to use for ranking splits");
+
+    // ------ From cpu quantile histogram -------.
+    DMLC_DECLARE_FIELD(sparse_threshold).set_range(0, 1.0).set_default(0.2)
+      .describe("percentage threshold for treating a feature as sparse");
+    DMLC_DECLARE_FIELD(enable_feature_grouping).set_lower_bound(0).set_default(0)
+      .describe("if >0, enable feature grouping to ameliorate work imbalance "
+                "among worker threads");
+    DMLC_DECLARE_FIELD(max_conflict_rate).set_range(0, 1.0).set_default(0)
+      .describe("when grouping features, how many \"conflicts\" to allow."
+                "conflict is when an instance has nonzero values for two or more features."
+                "default is 0, meaning features should be strictly complementary.");
+    DMLC_DECLARE_FIELD(max_search_group).set_lower_bound(0).set_default(100)
+      .describe("when grouping features, how much effort to expend to prevent "
+                "singleton groups. We'll try to insert each feature into existing "
+                "groups before creating a new group for that feature; to save time, "
+                "only up to (max_search_group) of existing groups will be "
+                "considered. If set to zero, ALL existing groups will be examined.");
+
     // add alias of parameters
     DMLC_DECLARE_ALIAS(reg_lambda, lambda);
     DMLC_DECLARE_ALIAS(reg_alpha, alpha);
@@ -518,7 +553,7 @@ struct SplitEntry {
       this->loss_chg = new_loss_chg;
       if (default_left) {
         split_index |= (1U << 31);
-}
+      }
       this->sindex = split_index;
       this->split_value = new_split_value;
       return true;
diff --git a/src/tree/tree_updater.cc b/src/tree/tree_updater.cc
index 66227a78a..a1d4432aa 100644
--- a/src/tree/tree_updater.cc
+++ b/src/tree/tree_updater.cc
@@ -31,7 +31,7 @@ DMLC_REGISTRY_LINK_TAG(updater_colmaker);
 DMLC_REGISTRY_LINK_TAG(updater_skmaker);
 DMLC_REGISTRY_LINK_TAG(updater_refresh);
 DMLC_REGISTRY_LINK_TAG(updater_prune);
-DMLC_REGISTRY_LINK_TAG(updater_fast_hist);
+DMLC_REGISTRY_LINK_TAG(updater_quantile_hist);
 DMLC_REGISTRY_LINK_TAG(updater_histmaker);
 DMLC_REGISTRY_LINK_TAG(updater_sync);
 #ifdef XGBOOST_USE_CUDA
diff --git a/src/tree/updater_basemaker-inl.h b/src/tree/updater_basemaker-inl.h
index e6c021fc9..2397f249d 100644
--- a/src/tree/updater_basemaker-inl.h
+++ b/src/tree/updater_basemaker-inl.h
@@ -7,6 +7,8 @@
 #ifndef XGBOOST_TREE_UPDATER_BASEMAKER_INL_H_
 #define XGBOOST_TREE_UPDATER_BASEMAKER_INL_H_

+#include <rabit/rabit.h>
+
 #include
 #include
 #include
@@ -14,8 +16,8 @@
 #include
 #include
 #include
+
 #include "./param.h"
-#include "../common/sync.h"
 #include "../common/io.h"
 #include "../common/random.h"
 #include "../common/quantile.h"
diff --git a/src/tree/updater_colmaker.cc b/src/tree/updater_colmaker.cc
index bda3103b6..0ab671f55 100644
--- a/src/tree/updater_colmaker.cc
+++ b/src/tree/updater_colmaker.cc
@@ -4,15 +4,16 @@
  * \brief use columnwise update to construct a tree
  * \author Tianqi Chen
 */
+#include <rabit/rabit.h>
 #include
 #include
 #include
 #include
 #include
+
 #include "./param.h"
 #include "../common/random.h"
 #include "../common/bitmap.h"
-#include "../common/sync.h"
 #include "split_evaluator.h"

 namespace xgboost {
diff --git a/src/tree/updater_fast_hist.cc b/src/tree/updater_fast_hist.cc
deleted file mode 100644
index dff164714..000000000
--- a/src/tree/updater_fast_hist.cc
+++ /dev/null
@@ -1,873 +0,0 @@
-/*!
- * Copyright 2017 by Contributors - * \file updater_fast_hist.cc - * \brief use quantized feature values to construct a tree - * \author Philip Cho, Tianqi Checn - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "./param.h" -#include "./fast_hist_param.h" -#include "./split_evaluator.h" -#include "../common/random.h" -#include "../common/bitmap.h" -#include "../common/sync.h" -#include "../common/hist_util.h" -#include "../common/row_set.h" -#include "../common/column_matrix.h" - -namespace xgboost { -namespace tree { - -using xgboost::common::HistCutMatrix; -using xgboost::common::GHistIndexMatrix; -using xgboost::common::GHistIndexBlockMatrix; -using xgboost::common::GHistIndexRow; -using xgboost::common::GHistEntry; -using xgboost::common::HistCollection; -using xgboost::common::RowSetCollection; -using xgboost::common::GHistRow; -using xgboost::common::GHistBuilder; -using xgboost::common::ColumnMatrix; -using xgboost::common::Column; - -DMLC_REGISTRY_FILE_TAG(updater_fast_hist); - -DMLC_REGISTER_PARAMETER(FastHistParam); - -/*! \brief construct a tree using quantized feature values */ -class FastHistMaker: public TreeUpdater { - public: - void Init(const std::vector >& args) override { - // initialize pruner - if (!pruner_) { - pruner_.reset(TreeUpdater::Create("prune")); - } - pruner_->Init(args); - param_.InitAllowUnknown(args); - fhparam_.InitAllowUnknown(args); - is_gmat_initialized_ = false; - - // initialise the split evaluator - if (!spliteval_) { - spliteval_.reset(SplitEvaluator::Create(param_.split_evaluator)); - } - - spliteval_->Init(args); - } - - void Update(HostDeviceVector* gpair, - DMatrix* dmat, - const std::vector& trees) override { - GradStats::CheckInfo(dmat->Info()); - if (is_gmat_initialized_ == false) { - double tstart = dmlc::GetTime(); - gmat_.Init(dmat, static_cast(param_.max_bin)); - column_matrix_.Init(gmat_, fhparam_.sparse_threshold); - if (fhparam_.enable_feature_grouping > 0) { - gmatb_.Init(gmat_, column_matrix_, fhparam_); - } - is_gmat_initialized_ = true; - if (param_.debug_verbose > 0) { - LOG(INFO) << "Generating gmat: " << dmlc::GetTime() - tstart << " sec"; - } - } - // rescale learning rate according to size of trees - float lr = param_.learning_rate; - param_.learning_rate = lr / trees.size(); - // build tree - if (!builder_) { - builder_.reset(new Builder( - param_, - fhparam_, - std::move(pruner_), - std::unique_ptr(spliteval_->GetHostClone()))); - } - for (auto tree : trees) { - builder_->Update - (gmat_, gmatb_, column_matrix_, gpair, dmat, tree); - } - param_.learning_rate = lr; - } - - bool UpdatePredictionCache(const DMatrix* data, - HostDeviceVector* out_preds) override { - if (!builder_ || param_.subsample < 1.0f) { - return false; - } else { - return builder_->UpdatePredictionCache(data, out_preds); - } - } - - protected: - // training parameter - TrainParam param_; - FastHistParam fhparam_; - // quantized data matrix - GHistIndexMatrix gmat_; - // (optional) data matrix with feature grouping - GHistIndexBlockMatrix gmatb_; - // column accessor - ColumnMatrix column_matrix_; - bool is_gmat_initialized_; - - // data structure - struct NodeEntry { - /*! \brief statics for node entry */ - GradStats stats; - /*! \brief loss of this node, without split */ - bst_float root_gain; - /*! \brief weight calculated related to current data */ - float weight; - /*! 
\brief current best solution */ - SplitEntry best; - // constructor - explicit NodeEntry(const TrainParam& param) - : stats(param), root_gain(0.0f), weight(0.0f) { - } - }; - // actual builder that runs the algorithm - - struct Builder { - public: - // constructor - explicit Builder(const TrainParam& param, - const FastHistParam& fhparam, - std::unique_ptr pruner, - std::unique_ptr spliteval) - : param_(param), fhparam_(fhparam), pruner_(std::move(pruner)), - spliteval_(std::move(spliteval)), p_last_tree_(nullptr), - p_last_fmat_(nullptr) {} - // update one tree, growing - virtual void Update(const GHistIndexMatrix& gmat, - const GHistIndexBlockMatrix& gmatb, - const ColumnMatrix& column_matrix, - HostDeviceVector* gpair, - DMatrix* p_fmat, - RegTree* p_tree) { - double gstart = dmlc::GetTime(); - - int num_leaves = 0; - unsigned timestamp = 0; - - double tstart; - double time_init_data = 0; - double time_init_new_node = 0; - double time_build_hist = 0; - double time_evaluate_split = 0; - double time_apply_split = 0; - - const std::vector& gpair_h = gpair->ConstHostVector(); - - spliteval_->Reset(); - - tstart = dmlc::GetTime(); - this->InitData(gmat, gpair_h, *p_fmat, *p_tree); - time_init_data = dmlc::GetTime() - tstart; - - // FIXME(hcho3): this code is broken when param.num_roots > 1. Please fix it - CHECK_EQ(p_tree->param.num_roots, 1) - << "tree_method=hist does not support multiple roots at this moment"; - for (int nid = 0; nid < p_tree->param.num_roots; ++nid) { - tstart = dmlc::GetTime(); - hist_.AddHistRow(nid); - BuildHist(gpair_h, row_set_collection_[nid], gmat, gmatb, hist_[nid]); - time_build_hist += dmlc::GetTime() - tstart; - - tstart = dmlc::GetTime(); - this->InitNewNode(nid, gmat, gpair_h, *p_fmat, *p_tree); - time_init_new_node += dmlc::GetTime() - tstart; - - tstart = dmlc::GetTime(); - this->EvaluateSplit(nid, gmat, hist_, *p_fmat, *p_tree); - time_evaluate_split += dmlc::GetTime() - tstart; - qexpand_->push(ExpandEntry(nid, p_tree->GetDepth(nid), - snode_[nid].best.loss_chg, - timestamp++)); - ++num_leaves; - } - - while (!qexpand_->empty()) { - const ExpandEntry candidate = qexpand_->top(); - const int nid = candidate.nid; - qexpand_->pop(); - if (candidate.loss_chg <= kRtEps - || (param_.max_depth > 0 && candidate.depth == param_.max_depth) - || (param_.max_leaves > 0 && num_leaves == param_.max_leaves) ) { - (*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate); - } else { - tstart = dmlc::GetTime(); - this->ApplySplit(nid, gmat, column_matrix, hist_, *p_fmat, p_tree); - time_apply_split += dmlc::GetTime() - tstart; - - tstart = dmlc::GetTime(); - const int cleft = (*p_tree)[nid].LeftChild(); - const int cright = (*p_tree)[nid].RightChild(); - hist_.AddHistRow(cleft); - hist_.AddHistRow(cright); - if (row_set_collection_[cleft].Size() < row_set_collection_[cright].Size()) { - BuildHist(gpair_h, row_set_collection_[cleft], gmat, gmatb, hist_[cleft]); - SubtractionTrick(hist_[cright], hist_[cleft], hist_[nid]); - } else { - BuildHist(gpair_h, row_set_collection_[cright], gmat, gmatb, hist_[cright]); - SubtractionTrick(hist_[cleft], hist_[cright], hist_[nid]); - } - time_build_hist += dmlc::GetTime() - tstart; - - tstart = dmlc::GetTime(); - this->InitNewNode(cleft, gmat, gpair_h, *p_fmat, *p_tree); - this->InitNewNode(cright, gmat, gpair_h, *p_fmat, *p_tree); - bst_uint featureid = snode_[nid].best.SplitIndex(); - spliteval_->AddSplit(nid, cleft, cright, featureid, - snode_[cleft].weight, snode_[cright].weight); - time_init_new_node += dmlc::GetTime() - 
tstart; - - tstart = dmlc::GetTime(); - this->EvaluateSplit(cleft, gmat, hist_, *p_fmat, *p_tree); - this->EvaluateSplit(cright, gmat, hist_, *p_fmat, *p_tree); - time_evaluate_split += dmlc::GetTime() - tstart; - - qexpand_->push(ExpandEntry(cleft, p_tree->GetDepth(cleft), - snode_[cleft].best.loss_chg, - timestamp++)); - qexpand_->push(ExpandEntry(cright, p_tree->GetDepth(cright), - snode_[cright].best.loss_chg, - timestamp++)); - - ++num_leaves; // give two and take one, as parent is no longer a leaf - } - } - - // set all the rest expanding nodes to leaf - // This post condition is not needed in current code, but may be necessary - // when there are stopping rule that leaves qexpand non-empty - while (!qexpand_->empty()) { - const int nid = qexpand_->top().nid; - qexpand_->pop(); - (*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate); - } - // remember auxiliary statistics in the tree node - for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) { - p_tree->Stat(nid).loss_chg = snode_[nid].best.loss_chg; - p_tree->Stat(nid).base_weight = snode_[nid].weight; - p_tree->Stat(nid).sum_hess = static_cast(snode_[nid].stats.sum_hess); - snode_[nid].stats.SetLeafVec(param_, p_tree->Leafvec(nid)); - } - - pruner_->Update(gpair, p_fmat, std::vector{p_tree}); - - if (param_.debug_verbose > 0) { - double total_time = dmlc::GetTime() - gstart; - LOG(INFO) << "\nInitData: " - << std::fixed << std::setw(6) << std::setprecision(4) << time_init_data - << " (" << std::fixed << std::setw(5) << std::setprecision(2) - << time_init_data / total_time * 100 << "%)\n" - << "InitNewNode: " - << std::fixed << std::setw(6) << std::setprecision(4) << time_init_new_node - << " (" << std::fixed << std::setw(5) << std::setprecision(2) - << time_init_new_node / total_time * 100 << "%)\n" - << "BuildHist: " - << std::fixed << std::setw(6) << std::setprecision(4) << time_build_hist - << " (" << std::fixed << std::setw(5) << std::setprecision(2) - << time_build_hist / total_time * 100 << "%)\n" - << "EvaluateSplit: " - << std::fixed << std::setw(6) << std::setprecision(4) << time_evaluate_split - << " (" << std::fixed << std::setw(5) << std::setprecision(2) - << time_evaluate_split / total_time * 100 << "%)\n" - << "ApplySplit: " - << std::fixed << std::setw(6) << std::setprecision(4) << time_apply_split - << " (" << std::fixed << std::setw(5) << std::setprecision(2) - << time_apply_split / total_time * 100 << "%)\n" - << "========================================\n" - << "Total: " - << std::fixed << std::setw(6) << std::setprecision(4) << total_time; - } - } - - inline void BuildHist(const std::vector& gpair, - const RowSetCollection::Elem row_indices, - const GHistIndexMatrix& gmat, - const GHistIndexBlockMatrix& gmatb, - GHistRow hist) { - if (fhparam_.enable_feature_grouping > 0) { - hist_builder_.BuildBlockHist(gpair, row_indices, gmatb, hist); - } else { - hist_builder_.BuildHist(gpair, row_indices, gmat, hist); - } - } - - inline void SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow parent) { - hist_builder_.SubtractionTrick(self, sibling, parent); - } - - inline bool UpdatePredictionCache(const DMatrix* data, - HostDeviceVector* p_out_preds) { - std::vector& out_preds = p_out_preds->HostVector(); - - // p_last_fmat_ is a valid pointer as long as UpdatePredictionCache() is called in - // conjunction with Update(). 
- if (!p_last_fmat_ || !p_last_tree_ || data != p_last_fmat_) { - return false; - } - - if (leaf_value_cache_.empty()) { - leaf_value_cache_.resize(p_last_tree_->param.num_nodes, - std::numeric_limits::infinity()); - } - - CHECK_GT(out_preds.size(), 0U); - - for (const RowSetCollection::Elem rowset : row_set_collection_) { - if (rowset.begin != nullptr && rowset.end != nullptr) { - int nid = rowset.node_id; - bst_float leaf_value; - // if a node is marked as deleted by the pruner, traverse upward to locate - // a non-deleted leaf. - if ((*p_last_tree_)[nid].IsDeleted()) { - while ((*p_last_tree_)[nid].IsDeleted()) { - nid = (*p_last_tree_)[nid].Parent(); - } - CHECK((*p_last_tree_)[nid].IsLeaf()); - } - leaf_value = (*p_last_tree_)[nid].LeafValue(); - - for (const size_t* it = rowset.begin; it < rowset.end; ++it) { - out_preds[*it] += leaf_value; - } - } - } - - return true; - } - - protected: - // initialize temp data structure - inline void InitData(const GHistIndexMatrix& gmat, - const std::vector& gpair, - const DMatrix& fmat, - const RegTree& tree) { - CHECK_EQ(tree.param.num_nodes, tree.param.num_roots) - << "ColMakerHist: can only grow new tree"; - CHECK((param_.max_depth > 0 || param_.max_leaves > 0)) - << "max_depth or max_leaves cannot be both 0 (unlimited); " - << "at least one should be a positive quantity."; - if (param_.grow_policy == TrainParam::kDepthWise) { - CHECK(param_.max_depth > 0) << "max_depth cannot be 0 (unlimited) " - << "when grow_policy is depthwise."; - } - const auto& info = fmat.Info(); - - { - // initialize the row set - row_set_collection_.Clear(); - // clear local prediction cache - leaf_value_cache_.clear(); - // initialize histogram collection - uint32_t nbins = gmat.cut.row_ptr.back(); - hist_.Init(nbins); - - // initialize histogram builder - #pragma omp parallel - { - this->nthread_ = omp_get_num_threads(); - } - hist_builder_.Init(this->nthread_, nbins); - - CHECK_EQ(info.root_index_.size(), 0U); - std::vector& row_indices = row_set_collection_.row_indices_; - // mark subsample and build list of member rows - if (param_.subsample < 1.0f) { - std::bernoulli_distribution coin_flip(param_.subsample); - auto& rnd = common::GlobalRandom(); - for (size_t i = 0; i < info.num_row_; ++i) { - if (gpair[i].GetHess() >= 0.0f && coin_flip(rnd)) { - row_indices.push_back(i); - } - } - } else { - for (size_t i = 0; i < info.num_row_; ++i) { - if (gpair[i].GetHess() >= 0.0f) { - row_indices.push_back(i); - } - } - } - row_set_collection_.Init(); - } - - { - /* determine layout of data */ - const size_t nrow = info.num_row_; - const size_t ncol = info.num_col_; - const size_t nnz = info.num_nonzero_; - // number of discrete bins for feature 0 - const uint32_t nbins_f0 = gmat.cut.row_ptr[1] - gmat.cut.row_ptr[0]; - if (nrow * ncol == nnz) { - // dense data with zero-based indexing - data_layout_ = kDenseDataZeroBased; - } else if (nbins_f0 == 0 && nrow * (ncol - 1) == nnz) { - // dense data with one-based indexing - data_layout_ = kDenseDataOneBased; - } else { - // sparse data - data_layout_ = kSparseData; - } - } - { - // store a pointer to the tree - p_last_tree_ = &tree; - // store a pointer to training data - p_last_fmat_ = &fmat; - // initialize feature index - if (data_layout_ == kDenseDataOneBased) { - column_sampler_.Init(info.num_col_, param_.colsample_bylevel, - param_.colsample_bytree, true); - } else { - column_sampler_.Init(info.num_col_, param_.colsample_bylevel, - param_.colsample_bytree, false); - } - } - if (data_layout_ == kDenseDataZeroBased || 
data_layout_ == kDenseDataOneBased) { - /* specialized code for dense data: - choose the column that has a least positive number of discrete bins. - For dense data (with no missing value), - the sum of gradient histogram is equal to snode[nid] */ - const std::vector& row_ptr = gmat.cut.row_ptr; - const auto nfeature = static_cast(row_ptr.size() - 1); - uint32_t min_nbins_per_feature = 0; - for (bst_uint i = 0; i < nfeature; ++i) { - const uint32_t nbins = row_ptr[i + 1] - row_ptr[i]; - if (nbins > 0) { - if (min_nbins_per_feature == 0 || min_nbins_per_feature > nbins) { - min_nbins_per_feature = nbins; - fid_least_bins_ = i; - } - } - } - CHECK_GT(min_nbins_per_feature, 0U); - } - { - snode_.reserve(256); - snode_.clear(); - } - { - if (param_.grow_policy == TrainParam::kLossGuide) { - qexpand_.reset(new ExpandQueue(LossGuide)); - } else { - qexpand_.reset(new ExpandQueue(DepthWise)); - } - } - } - - inline void EvaluateSplit(int nid, - const GHistIndexMatrix& gmat, - const HistCollection& hist, - const DMatrix& fmat, - const RegTree& tree) { - // start enumeration - const MetaInfo& info = fmat.Info(); - const auto& feature_set = column_sampler_.GetFeatureSet(tree.GetDepth(nid)).HostVector(); - const auto nfeature = static_cast(feature_set.size()); - const auto nthread = static_cast(this->nthread_); - best_split_tloc_.resize(nthread); - #pragma omp parallel for schedule(static) num_threads(nthread) - for (bst_omp_uint tid = 0; tid < nthread; ++tid) { - best_split_tloc_[tid] = snode_[nid].best; - } - #pragma omp parallel for schedule(dynamic) num_threads(nthread) - for (bst_omp_uint i = 0; i < nfeature; ++i) { - const bst_uint fid = feature_set[i]; - const unsigned tid = omp_get_thread_num(); - this->EnumerateSplit(-1, gmat, hist[nid], snode_[nid], info, - &best_split_tloc_[tid], fid, nid); - this->EnumerateSplit(+1, gmat, hist[nid], snode_[nid], info, - &best_split_tloc_[tid], fid, nid); - } - for (unsigned tid = 0; tid < nthread; ++tid) { - snode_[nid].best.Update(best_split_tloc_[tid]); - } - } - - inline void ApplySplit(int nid, - const GHistIndexMatrix& gmat, - const ColumnMatrix& column_matrix, - const HistCollection& hist, - const DMatrix& fmat, - RegTree* p_tree) { - // TODO(hcho3): support feature sampling by levels - - /* 1. Create child nodes */ - NodeEntry& e = snode_[nid]; - - p_tree->AddChilds(nid); - (*p_tree)[nid].SetSplit(e.best.SplitIndex(), e.best.split_value, e.best.DefaultLeft()); - // mark right child as 0, to indicate fresh leaf - int cleft = (*p_tree)[nid].LeftChild(); - int cright = (*p_tree)[nid].RightChild(); - (*p_tree)[cleft].SetLeaf(0.0f, 0); - (*p_tree)[cright].SetLeaf(0.0f, 0); - - /* 2. 
Categorize member rows */ - const auto nthread = static_cast(this->nthread_); - row_split_tloc_.resize(nthread); - for (bst_omp_uint i = 0; i < nthread; ++i) { - row_split_tloc_[i].left.clear(); - row_split_tloc_[i].right.clear(); - } - const bool default_left = (*p_tree)[nid].DefaultLeft(); - const bst_uint fid = (*p_tree)[nid].SplitIndex(); - const bst_float split_pt = (*p_tree)[nid].SplitCond(); - const uint32_t lower_bound = gmat.cut.row_ptr[fid]; - const uint32_t upper_bound = gmat.cut.row_ptr[fid + 1]; - int32_t split_cond = -1; - // convert floating-point split_pt into corresponding bin_id - // split_cond = -1 indicates that split_pt is less than all known cut points - CHECK_LT(upper_bound, - static_cast(std::numeric_limits::max())); - for (uint32_t i = lower_bound; i < upper_bound; ++i) { - if (split_pt == gmat.cut.cut[i]) { - split_cond = static_cast(i); - } - } - - const auto& rowset = row_set_collection_[nid]; - - Column column = column_matrix.GetColumn(fid); - if (column.GetType() == xgboost::common::kDenseColumn) { - ApplySplitDenseData(rowset, gmat, &row_split_tloc_, column, split_cond, - default_left); - } else { - ApplySplitSparseData(rowset, gmat, &row_split_tloc_, column, lower_bound, - upper_bound, split_cond, default_left); - } - - row_set_collection_.AddSplit( - nid, row_split_tloc_, (*p_tree)[nid].LeftChild(), (*p_tree)[nid].RightChild()); - } - - inline void ApplySplitDenseData(const RowSetCollection::Elem rowset, - const GHistIndexMatrix& gmat, - std::vector* p_row_split_tloc, - const Column& column, - bst_int split_cond, - bool default_left) { - std::vector& row_split_tloc = *p_row_split_tloc; - constexpr int kUnroll = 8; // loop unrolling factor - const size_t nrows = rowset.end - rowset.begin; - const size_t rest = nrows % kUnroll; - - #pragma omp parallel for num_threads(nthread_) schedule(static) - for (bst_omp_uint i = 0; i < nrows - rest; i += kUnroll) { - const bst_uint tid = omp_get_thread_num(); - auto& left = row_split_tloc[tid].left; - auto& right = row_split_tloc[tid].right; - size_t rid[kUnroll]; - uint32_t rbin[kUnroll]; - for (int k = 0; k < kUnroll; ++k) { - rid[k] = rowset.begin[i + k]; - } - for (int k = 0; k < kUnroll; ++k) { - rbin[k] = column.GetFeatureBinIdx(rid[k]); - } - for (int k = 0; k < kUnroll; ++k) { // NOLINT - if (rbin[k] == std::numeric_limits::max()) { // missing value - if (default_left) { - left.push_back(rid[k]); - } else { - right.push_back(rid[k]); - } - } else { - if (static_cast(rbin[k] + column.GetBaseIdx()) <= split_cond) { - left.push_back(rid[k]); - } else { - right.push_back(rid[k]); - } - } - } - } - for (size_t i = nrows - rest; i < nrows; ++i) { - auto& left = row_split_tloc[nthread_-1].left; - auto& right = row_split_tloc[nthread_-1].right; - const size_t rid = rowset.begin[i]; - const uint32_t rbin = column.GetFeatureBinIdx(rid); - if (rbin == std::numeric_limits::max()) { // missing value - if (default_left) { - left.push_back(rid); - } else { - right.push_back(rid); - } - } else { - if (static_cast(rbin + column.GetBaseIdx()) <= split_cond) { - left.push_back(rid); - } else { - right.push_back(rid); - } - } - } - } - - inline void ApplySplitSparseData(const RowSetCollection::Elem rowset, - const GHistIndexMatrix& gmat, - std::vector* p_row_split_tloc, - const Column& column, - bst_uint lower_bound, - bst_uint upper_bound, - bst_int split_cond, - bool default_left) { - std::vector& row_split_tloc = *p_row_split_tloc; - const size_t nrows = rowset.end - rowset.begin; - - #pragma omp parallel num_threads(nthread_) - 
{ - const auto tid = static_cast(omp_get_thread_num()); - const size_t ibegin = tid * nrows / nthread_; - const size_t iend = (tid + 1) * nrows / nthread_; - if (ibegin < iend) { // ensure that [ibegin, iend) is nonempty range - // search first nonzero row with index >= rowset[ibegin] - const size_t* p = std::lower_bound(column.GetRowData(), - column.GetRowData() + column.Size(), - rowset.begin[ibegin]); - - auto& left = row_split_tloc[tid].left; - auto& right = row_split_tloc[tid].right; - if (p != column.GetRowData() + column.Size() && *p <= rowset.begin[iend - 1]) { - size_t cursor = p - column.GetRowData(); - - for (size_t i = ibegin; i < iend; ++i) { - const size_t rid = rowset.begin[i]; - while (cursor < column.Size() - && column.GetRowIdx(cursor) < rid - && column.GetRowIdx(cursor) <= rowset.begin[iend - 1]) { - ++cursor; - } - if (cursor < column.Size() && column.GetRowIdx(cursor) == rid) { - const uint32_t rbin = column.GetFeatureBinIdx(cursor); - if (static_cast(rbin + column.GetBaseIdx()) <= split_cond) { - left.push_back(rid); - } else { - right.push_back(rid); - } - ++cursor; - } else { - // missing value - if (default_left) { - left.push_back(rid); - } else { - right.push_back(rid); - } - } - } - } else { // all rows in [ibegin, iend) have missing values - if (default_left) { - for (size_t i = ibegin; i < iend; ++i) { - const size_t rid = rowset.begin[i]; - left.push_back(rid); - } - } else { - for (size_t i = ibegin; i < iend; ++i) { - const size_t rid = rowset.begin[i]; - right.push_back(rid); - } - } - } - } - } - } - - inline void InitNewNode(int nid, - const GHistIndexMatrix& gmat, - const std::vector& gpair, - const DMatrix& fmat, - const RegTree& tree) { - { - snode_.resize(tree.param.num_nodes, NodeEntry(param_)); - } - - { - auto& stats = snode_[nid].stats; - if (data_layout_ == kDenseDataZeroBased || data_layout_ == kDenseDataOneBased) { - /* specialized code for dense data - For dense data (with no missing value), - the sum of gradient histogram is equal to snode[nid] */ - GHistRow hist = hist_[nid]; - const std::vector& row_ptr = gmat.cut.row_ptr; - - const uint32_t ibegin = row_ptr[fid_least_bins_]; - const uint32_t iend = row_ptr[fid_least_bins_ + 1]; - for (uint32_t i = ibegin; i < iend; ++i) { - const GHistEntry et = hist.begin[i]; - stats.Add(et.sum_grad, et.sum_hess); - } - } else { - const RowSetCollection::Elem e = row_set_collection_[nid]; - for (const size_t* it = e.begin; it < e.end; ++it) { - stats.Add(gpair[*it]); - } - } - } - - // calculating the weights - { - bst_uint parentid = tree[nid].Parent(); - snode_[nid].weight = static_cast( - spliteval_->ComputeWeight(parentid, snode_[nid].stats)); - snode_[nid].root_gain = static_cast( - spliteval_->ComputeScore(parentid, snode_[nid].stats, snode_[nid].weight)); - } - } - - // enumerate the split values of specific feature - inline void EnumerateSplit(int d_step, - const GHistIndexMatrix& gmat, - const GHistRow& hist, - const NodeEntry& snode, - const MetaInfo& info, - SplitEntry* p_best, - bst_uint fid, - bst_uint nodeID) { - CHECK(d_step == +1 || d_step == -1); - - // aliases - const std::vector& cut_ptr = gmat.cut.row_ptr; - const std::vector& cut_val = gmat.cut.cut; - - // statistics on both sides of split - GradStats c(param_); - GradStats e(param_); - // best split so far - SplitEntry best; - - // bin boundaries - CHECK_LE(cut_ptr[fid], - static_cast(std::numeric_limits::max())); - CHECK_LE(cut_ptr[fid + 1], - static_cast(std::numeric_limits::max())); - // imin: index (offset) of the minimum value 
for feature fid - // need this for backward enumeration - const auto imin = static_cast(cut_ptr[fid]); - // ibegin, iend: smallest/largest cut points for feature fid - // use int to allow for value -1 - int32_t ibegin, iend; - if (d_step > 0) { - ibegin = static_cast(cut_ptr[fid]); - iend = static_cast(cut_ptr[fid + 1]); - } else { - ibegin = static_cast(cut_ptr[fid + 1]) - 1; - iend = static_cast(cut_ptr[fid]) - 1; - } - - for (int32_t i = ibegin; i != iend; i += d_step) { - // start working - // try to find a split - e.Add(hist.begin[i].sum_grad, hist.begin[i].sum_hess); - if (e.sum_hess >= param_.min_child_weight) { - c.SetSubstract(snode.stats, e); - if (c.sum_hess >= param_.min_child_weight) { - bst_float loss_chg; - bst_float split_pt; - if (d_step > 0) { - // forward enumeration: split at right bound of each bin - loss_chg = static_cast( - spliteval_->ComputeSplitScore(nodeID, fid, e, c) - - snode.root_gain); - split_pt = cut_val[i]; - } else { - // backward enumeration: split at left bound of each bin - loss_chg = static_cast( - spliteval_->ComputeSplitScore(nodeID, fid, c, e) - - snode.root_gain); - if (i == imin) { - // for leftmost bin, left bound is the smallest feature value - split_pt = gmat.cut.min_val[fid]; - } else { - split_pt = cut_val[i - 1]; - } - } - best.Update(loss_chg, fid, split_pt, d_step == -1); - } - } - } - p_best->Update(best); - } - - /* tree growing policies */ - struct ExpandEntry { - int nid; - int depth; - bst_float loss_chg; - unsigned timestamp; - ExpandEntry(int nid, int depth, bst_float loss_chg, unsigned tstmp) - : nid(nid), depth(depth), loss_chg(loss_chg), timestamp(tstmp) {} - }; - inline static bool DepthWise(ExpandEntry lhs, ExpandEntry rhs) { - if (lhs.depth == rhs.depth) { - return lhs.timestamp > rhs.timestamp; // favor small timestamp - } else { - return lhs.depth > rhs.depth; // favor small depth - } - } - inline static bool LossGuide(ExpandEntry lhs, ExpandEntry rhs) { - if (lhs.loss_chg == rhs.loss_chg) { - return lhs.timestamp > rhs.timestamp; // favor small timestamp - } else { - return lhs.loss_chg < rhs.loss_chg; // favor large loss_chg - } - } - - // --data fields-- - const TrainParam& param_; - const FastHistParam& fhparam_; - // number of omp thread used during training - int nthread_; - common::ColumnSampler column_sampler_; - // the internal row sets - RowSetCollection row_set_collection_; - // the temp space for split - std::vector row_split_tloc_; - std::vector best_split_tloc_; - /*! \brief TreeNode Data: statistics for each constructed node */ - std::vector snode_; - /*! \brief culmulative histogram of gradients. */ - HistCollection hist_; - /*! \brief feature with least # of bins. to be used for dense specialization - of InitNewNode() */ - uint32_t fid_least_bins_; - /*! 
\brief local prediction cache; maps node id to leaf value */ - std::vector leaf_value_cache_; - - GHistBuilder hist_builder_; - std::unique_ptr pruner_; - std::unique_ptr spliteval_; - - // back pointers to tree and data matrix - const RegTree* p_last_tree_; - const DMatrix* p_last_fmat_; - - using ExpandQueue = - std::priority_queue, - std::function>; - std::unique_ptr qexpand_; - - enum DataLayout { kDenseDataZeroBased, kDenseDataOneBased, kSparseData }; - DataLayout data_layout_; - }; - - std::unique_ptr builder_; - std::unique_ptr pruner_; - std::unique_ptr spliteval_; -}; - -XGBOOST_REGISTER_TREE_UPDATER(FastHistMaker, "grow_fast_histmaker") -.describe("Grow tree using quantized histogram.") -.set_body([]() { - return new FastHistMaker(); - }); - -} // namespace tree -} // namespace xgboost diff --git a/src/tree/updater_histmaker.cc b/src/tree/updater_histmaker.cc index 0beb623e2..729324b81 100644 --- a/src/tree/updater_histmaker.cc +++ b/src/tree/updater_histmaker.cc @@ -4,11 +4,12 @@ * \brief use histogram counting to construct a tree * \author Tianqi Chen */ +#include #include #include #include #include -#include "../common/sync.h" + #include "../common/quantile.h" #include "../common/group_data.h" #include "./updater_basemaker-inl.h" diff --git a/src/tree/updater_prune.cc b/src/tree/updater_prune.cc index 3710cd28b..b36e5d153 100644 --- a/src/tree/updater_prune.cc +++ b/src/tree/updater_prune.cc @@ -4,12 +4,13 @@ * \brief prune a tree given the statistics * \author Tianqi Chen */ - +#include #include + #include #include + #include "./param.h" -#include "../common/sync.h" #include "../common/io.h" namespace xgboost { diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc new file mode 100644 index 000000000..6a5dde326 --- /dev/null +++ b/src/tree/updater_quantile_hist.cc @@ -0,0 +1,748 @@ +/*! 
+ * Copyright 2017-2018 by Contributors + * \file updater_quantile_hist.cc + * \brief use quantized feature values to construct a tree + * \author Philip Cho, Tianqi Checn + */ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "./param.h" +#include "./updater_quantile_hist.h" +#include "./split_evaluator.h" +#include "../common/random.h" +#include "../common/hist_util.h" +#include "../common/row_set.h" +#include "../common/column_matrix.h" + +namespace xgboost { +namespace tree { + +DMLC_REGISTRY_FILE_TAG(updater_quantile_hist); + +void QuantileHistMaker::Init(const std::vector >& args) { + // initialize pruner + if (!pruner_) { + pruner_.reset(TreeUpdater::Create("prune")); + } + pruner_->Init(args); + param_.InitAllowUnknown(args); + is_gmat_initialized_ = false; + + // initialise the split evaluator + if (!spliteval_) { + spliteval_.reset(SplitEvaluator::Create(param_.split_evaluator)); + } + + spliteval_->Init(args); +} + +void QuantileHistMaker::Update(HostDeviceVector *gpair, + DMatrix *dmat, + const std::vector &trees) { + GradStats::CheckInfo(dmat->Info()); + if (is_gmat_initialized_ == false) { + double tstart = dmlc::GetTime(); + gmat_.Init(dmat, static_cast(param_.max_bin)); + column_matrix_.Init(gmat_, param_.sparse_threshold); + if (param_.enable_feature_grouping > 0) { + gmatb_.Init(gmat_, column_matrix_, param_); + } + is_gmat_initialized_ = true; + if (param_.debug_verbose > 0) { + LOG(INFO) << "Generating gmat: " << dmlc::GetTime() - tstart << " sec"; + } + } + // rescale learning rate according to size of trees + float lr = param_.learning_rate; + param_.learning_rate = lr / trees.size(); + // build tree + if (!builder_) { + builder_.reset(new Builder( + param_, + std::move(pruner_), + std::unique_ptr(spliteval_->GetHostClone()))); + } + for (auto tree : trees) { + builder_->Update + (gmat_, gmatb_, column_matrix_, gpair, dmat, tree); + } + param_.learning_rate = lr; +} + +bool QuantileHistMaker::UpdatePredictionCache( + const DMatrix* data, + HostDeviceVector* out_preds) { + if (!builder_ || param_.subsample < 1.0f) { + return false; + } else { + return builder_->UpdatePredictionCache(data, out_preds); + } +} + +void QuantileHistMaker::Builder::Update(const GHistIndexMatrix& gmat, + const GHistIndexBlockMatrix& gmatb, + const ColumnMatrix& column_matrix, + HostDeviceVector* gpair, + DMatrix* p_fmat, + RegTree* p_tree) { + double gstart = dmlc::GetTime(); + + int num_leaves = 0; + unsigned timestamp = 0; + + double tstart; + double time_init_data = 0; + double time_init_new_node = 0; + double time_build_hist = 0; + double time_evaluate_split = 0; + double time_apply_split = 0; + + const std::vector& gpair_h = gpair->ConstHostVector(); + + spliteval_->Reset(); + + tstart = dmlc::GetTime(); + this->InitData(gmat, gpair_h, *p_fmat, *p_tree); + time_init_data = dmlc::GetTime() - tstart; + + // FIXME(hcho3): this code is broken when param.num_roots > 1. 
Please fix it + CHECK_EQ(p_tree->param.num_roots, 1) + << "tree_method=hist does not support multiple roots at this moment"; + for (int nid = 0; nid < p_tree->param.num_roots; ++nid) { + tstart = dmlc::GetTime(); + hist_.AddHistRow(nid); + BuildHist(gpair_h, row_set_collection_[nid], gmat, gmatb, hist_[nid]); + time_build_hist += dmlc::GetTime() - tstart; + + tstart = dmlc::GetTime(); + this->InitNewNode(nid, gmat, gpair_h, *p_fmat, *p_tree); + time_init_new_node += dmlc::GetTime() - tstart; + + tstart = dmlc::GetTime(); + this->EvaluateSplit(nid, gmat, hist_, *p_fmat, *p_tree); + time_evaluate_split += dmlc::GetTime() - tstart; + qexpand_->push(ExpandEntry(nid, p_tree->GetDepth(nid), + snode_[nid].best.loss_chg, + timestamp++)); + ++num_leaves; + } + + while (!qexpand_->empty()) { + const ExpandEntry candidate = qexpand_->top(); + const int nid = candidate.nid; + qexpand_->pop(); + if (candidate.loss_chg <= kRtEps + || (param_.max_depth > 0 && candidate.depth == param_.max_depth) + || (param_.max_leaves > 0 && num_leaves == param_.max_leaves) ) { + (*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate); + } else { + tstart = dmlc::GetTime(); + this->ApplySplit(nid, gmat, column_matrix, hist_, *p_fmat, p_tree); + time_apply_split += dmlc::GetTime() - tstart; + + tstart = dmlc::GetTime(); + const int cleft = (*p_tree)[nid].LeftChild(); + const int cright = (*p_tree)[nid].RightChild(); + hist_.AddHistRow(cleft); + hist_.AddHistRow(cright); + if (row_set_collection_[cleft].Size() < row_set_collection_[cright].Size()) { + BuildHist(gpair_h, row_set_collection_[cleft], gmat, gmatb, hist_[cleft]); + SubtractionTrick(hist_[cright], hist_[cleft], hist_[nid]); + } else { + BuildHist(gpair_h, row_set_collection_[cright], gmat, gmatb, hist_[cright]); + SubtractionTrick(hist_[cleft], hist_[cright], hist_[nid]); + } + time_build_hist += dmlc::GetTime() - tstart; + + tstart = dmlc::GetTime(); + this->InitNewNode(cleft, gmat, gpair_h, *p_fmat, *p_tree); + this->InitNewNode(cright, gmat, gpair_h, *p_fmat, *p_tree); + bst_uint featureid = snode_[nid].best.SplitIndex(); + spliteval_->AddSplit(nid, cleft, cright, featureid, + snode_[cleft].weight, snode_[cright].weight); + time_init_new_node += dmlc::GetTime() - tstart; + + tstart = dmlc::GetTime(); + this->EvaluateSplit(cleft, gmat, hist_, *p_fmat, *p_tree); + this->EvaluateSplit(cright, gmat, hist_, *p_fmat, *p_tree); + time_evaluate_split += dmlc::GetTime() - tstart; + + qexpand_->push(ExpandEntry(cleft, p_tree->GetDepth(cleft), + snode_[cleft].best.loss_chg, + timestamp++)); + qexpand_->push(ExpandEntry(cright, p_tree->GetDepth(cright), + snode_[cright].best.loss_chg, + timestamp++)); + + ++num_leaves; // give two and take one, as parent is no longer a leaf + } + } + + // set all the rest expanding nodes to leaf + // This post condition is not needed in current code, but may be necessary + // when there are stopping rule that leaves qexpand non-empty + while (!qexpand_->empty()) { + const int nid = qexpand_->top().nid; + qexpand_->pop(); + (*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate); + } + // remember auxiliary statistics in the tree node + for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) { + p_tree->Stat(nid).loss_chg = snode_[nid].best.loss_chg; + p_tree->Stat(nid).base_weight = snode_[nid].weight; + p_tree->Stat(nid).sum_hess = static_cast(snode_[nid].stats.sum_hess); + snode_[nid].stats.SetLeafVec(param_, p_tree->Leafvec(nid)); + } + + pruner_->Update(gpair, p_fmat, std::vector{p_tree}); + + if 
(param_.debug_verbose > 0) { + double total_time = dmlc::GetTime() - gstart; + LOG(INFO) << "\nInitData: " + << std::fixed << std::setw(6) << std::setprecision(4) << time_init_data + << " (" << std::fixed << std::setw(5) << std::setprecision(2) + << time_init_data / total_time * 100 << "%)\n" + << "InitNewNode: " + << std::fixed << std::setw(6) << std::setprecision(4) << time_init_new_node + << " (" << std::fixed << std::setw(5) << std::setprecision(2) + << time_init_new_node / total_time * 100 << "%)\n" + << "BuildHist: " + << std::fixed << std::setw(6) << std::setprecision(4) << time_build_hist + << " (" << std::fixed << std::setw(5) << std::setprecision(2) + << time_build_hist / total_time * 100 << "%)\n" + << "EvaluateSplit: " + << std::fixed << std::setw(6) << std::setprecision(4) << time_evaluate_split + << " (" << std::fixed << std::setw(5) << std::setprecision(2) + << time_evaluate_split / total_time * 100 << "%)\n" + << "ApplySplit: " + << std::fixed << std::setw(6) << std::setprecision(4) << time_apply_split + << " (" << std::fixed << std::setw(5) << std::setprecision(2) + << time_apply_split / total_time * 100 << "%)\n" + << "========================================\n" + << "Total: " + << std::fixed << std::setw(6) << std::setprecision(4) << total_time; + } +} + +bool QuantileHistMaker::Builder::UpdatePredictionCache( + const DMatrix* data, + HostDeviceVector* p_out_preds) { + std::vector& out_preds = p_out_preds->HostVector(); + + // p_last_fmat_ is a valid pointer as long as UpdatePredictionCache() is called in + // conjunction with Update(). + if (!p_last_fmat_ || !p_last_tree_ || data != p_last_fmat_) { + return false; + } + + if (leaf_value_cache_.empty()) { + leaf_value_cache_.resize(p_last_tree_->param.num_nodes, + std::numeric_limits::infinity()); + } + + CHECK_GT(out_preds.size(), 0U); + + for (const RowSetCollection::Elem rowset : row_set_collection_) { + if (rowset.begin != nullptr && rowset.end != nullptr) { + int nid = rowset.node_id; + bst_float leaf_value; + // if a node is marked as deleted by the pruner, traverse upward to locate + // a non-deleted leaf. 
+ if ((*p_last_tree_)[nid].IsDeleted()) { + while ((*p_last_tree_)[nid].IsDeleted()) { + nid = (*p_last_tree_)[nid].Parent(); + } + CHECK((*p_last_tree_)[nid].IsLeaf()); + } + leaf_value = (*p_last_tree_)[nid].LeafValue(); + + for (const size_t* it = rowset.begin; it < rowset.end; ++it) { + out_preds[*it] += leaf_value; + } + } + } + + return true; +} + +void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat, + const std::vector& gpair, + const DMatrix& fmat, + const RegTree& tree) { + CHECK_EQ(tree.param.num_nodes, tree.param.num_roots) + << "ColMakerHist: can only grow new tree"; + CHECK((param_.max_depth > 0 || param_.max_leaves > 0)) + << "max_depth or max_leaves cannot be both 0 (unlimited); " + << "at least one should be a positive quantity."; + if (param_.grow_policy == TrainParam::kDepthWise) { + CHECK(param_.max_depth > 0) << "max_depth cannot be 0 (unlimited) " + << "when grow_policy is depthwise."; + } + const auto& info = fmat.Info(); + + { + // initialize the row set + row_set_collection_.Clear(); + // clear local prediction cache + leaf_value_cache_.clear(); + // initialize histogram collection + uint32_t nbins = gmat.cut.row_ptr.back(); + hist_.Init(nbins); + + // initialize histogram builder +#pragma omp parallel + { + this->nthread_ = omp_get_num_threads(); + } + hist_builder_.Init(this->nthread_, nbins); + + CHECK_EQ(info.root_index_.size(), 0U); + std::vector& row_indices = row_set_collection_.row_indices_; + // mark subsample and build list of member rows + if (param_.subsample < 1.0f) { + std::bernoulli_distribution coin_flip(param_.subsample); + auto& rnd = common::GlobalRandom(); + for (size_t i = 0; i < info.num_row_; ++i) { + if (gpair[i].GetHess() >= 0.0f && coin_flip(rnd)) { + row_indices.push_back(i); + } + } + } else { + for (size_t i = 0; i < info.num_row_; ++i) { + if (gpair[i].GetHess() >= 0.0f) { + row_indices.push_back(i); + } + } + } + row_set_collection_.Init(); + } + + { + /* determine layout of data */ + const size_t nrow = info.num_row_; + const size_t ncol = info.num_col_; + const size_t nnz = info.num_nonzero_; + // number of discrete bins for feature 0 + const uint32_t nbins_f0 = gmat.cut.row_ptr[1] - gmat.cut.row_ptr[0]; + if (nrow * ncol == nnz) { + // dense data with zero-based indexing + data_layout_ = kDenseDataZeroBased; + } else if (nbins_f0 == 0 && nrow * (ncol - 1) == nnz) { + // dense data with one-based indexing + data_layout_ = kDenseDataOneBased; + } else { + // sparse data + data_layout_ = kSparseData; + } + } + { + // store a pointer to the tree + p_last_tree_ = &tree; + // store a pointer to training data + p_last_fmat_ = &fmat; + // initialize feature index + if (data_layout_ == kDenseDataOneBased) { + column_sampler_.Init(info.num_col_, param_.colsample_bylevel, + param_.colsample_bytree, true); + } else { + column_sampler_.Init(info.num_col_, param_.colsample_bylevel, + param_.colsample_bytree, false); + } + } + if (data_layout_ == kDenseDataZeroBased || data_layout_ == kDenseDataOneBased) { + /* specialized code for dense data: + choose the column that has a least positive number of discrete bins. 
+ For dense data (with no missing value), + the sum of gradient histogram is equal to snode[nid] */ + const std::vector& row_ptr = gmat.cut.row_ptr; + const auto nfeature = static_cast(row_ptr.size() - 1); + uint32_t min_nbins_per_feature = 0; + for (bst_uint i = 0; i < nfeature; ++i) { + const uint32_t nbins = row_ptr[i + 1] - row_ptr[i]; + if (nbins > 0) { + if (min_nbins_per_feature == 0 || min_nbins_per_feature > nbins) { + min_nbins_per_feature = nbins; + fid_least_bins_ = i; + } + } + } + CHECK_GT(min_nbins_per_feature, 0U); + } + { + snode_.reserve(256); + snode_.clear(); + } + { + if (param_.grow_policy == TrainParam::kLossGuide) { + qexpand_.reset(new ExpandQueue(LossGuide)); + } else { + qexpand_.reset(new ExpandQueue(DepthWise)); + } + } +} + +void QuantileHistMaker::Builder::EvaluateSplit(int nid, + const GHistIndexMatrix& gmat, + const HistCollection& hist, + const DMatrix& fmat, + const RegTree& tree) { + // start enumeration + const MetaInfo& info = fmat.Info(); + const auto& feature_set = column_sampler_.GetFeatureSet( + tree.GetDepth(nid)).HostVector(); + const auto nfeature = static_cast(feature_set.size()); + const auto nthread = static_cast(this->nthread_); + best_split_tloc_.resize(nthread); +#pragma omp parallel for schedule(static) num_threads(nthread) + for (bst_omp_uint tid = 0; tid < nthread; ++tid) { + best_split_tloc_[tid] = snode_[nid].best; + } +#pragma omp parallel for schedule(dynamic) num_threads(nthread) + for (bst_omp_uint i = 0; i < nfeature; ++i) { + const bst_uint fid = feature_set[i]; + const unsigned tid = omp_get_thread_num(); + this->EnumerateSplit(-1, gmat, hist[nid], snode_[nid], info, + &best_split_tloc_[tid], fid, nid); + this->EnumerateSplit(+1, gmat, hist[nid], snode_[nid], info, + &best_split_tloc_[tid], fid, nid); + } + for (unsigned tid = 0; tid < nthread; ++tid) { + snode_[nid].best.Update(best_split_tloc_[tid]); + } +} + +void QuantileHistMaker::Builder::ApplySplit(int nid, + const GHistIndexMatrix& gmat, + const ColumnMatrix& column_matrix, + const HistCollection& hist, + const DMatrix& fmat, + RegTree* p_tree) { + // TODO(hcho3): support feature sampling by levels + + /* 1. Create child nodes */ + NodeEntry& e = snode_[nid]; + + p_tree->AddChilds(nid); + (*p_tree)[nid].SetSplit(e.best.SplitIndex(), e.best.split_value, e.best.DefaultLeft()); + // mark right child as 0, to indicate fresh leaf + int cleft = (*p_tree)[nid].LeftChild(); + int cright = (*p_tree)[nid].RightChild(); + (*p_tree)[cleft].SetLeaf(0.0f, 0); + (*p_tree)[cright].SetLeaf(0.0f, 0); + + /* 2. 
Categorize member rows */ + const auto nthread = static_cast(this->nthread_); + row_split_tloc_.resize(nthread); + for (bst_omp_uint i = 0; i < nthread; ++i) { + row_split_tloc_[i].left.clear(); + row_split_tloc_[i].right.clear(); + } + const bool default_left = (*p_tree)[nid].DefaultLeft(); + const bst_uint fid = (*p_tree)[nid].SplitIndex(); + const bst_float split_pt = (*p_tree)[nid].SplitCond(); + const uint32_t lower_bound = gmat.cut.row_ptr[fid]; + const uint32_t upper_bound = gmat.cut.row_ptr[fid + 1]; + int32_t split_cond = -1; + // convert floating-point split_pt into corresponding bin_id + // split_cond = -1 indicates that split_pt is less than all known cut points + CHECK_LT(upper_bound, + static_cast(std::numeric_limits::max())); + for (uint32_t i = lower_bound; i < upper_bound; ++i) { + if (split_pt == gmat.cut.cut[i]) { + split_cond = static_cast(i); + } + } + + const auto& rowset = row_set_collection_[nid]; + + Column column = column_matrix.GetColumn(fid); + if (column.GetType() == xgboost::common::kDenseColumn) { + ApplySplitDenseData(rowset, gmat, &row_split_tloc_, column, split_cond, + default_left); + } else { + ApplySplitSparseData(rowset, gmat, &row_split_tloc_, column, lower_bound, + upper_bound, split_cond, default_left); + } + + row_set_collection_.AddSplit( + nid, row_split_tloc_, (*p_tree)[nid].LeftChild(), (*p_tree)[nid].RightChild()); +} + +void QuantileHistMaker::Builder::ApplySplitDenseData( + const RowSetCollection::Elem rowset, + const GHistIndexMatrix& gmat, + std::vector* p_row_split_tloc, + const Column& column, + bst_int split_cond, + bool default_left) { + std::vector& row_split_tloc = *p_row_split_tloc; + constexpr int kUnroll = 8; // loop unrolling factor + const size_t nrows = rowset.end - rowset.begin; + const size_t rest = nrows % kUnroll; + +#pragma omp parallel for num_threads(nthread_) schedule(static) + for (bst_omp_uint i = 0; i < nrows - rest; i += kUnroll) { + const bst_uint tid = omp_get_thread_num(); + auto& left = row_split_tloc[tid].left; + auto& right = row_split_tloc[tid].right; + size_t rid[kUnroll]; + uint32_t rbin[kUnroll]; + for (int k = 0; k < kUnroll; ++k) { + rid[k] = rowset.begin[i + k]; + } + for (int k = 0; k < kUnroll; ++k) { + rbin[k] = column.GetFeatureBinIdx(rid[k]); + } + for (int k = 0; k < kUnroll; ++k) { // NOLINT + if (rbin[k] == std::numeric_limits::max()) { // missing value + if (default_left) { + left.push_back(rid[k]); + } else { + right.push_back(rid[k]); + } + } else { + if (static_cast(rbin[k] + column.GetBaseIdx()) <= split_cond) { + left.push_back(rid[k]); + } else { + right.push_back(rid[k]); + } + } + } + } + for (size_t i = nrows - rest; i < nrows; ++i) { + auto& left = row_split_tloc[nthread_-1].left; + auto& right = row_split_tloc[nthread_-1].right; + const size_t rid = rowset.begin[i]; + const uint32_t rbin = column.GetFeatureBinIdx(rid); + if (rbin == std::numeric_limits::max()) { // missing value + if (default_left) { + left.push_back(rid); + } else { + right.push_back(rid); + } + } else { + if (static_cast(rbin + column.GetBaseIdx()) <= split_cond) { + left.push_back(rid); + } else { + right.push_back(rid); + } + } + } +} + +void QuantileHistMaker::Builder::ApplySplitSparseData( + const RowSetCollection::Elem rowset, + const GHistIndexMatrix& gmat, + std::vector* p_row_split_tloc, + const Column& column, + bst_uint lower_bound, + bst_uint upper_bound, + bst_int split_cond, + bool default_left) { + std::vector& row_split_tloc = *p_row_split_tloc; + const size_t nrows = rowset.end - rowset.begin; + 
+#pragma omp parallel num_threads(nthread_) + { + const auto tid = static_cast(omp_get_thread_num()); + const size_t ibegin = tid * nrows / nthread_; + const size_t iend = (tid + 1) * nrows / nthread_; + if (ibegin < iend) { // ensure that [ibegin, iend) is nonempty range + // search first nonzero row with index >= rowset[ibegin] + const size_t* p = std::lower_bound(column.GetRowData(), + column.GetRowData() + column.Size(), + rowset.begin[ibegin]); + + auto& left = row_split_tloc[tid].left; + auto& right = row_split_tloc[tid].right; + if (p != column.GetRowData() + column.Size() && *p <= rowset.begin[iend - 1]) { + size_t cursor = p - column.GetRowData(); + + for (size_t i = ibegin; i < iend; ++i) { + const size_t rid = rowset.begin[i]; + while (cursor < column.Size() + && column.GetRowIdx(cursor) < rid + && column.GetRowIdx(cursor) <= rowset.begin[iend - 1]) { + ++cursor; + } + if (cursor < column.Size() && column.GetRowIdx(cursor) == rid) { + const uint32_t rbin = column.GetFeatureBinIdx(cursor); + if (static_cast(rbin + column.GetBaseIdx()) <= split_cond) { + left.push_back(rid); + } else { + right.push_back(rid); + } + ++cursor; + } else { + // missing value + if (default_left) { + left.push_back(rid); + } else { + right.push_back(rid); + } + } + } + } else { // all rows in [ibegin, iend) have missing values + if (default_left) { + for (size_t i = ibegin; i < iend; ++i) { + const size_t rid = rowset.begin[i]; + left.push_back(rid); + } + } else { + for (size_t i = ibegin; i < iend; ++i) { + const size_t rid = rowset.begin[i]; + right.push_back(rid); + } + } + } + } + } +} + +void QuantileHistMaker::Builder::InitNewNode(int nid, + const GHistIndexMatrix& gmat, + const std::vector& gpair, + const DMatrix& fmat, + const RegTree& tree) { + { + snode_.resize(tree.param.num_nodes, NodeEntry(param_)); + } + + { + auto& stats = snode_[nid].stats; + if (data_layout_ == kDenseDataZeroBased || data_layout_ == kDenseDataOneBased) { + /* specialized code for dense data + For dense data (with no missing value), + the sum of gradient histogram is equal to snode[nid] */ + GHistRow hist = hist_[nid]; + const std::vector& row_ptr = gmat.cut.row_ptr; + + const uint32_t ibegin = row_ptr[fid_least_bins_]; + const uint32_t iend = row_ptr[fid_least_bins_ + 1]; + for (uint32_t i = ibegin; i < iend; ++i) { + const GHistEntry et = hist.begin[i]; + stats.Add(et.sum_grad, et.sum_hess); + } + } else { + const RowSetCollection::Elem e = row_set_collection_[nid]; + for (const size_t* it = e.begin; it < e.end; ++it) { + stats.Add(gpair[*it]); + } + } + } + + // calculating the weights + { + bst_uint parentid = tree[nid].Parent(); + snode_[nid].weight = static_cast( + spliteval_->ComputeWeight(parentid, snode_[nid].stats)); + snode_[nid].root_gain = static_cast( + spliteval_->ComputeScore(parentid, snode_[nid].stats, snode_[nid].weight)); + } +} + +// enumerate the split values of specific feature +void QuantileHistMaker::Builder::EnumerateSplit(int d_step, + const GHistIndexMatrix& gmat, + const GHistRow& hist, + const NodeEntry& snode, + const MetaInfo& info, + SplitEntry* p_best, + bst_uint fid, + bst_uint nodeID) { + CHECK(d_step == +1 || d_step == -1); + + // aliases + const std::vector& cut_ptr = gmat.cut.row_ptr; + const std::vector& cut_val = gmat.cut.cut; + + // statistics on both sides of split + GradStats c(param_); + GradStats e(param_); + // best split so far + SplitEntry best; + + // bin boundaries + CHECK_LE(cut_ptr[fid], + static_cast(std::numeric_limits::max())); + CHECK_LE(cut_ptr[fid + 1], + 
static_cast(std::numeric_limits::max())); + // imin: index (offset) of the minimum value for feature fid + // need this for backward enumeration + const auto imin = static_cast(cut_ptr[fid]); + // ibegin, iend: smallest/largest cut points for feature fid + // use int to allow for value -1 + int32_t ibegin, iend; + if (d_step > 0) { + ibegin = static_cast(cut_ptr[fid]); + iend = static_cast(cut_ptr[fid + 1]); + } else { + ibegin = static_cast(cut_ptr[fid + 1]) - 1; + iend = static_cast(cut_ptr[fid]) - 1; + } + + for (int32_t i = ibegin; i != iend; i += d_step) { + // start working + // try to find a split + e.Add(hist.begin[i].sum_grad, hist.begin[i].sum_hess); + if (e.sum_hess >= param_.min_child_weight) { + c.SetSubstract(snode.stats, e); + if (c.sum_hess >= param_.min_child_weight) { + bst_float loss_chg; + bst_float split_pt; + if (d_step > 0) { + // forward enumeration: split at right bound of each bin + loss_chg = static_cast( + spliteval_->ComputeSplitScore(nodeID, fid, e, c) - + snode.root_gain); + split_pt = cut_val[i]; + } else { + // backward enumeration: split at left bound of each bin + loss_chg = static_cast( + spliteval_->ComputeSplitScore(nodeID, fid, c, e) - + snode.root_gain); + if (i == imin) { + // for leftmost bin, left bound is the smallest feature value + split_pt = gmat.cut.min_val[fid]; + } else { + split_pt = cut_val[i - 1]; + } + } + best.Update(loss_chg, fid, split_pt, d_step == -1); + } + } + } + p_best->Update(best); +} + +XGBOOST_REGISTER_TREE_UPDATER(FastHistMaker, "grow_fast_histmaker") +.describe("(Deprecated, use grow_quantile_histmaker instead.)" + " Grow tree using quantized histogram.") +.set_body( + []() { + LOG(WARNING) << "grow_fast_histmaker is deprecated, " + << "use grow_quantile_histmaker instead."; + return new QuantileHistMaker(); + }); + +XGBOOST_REGISTER_TREE_UPDATER(QuantileHistMaker, "grow_quantile_histmaker") +.describe("Grow tree using quantized histogram.") +.set_body( + []() { + return new QuantileHistMaker(); + }); + +} // namespace tree +} // namespace xgboost diff --git a/src/tree/updater_quantile_hist.h b/src/tree/updater_quantile_hist.h new file mode 100644 index 000000000..75d551070 --- /dev/null +++ b/src/tree/updater_quantile_hist.h @@ -0,0 +1,238 @@ +/*! + * Copyright 2017-2018 by Contributors + * \file updater_quantile_hist.h + * \brief use quantized feature values to construct a tree + * \author Philip Cho, Tianqi Chen + */ +#ifndef XGBOOST_TREE_UPDATER_QUANTILE_HIST_H_ +#define XGBOOST_TREE_UPDATER_QUANTILE_HIST_H_ + +#include +#include + +#include +#include +#include +#include +#include + +#include "./param.h" +#include "./split_evaluator.h" +#include "../common/random.h" +#include "../common/hist_util.h" +#include "../common/row_set.h" +#include "../common/column_matrix.h" + +namespace xgboost { +namespace tree { + +using xgboost::common::HistCutMatrix; +using xgboost::common::GHistIndexMatrix; +using xgboost::common::GHistIndexBlockMatrix; +using xgboost::common::GHistIndexRow; +using xgboost::common::GHistEntry; +using xgboost::common::HistCollection; +using xgboost::common::RowSetCollection; +using xgboost::common::GHistRow; +using xgboost::common::GHistBuilder; +using xgboost::common::ColumnMatrix; +using xgboost::common::Column; + +/*! 
\brief construct a tree using quantized feature values */ +class QuantileHistMaker: public TreeUpdater { + public: + void Init(const std::vector >& args) override; + + void Update(HostDeviceVector* gpair, + DMatrix* dmat, + const std::vector& trees) override; + + bool UpdatePredictionCache(const DMatrix* data, + HostDeviceVector* out_preds) override; + + protected: + // training parameter + TrainParam param_; + // quantized data matrix + GHistIndexMatrix gmat_; + // (optional) data matrix with feature grouping + GHistIndexBlockMatrix gmatb_; + // column accessor + ColumnMatrix column_matrix_; + bool is_gmat_initialized_; + + // data structure + struct NodeEntry { + /*! \brief statics for node entry */ + GradStats stats; + /*! \brief loss of this node, without split */ + bst_float root_gain; + /*! \brief weight calculated related to current data */ + float weight; + /*! \brief current best solution */ + SplitEntry best; + // constructor + explicit NodeEntry(const TrainParam& param) + : stats(param), root_gain(0.0f), weight(0.0f) { + } + }; + // actual builder that runs the algorithm + + struct Builder { + public: + // constructor + explicit Builder(const TrainParam& param, + std::unique_ptr pruner, + std::unique_ptr spliteval) + : param_(param), pruner_(std::move(pruner)), + spliteval_(std::move(spliteval)), p_last_tree_(nullptr), + p_last_fmat_(nullptr) {} + // update one tree, growing + virtual void Update(const GHistIndexMatrix& gmat, + const GHistIndexBlockMatrix& gmatb, + const ColumnMatrix& column_matrix, + HostDeviceVector* gpair, + DMatrix* p_fmat, + RegTree* p_tree); + + inline void BuildHist(const std::vector& gpair, + const RowSetCollection::Elem row_indices, + const GHistIndexMatrix& gmat, + const GHistIndexBlockMatrix& gmatb, + GHistRow hist) { + if (param_.enable_feature_grouping > 0) { + hist_builder_.BuildBlockHist(gpair, row_indices, gmatb, hist); + } else { + hist_builder_.BuildHist(gpair, row_indices, gmat, hist); + } + } + + inline void SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow parent) { + hist_builder_.SubtractionTrick(self, sibling, parent); + } + + bool UpdatePredictionCache(const DMatrix* data, + HostDeviceVector* p_out_preds); + + protected: + // initialize temp data structure + void InitData(const GHistIndexMatrix& gmat, + const std::vector& gpair, + const DMatrix& fmat, + const RegTree& tree); + + void EvaluateSplit(int nid, + const GHistIndexMatrix& gmat, + const HistCollection& hist, + const DMatrix& fmat, + const RegTree& tree); + + void ApplySplit(int nid, + const GHistIndexMatrix& gmat, + const ColumnMatrix& column_matrix, + const HistCollection& hist, + const DMatrix& fmat, + RegTree* p_tree); + + void ApplySplitDenseData(const RowSetCollection::Elem rowset, + const GHistIndexMatrix& gmat, + std::vector* p_row_split_tloc, + const Column& column, + bst_int split_cond, + bool default_left); + + void ApplySplitSparseData(const RowSetCollection::Elem rowset, + const GHistIndexMatrix& gmat, + std::vector* p_row_split_tloc, + const Column& column, + bst_uint lower_bound, + bst_uint upper_bound, + bst_int split_cond, + bool default_left); + + void InitNewNode(int nid, + const GHistIndexMatrix& gmat, + const std::vector& gpair, + const DMatrix& fmat, + const RegTree& tree); + + // enumerate the split values of specific feature + void EnumerateSplit(int d_step, + const GHistIndexMatrix& gmat, + const GHistRow& hist, + const NodeEntry& snode, + const MetaInfo& info, + SplitEntry* p_best, + bst_uint fid, + bst_uint nodeID); + + /* tree growing policies 
*/ + struct ExpandEntry { + int nid; + int depth; + bst_float loss_chg; + unsigned timestamp; + ExpandEntry(int nid, int depth, bst_float loss_chg, unsigned tstmp) + : nid(nid), depth(depth), loss_chg(loss_chg), timestamp(tstmp) {} + }; + inline static bool DepthWise(ExpandEntry lhs, ExpandEntry rhs) { + if (lhs.depth == rhs.depth) { + return lhs.timestamp > rhs.timestamp; // favor small timestamp + } else { + return lhs.depth > rhs.depth; // favor small depth + } + } + inline static bool LossGuide(ExpandEntry lhs, ExpandEntry rhs) { + if (lhs.loss_chg == rhs.loss_chg) { + return lhs.timestamp > rhs.timestamp; // favor small timestamp + } else { + return lhs.loss_chg < rhs.loss_chg; // favor large loss_chg + } + } + + // --data fields-- + const TrainParam& param_; + // number of omp thread used during training + int nthread_; + common::ColumnSampler column_sampler_; + // the internal row sets + RowSetCollection row_set_collection_; + // the temp space for split + std::vector row_split_tloc_; + std::vector best_split_tloc_; + /*! \brief TreeNode Data: statistics for each constructed node */ + std::vector snode_; + /*! \brief culmulative histogram of gradients. */ + HistCollection hist_; + /*! \brief feature with least # of bins. to be used for dense specialization + of InitNewNode() */ + uint32_t fid_least_bins_; + /*! \brief local prediction cache; maps node id to leaf value */ + std::vector leaf_value_cache_; + + GHistBuilder hist_builder_; + std::unique_ptr pruner_; + std::unique_ptr spliteval_; + + // back pointers to tree and data matrix + const RegTree* p_last_tree_; + const DMatrix* p_last_fmat_; + + using ExpandQueue = + std::priority_queue, + std::function>; + std::unique_ptr qexpand_; + + enum DataLayout { kDenseDataZeroBased, kDenseDataOneBased, kSparseData }; + DataLayout data_layout_; + }; + + std::unique_ptr builder_; + std::unique_ptr pruner_; + std::unique_ptr spliteval_; +}; + +} // namespace tree +} // namespace xgboost + +#endif // XGBOOST_TREE_UPDATER_QUANTILE_HIST_H_ diff --git a/src/tree/updater_refresh.cc b/src/tree/updater_refresh.cc index 9fb273a50..92ae5be30 100644 --- a/src/tree/updater_refresh.cc +++ b/src/tree/updater_refresh.cc @@ -4,12 +4,13 @@ * \brief refresh the statistics and leaf value on the tree on the dataset * \author Tianqi Chen */ - +#include #include + #include #include + #include "./param.h" -#include "../common/sync.h" #include "../common/io.h" namespace xgboost { diff --git a/src/tree/updater_skmaker.cc b/src/tree/updater_skmaker.cc index 6848c9b0f..05eea7d14 100644 --- a/src/tree/updater_skmaker.cc +++ b/src/tree/updater_skmaker.cc @@ -5,12 +5,12 @@ a refresh is needed to make the statistics exactly correct * \author Tianqi Chen */ - +#include #include #include #include #include -#include "../common/sync.h" + #include "../common/quantile.h" #include "../common/group_data.h" #include "./updater_basemaker-inl.h" diff --git a/src/tree/updater_sync.cc b/src/tree/updater_sync.cc index f69607fa2..6d14f9600 100644 --- a/src/tree/updater_sync.cc +++ b/src/tree/updater_sync.cc @@ -7,7 +7,6 @@ #include #include #include -#include "../common/sync.h" #include "../common/io.h" namespace xgboost { diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc index 6291e2b8a..6ea92b7c1 100644 --- a/tests/cpp/test_learner.cc +++ b/tests/cpp/test_learner.cc @@ -32,7 +32,7 @@ TEST(learner, SelectTreeMethod) { "grow_colmaker,prune"); learner->Configure({arg("tree_method", "hist")}); ASSERT_EQ(learner->GetConfigurationArguments().at("updater"), - 
"grow_fast_histmaker"); + "grow_quantile_histmaker"); #ifdef XGBOOST_USE_CUDA learner->Configure({arg("tree_method", "gpu_exact")}); ASSERT_EQ(learner->GetConfigurationArguments().at("updater"), diff --git a/tests/cpp/tree/test_gpu_hist.cu b/tests/cpp/tree/test_gpu_hist.cu index 3c7f05e0c..d8a907f70 100644 --- a/tests/cpp/tree/test_gpu_hist.cu +++ b/tests/cpp/tree/test_gpu_hist.cu @@ -328,8 +328,8 @@ TEST(GpuHist, ApplySplit) { shard->row_stride = n_cols; thrust::sequence(shard->ridx.CurrentDVec().tbegin(), shard->ridx.CurrentDVec().tend()); + // Free inside DeviceShard dh::safe_cuda(cudaMallocHost(&(shard->tmp_pinned), sizeof(int64_t))); - // Initialize GPUHistMaker hist_maker.param_ = param; RegTree tree; @@ -390,15 +390,5 @@ TEST(GpuHist, ApplySplit) { ASSERT_EQ(shard->ridx_segments[right_nidx].end, 16); } -TEST(GpuHist, MGPU_mock) { - // Attempt to choose multiple GPU devices - int ngpu; - dh::safe_cuda(cudaGetDeviceCount(&ngpu)); - CHECK_GT(ngpu, 1); - for (int i = 0; i < ngpu; ++i) { - dh::safe_cuda(cudaSetDevice(i)); - } -} - } // namespace tree } // namespace xgboost diff --git a/tests/cpp/tree/test_param.cc b/tests/cpp/tree/test_param.cc index a569556e3..6f2a84c74 100644 --- a/tests/cpp/tree/test_param.cc +++ b/tests/cpp/tree/test_param.cc @@ -1,13 +1,13 @@ // Copyright by Contributors #include "../../../src/tree/param.h" - #include "../helpers.h" +#include TEST(Param, VectorIOStream) { std::vector vals = {3, 2, 1}; std::stringstream ss; std::vector vals_in; - + ss << vals; EXPECT_EQ(ss.str(), "(3,2,1)"); diff --git a/tests/cpp/tree/test_prune.cc b/tests/cpp/tree/test_prune.cc new file mode 100644 index 000000000..0b1878e41 --- /dev/null +++ b/tests/cpp/tree/test_prune.cc @@ -0,0 +1,72 @@ +/*! + * Copyright 2018 by Contributors + */ +#include "../helpers.h" +#include "../../../src/common/host_device_vector.h" +#include +#include +#include +#include +#include + +namespace xgboost { +namespace tree { + +TEST(Updater, Prune) { + int constexpr n_rows = 32, n_cols = 16; + + std::vector> cfg; + cfg.push_back(std::pair( + "num_feature", std::to_string(n_cols))); + cfg.push_back(std::pair( + "min_split_loss", "10")); + cfg.push_back(std::pair( + "silent", "1")); + + // These data are just place holders. 
+ HostDeviceVector gpair = + { {0.50f, 0.25f}, {0.50f, 0.25f}, {0.50f, 0.25f}, {0.50f, 0.25f}, + {0.25f, 0.24f}, {0.25f, 0.24f}, {0.25f, 0.24f}, {0.25f, 0.24f} }; + auto dmat = CreateDMatrix(32, 16, 0.4, 3); + + // prepare tree + RegTree tree = RegTree(); + tree.InitModel(); + tree.param.InitAllowUnknown(cfg); + std::vector trees {&tree}; + // prepare pruner + std::unique_ptr pruner(TreeUpdater::Create("prune")); + pruner->Init(cfg); + + // loss_chg < min_split_loss; + tree.AddChilds(0); + int cleft = tree[0].LeftChild(); + int cright = tree[0].RightChild(); + tree[cleft].SetLeaf(0.3f, 0); + tree[cright].SetLeaf(0.4f, 0); + pruner->Update(&gpair, dmat->get(), trees); + + ASSERT_EQ(tree.NumExtraNodes(), 0); + + // loss_chg > min_split_loss; + tree.AddChilds(0); + cleft = tree[0].LeftChild(); + cright = tree[0].RightChild(); + tree[cleft].SetLeaf(0.3f, 0); + tree[cright].SetLeaf(0.4f, 0); + tree.Stat(0).loss_chg = 11; + pruner->Update(&gpair, dmat->get(), trees); + + ASSERT_EQ(tree.NumExtraNodes(), 2); + + // loss_chg == min_split_loss; + tree.Stat(0).loss_chg = 10; + pruner->Update(&gpair, dmat->get(), trees); + + ASSERT_EQ(tree.NumExtraNodes(), 2); + + delete dmat; +} + +} // namespace tree +} // namespace xgboost diff --git a/tests/cpp/tree/test_quantile_hist.cc b/tests/cpp/tree/test_quantile_hist.cc new file mode 100644 index 000000000..b4336b857 --- /dev/null +++ b/tests/cpp/tree/test_quantile_hist.cc @@ -0,0 +1,181 @@ +/*! + * Copyright 2018 by Contributors + */ +#include "../helpers.h" +#include "../../../src/tree/param.h" +#include "../../../src/tree/updater_quantile_hist.h" +#include "../../../src/common/host_device_vector.h" + +#include +#include + +#include +#include + +namespace xgboost { +namespace tree { + +class QuantileHistMock : public QuantileHistMaker { + static double constexpr kEps = 1e-6; + + struct BuilderMock : public QuantileHistMaker::Builder { + using RealImpl = QuantileHistMaker::Builder; + + BuilderMock(const TrainParam& param, + std::unique_ptr pruner, + std::unique_ptr spliteval) + : RealImpl(param, std::move(pruner), std::move(spliteval)) {} + + public: + void TestInitData(const GHistIndexMatrix& gmat, + const std::vector& gpair, + const DMatrix& fmat, + const RegTree& tree) { + RealImpl::InitData(gmat, gpair, fmat, tree); + ASSERT_EQ(data_layout_, kSparseData); + } + + void TestBuildHist(int nid, + const GHistIndexMatrix& gmat, + const DMatrix& fmat, + const RegTree& tree) { + std::vector gpair = + { {0.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {0.27f, 0.28f}, + {0.27f, 0.29f}, {0.37f, 0.39f}, {0.47f, 0.49f}, {0.57f, 0.59f} }; + RealImpl::InitData(gmat, gpair, fmat, tree); + GHistIndexBlockMatrix quantile_index_block; + hist_.AddHistRow(nid); + BuildHist(gpair, row_set_collection_[nid], + gmat, quantile_index_block, hist_[nid]); + std::vector solution { + {0.27, 0.29}, {0.27, 0.29}, {0.47, 0.49}, + {0.27, 0.29}, {0.57, 0.59}, {0.26, 0.27}, + {0.37, 0.39}, {0.23, 0.24}, {0.37, 0.39}, + {0.27, 0.28}, {0.27, 0.29}, {0.37, 0.39}, + {0.26, 0.27}, {0.23, 0.24}, {0.57, 0.59}, + {0.47, 0.49}, {0.47, 0.49}, {0.37, 0.39}, + {0.26, 0.27}, {0.23, 0.24}, {0.27, 0.28}, + {0.57, 0.59}, {0.23, 0.24}, {0.47, 0.49}}; + + for (size_t i = 0; i < hist_[nid].size; ++i) { + GradientPairPrecise sol = solution[i]; + ASSERT_NEAR(sol.GetGrad(), hist_[nid].begin[i].sum_grad, kEps); + ASSERT_NEAR(sol.GetHess(), hist_[nid].begin[i].sum_hess, kEps); + } + } + + void TestEvaluateSplit(const GHistIndexBlockMatrix& quantile_index_block, + const RegTree& tree) { + std::vector row_gpairs = + 
{ {0.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {0.27f, 0.28f}, + {0.27f, 0.29f}, {0.37f, 0.39f}, {0.47f, 0.49f}, {0.57f, 0.59f} }; + size_t constexpr max_bins = 4; + auto dmat = CreateDMatrix(n_rows, n_cols, 0, 3); // dense + + common::GHistIndexMatrix gmat; + gmat.Init((*dmat).get(), max_bins); + + RealImpl::InitData(gmat, row_gpairs, *(*dmat), tree); + hist_.AddHistRow(0); + + BuildHist(row_gpairs, row_set_collection_[0], + gmat, quantile_index_block, hist_[0]); + + RealImpl::InitNewNode(0, gmat, row_gpairs, *(*dmat), tree); + // Manipulate the root_gain so that I don't have to invent an actual + // split. Yes, I'm cheating. + snode_[0].root_gain = 0.8; + RealImpl::EvaluateSplit(0, gmat, hist_, *(*dmat), tree); + + ASSERT_NEAR(snode_.at(0).best.loss_chg, 0.7128048, kEps); + ASSERT_EQ(snode_.at(0).best.SplitIndex(), 10); + ASSERT_NEAR(snode_.at(0).best.split_value, 0.182258, kEps); + + delete dmat; + } + }; + + int static constexpr n_rows = 8, n_cols = 16; + std::shared_ptr *dmat; + const std::vector > cfg; + std::shared_ptr builder_; + + public: + explicit QuantileHistMock( + const std::vector >& args) : + cfg{args} { + QuantileHistMaker::Init(args); + builder_.reset( + new BuilderMock( + param_, + std::move(pruner_), + std::unique_ptr(spliteval_->GetHostClone()))); + dmat = CreateDMatrix(n_rows, n_cols, 0.8, 3); + } + ~QuantileHistMock() { delete dmat; } + + static size_t GetNumColumns() { return n_cols; } + + void TestInitData() { + size_t constexpr max_bins = 4; + common::GHistIndexMatrix gmat; + gmat.Init((*dmat).get(), max_bins); + + RegTree tree = RegTree(); + tree.InitModel(); + tree.param.InitAllowUnknown(cfg); + + std::vector gpair = + { {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, + {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f} }; + + builder_->TestInitData(gmat, gpair, *(*dmat), tree); + } + + void TestBuildHist() { + RegTree tree = RegTree(); + tree.InitModel(); + tree.param.InitAllowUnknown(cfg); + + size_t constexpr max_bins = 4; + common::GHistIndexMatrix gmat; + gmat.Init((*dmat).get(), max_bins); + + builder_->TestBuildHist(0, gmat, *(*dmat).get(), tree); + } + + void TestEvaluateSplit() { + RegTree tree = RegTree(); + tree.InitModel(); + tree.param.InitAllowUnknown(cfg); + + builder_->TestEvaluateSplit(gmatb_, tree); + } +}; + +TEST(Updater, QuantileHist_InitData) { + std::vector> cfg + {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())}}; + QuantileHistMock maker(cfg); + maker.TestInitData(); +} + +TEST(Updater, QuantileHist_BuildHist) { + // Don't enable feature grouping + std::vector> cfg + {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())}, + {"enable_feature_grouping", std::to_string(0)}}; + QuantileHistMock maker(cfg); + maker.TestBuildHist(); +} + +TEST(Updater, QuantileHist_EvalSplits) { + std::vector> cfg + {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())}, + {"split_evaluator", "elastic_net"}}; + QuantileHistMock maker(cfg); + maker.TestEvaluateSplit(); +} + +} // namespace tree +} // namespace xgboost diff --git a/tests/cpp/tree/test_refresh.cc b/tests/cpp/tree/test_refresh.cc new file mode 100644 index 000000000..78d2db2f3 --- /dev/null +++ b/tests/cpp/tree/test_refresh.cc @@ -0,0 +1,57 @@ +/*! 
+ * Copyright 2018 by Contributors
+ */
+#include "../helpers.h"
+#include "../../../src/common/host_device_vector.h"
+#include
+#include
+#include
+#include
+#include
+
+namespace xgboost {
+namespace tree {
+
+TEST(Updater, Refresh) {
+  int constexpr n_rows = 8, n_cols = 16;
+
+  HostDeviceVector<GradientPair> gpair =
+      { {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f}, {0.23f, 0.24f},
+        {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f}, {0.27f, 0.29f} };
+  auto dmat = CreateDMatrix(n_rows, n_cols, 0.4, 3);
+  std::vector<std::pair<std::string, std::string>> cfg {
+      {"reg_alpha", "0.0"},
+      {"num_feature", std::to_string(n_cols)},
+      {"reg_lambda", "1"}};
+
+  RegTree tree = RegTree();
+  tree.InitModel();
+  tree.param.InitAllowUnknown(cfg);
+  std::vector<RegTree*> trees {&tree};
+  std::unique_ptr<TreeUpdater> refresher(TreeUpdater::Create("refresh"));
+
+  tree.AddChilds(0);
+  int cleft = tree[0].LeftChild();
+  int cright = tree[0].RightChild();
+  tree[cleft].SetLeaf(0.2f, 0);
+  tree[cright].SetLeaf(0.8f, 0);
+  tree[0].SetSplit(2, 0.2f);
+
+  tree.Stat(cleft).base_weight = 1.2;
+  tree.Stat(cright).base_weight = 1.3;
+
+  refresher->Init(cfg);
+  refresher->Update(&gpair, dmat->get(), trees);
+
+  bst_float constexpr kEps = 1e-6;
+  ASSERT_NEAR(-0.183392, tree[cright].LeafValue(), kEps);
+  ASSERT_NEAR(-0.224489, tree.Stat(0).loss_chg, kEps);
+  ASSERT_NEAR(0, tree.Stat(cleft).loss_chg, kEps);
+  ASSERT_NEAR(0, tree.Stat(1).loss_chg, kEps);
+  ASSERT_NEAR(0, tree.Stat(2).loss_chg, kEps);
+
+  delete dmat;
+}
+
+}  // namespace tree
+}  // namespace xgboost
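
A practical consequence of the updater registrations in updater_quantile_hist.cc: both the new name and the deprecated one resolve to QuantileHistMaker, so existing configurations keep working through the deprecation window. The sketch below shows how the registered names are exercised by name, mirroring the TreeUpdater::Create calls in the unit tests above; the <xgboost/tree_updater.h> include path is an assumption on my part, and this snippet is illustrative only, not part of the patch.

// Minimal sketch, assuming the public xgboost/tree_updater.h header.
#include <memory>
#include <utility>
#include <vector>
#include <xgboost/tree_updater.h>

namespace xgboost {

void CreateHistUpdaters() {
  // New canonical name introduced by this change.
  std::unique_ptr<TreeUpdater> updater(
      TreeUpdater::Create("grow_quantile_histmaker"));
  updater->Init({{"max_depth", "3"}});

  // The old name still resolves, but emits the deprecation warning from the
  // FastHistMaker registration and returns the same QuantileHistMaker.
  std::unique_ptr<TreeUpdater> legacy(
      TreeUpdater::Create("grow_fast_histmaker"));
}

}  // namespace xgboost

Keeping the old string registered means configurations that explicitly set updater=grow_fast_histmaker continue to train while logging a warning, which matches the deprecation notice attached to the FastHistMaker registration.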