diff --git a/src/common/column_matrix.h b/src/common/column_matrix.h index bebc70c8c..5d3fea87e 100644 --- a/src/common/column_matrix.h +++ b/src/common/column_matrix.h @@ -29,6 +29,9 @@ switch (dtype) { \ #include #include #include "hist_util.h" +#include "../tree/fast_hist_param.h" + +using xgboost::tree::FastHistParam; namespace xgboost { namespace common { @@ -68,8 +71,9 @@ class ColumnMatrix { } // construct column matrix from GHistIndexMatrix - inline void Init(const GHistIndexMatrix& gmat, DataType dtype) { - this->dtype = dtype; + inline void Init(const GHistIndexMatrix& gmat, + const FastHistParam& param) { + this->dtype = static_cast(param.colmat_dtype); /* if dtype is smaller than uint32_t, multiple bin_id's will be stored in each slot of internal buffer. */ packing_factor_ = sizeof(uint32_t) / static_cast(this->dtype); @@ -93,7 +97,8 @@ class ColumnMatrix { gmat.GetFeatureCounts(&feature_counts_[0]); // classify features for (uint32_t fid = 0; fid < nfeature; ++fid) { - if (static_cast(feature_counts_[fid]) < 0.5*nrow) { + if (static_cast(feature_counts_[fid]) + < param.sparse_threshold * nrow) { type_[fid] = kSparseColumn; } else { type_[fid] = kDenseColumn; diff --git a/src/common/hist_util.cc b/src/common/hist_util.cc index 9c8228ab9..fe27ac8c5 100644 --- a/src/common/hist_util.cc +++ b/src/common/hist_util.cc @@ -5,10 +5,12 @@ * \author Philip Cho, Tianqi Chen */ #include +#include #include #include "./sync.h" -#include "./hist_util.h" +#include "./random.h" #include "./column_matrix.h" +#include "./hist_util.h" #include "./quantile.h" namespace xgboost { @@ -154,6 +156,246 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat) { } } +template +static unsigned GetConflictCount(const std::vector& mark, + const Column& column, + unsigned max_cnt) { + unsigned ret = 0; + if (column.type == xgboost::common::kDenseColumn) { + for (size_t i = 0; i < column.len; ++i) { + if (column.index[i] != std::numeric_limits::max() && mark[i]) { + ++ret; + if (ret > max_cnt) { + return max_cnt + 1; + } + } + } + } else { + for (size_t i = 0; i < column.len; ++i) { + if (mark[column.row_ind[i]]) { + ++ret; + if (ret > max_cnt) { + return max_cnt + 1; + } + } + } + } + return ret; +} + +template +inline void +MarkUsed(std::vector* p_mark, const Column& column) { + std::vector& mark = *p_mark; + if (column.type == xgboost::common::kDenseColumn) { + for (size_t i = 0; i < column.len; ++i) { + if (column.index[i] != std::numeric_limits::max()) { + mark[i] = true; + } + } + } else { + for (size_t i = 0; i < column.len; ++i) { + mark[column.row_ind[i]] = true; + } + } +} + +template +inline std::vector> +FindGroups_(const std::vector& feature_list, + const std::vector& feature_nnz, + const ColumnMatrix& colmat, + unsigned nrow, + const FastHistParam& param) { + /* Goal: Bundle features together that has little or no "overlap", i.e. + only a few data points should have nonzero values for + member features. + Note that one-hot encoded features will be grouped together. */ + + std::vector> groups; + std::vector> conflict_marks; + std::vector group_nnz; + std::vector group_conflict_cnt; + const unsigned max_conflict_cnt + = static_cast(param.max_conflict_rate * nrow); + + for (auto fid : feature_list) { + const Column& column = colmat.GetColumn(fid); + + const size_t cur_fid_nnz = feature_nnz[fid]; + bool need_new_group = true; + + // randomly choose some of existing groups as candidates + std::vector search_groups; + for (size_t gid = 0; gid < groups.size(); ++gid) { + if (group_nnz[gid] + cur_fid_nnz <= nrow + max_conflict_cnt) { + search_groups.push_back(gid); + } + } + std::shuffle(search_groups.begin(), search_groups.end(), common::GlobalRandom()); + if (param.max_search_group > 0 && search_groups.size() > param.max_search_group) { + search_groups.resize(param.max_search_group); + } + + // examine each candidate group: is it okay to insert fid? + for (auto gid : search_groups) { + const unsigned rest_max_cnt = max_conflict_cnt - group_conflict_cnt[gid]; + const unsigned cnt = GetConflictCount(conflict_marks[gid], column, rest_max_cnt); + if (cnt <= rest_max_cnt) { + need_new_group = false; + groups[gid].push_back(fid); + group_conflict_cnt[gid] += cnt; + group_nnz[gid] += cur_fid_nnz - cnt; + MarkUsed(&conflict_marks[gid], column); + break; + } + } + + // create new group if necessary + if (need_new_group) { + groups.emplace_back(); + groups.back().push_back(fid); + group_conflict_cnt.push_back(0); + conflict_marks.emplace_back(nrow, false); + MarkUsed(&conflict_marks.back(), column); + group_nnz.emplace_back(cur_fid_nnz); + } + } + + return groups; +} + +inline std::vector> +FindGroups(const std::vector& feature_list, + const std::vector& feature_nnz, + const ColumnMatrix& colmat, + unsigned nrow, + const FastHistParam& param) { + XGBOOST_TYPE_SWITCH(colmat.dtype, { + return FindGroups_(feature_list, feature_nnz, colmat, nrow, param); + }); + return std::vector>(); // to avoid warning message +} + +inline std::vector> +FastFeatureGrouping(const GHistIndexMatrix& gmat, + const ColumnMatrix& colmat, + const FastHistParam& param) { + const size_t nrow = gmat.row_ptr.size() - 1; + const size_t nfeature = gmat.cut->row_ptr.size() - 1; + + std::vector feature_list(nfeature); + std::iota(feature_list.begin(), feature_list.end(), 0); + + // sort features by nonzero counts, descending order + std::vector feature_nnz(nfeature); + std::vector features_by_nnz(feature_list); + gmat.GetFeatureCounts(&feature_nnz[0]); + std::sort(features_by_nnz.begin(), features_by_nnz.end(), + [&feature_nnz](int a, int b) { + return feature_nnz[a] > feature_nnz[b]; + }); + + auto groups_alt1 = FindGroups(feature_list, feature_nnz, colmat, nrow, param); + auto groups_alt2 = FindGroups(features_by_nnz, feature_nnz, colmat, nrow, param); + auto& groups = (groups_alt1.size() > groups_alt2.size()) ? groups_alt2 : groups_alt1; + + // take apart small, sparse groups, as it won't help speed + { + std::vector> ret; + for (const auto& group : groups) { + if (group.size() <= 1 || group.size() >= 5) { + ret.push_back(group); // keep singleton groups and large (5+) groups + } else { + unsigned nnz = 0; + for (auto fid : group) { + nnz += feature_nnz[fid]; + } + double nnz_rate = static_cast(nnz) / nrow; + // take apart small sparse group, due it will not gain on speed + if (nnz_rate <= param.sparse_threshold) { + for (auto fid : group) { + ret.emplace_back(); + ret.back().push_back(fid); + } + } else { + ret.push_back(group); + } + } + } + groups = std::move(ret); + } + + // shuffle groups + std::shuffle(groups.begin(), groups.end(), common::GlobalRandom()); + + return groups; +} + +void GHistIndexBlockMatrix::Init(const GHistIndexMatrix& gmat, + const ColumnMatrix& colmat, + const FastHistParam& param) { + cut = gmat.cut; + + const size_t nrow = gmat.row_ptr.size() - 1; + const size_t nbins = gmat.cut->row_ptr.back(); + + /* step 1: form feature groups */ + auto groups = FastFeatureGrouping(gmat, colmat, param); + const size_t nblock = groups.size(); + + /* step 2: build a new CSR matrix for each feature group */ + std::vector bin2block(nbins); // lookup table [bin id] => [block id] + for (size_t group_id = 0; group_id < nblock; ++group_id) { + for (auto& fid : groups[group_id]) { + const unsigned bin_begin = gmat.cut->row_ptr[fid]; + const unsigned bin_end = gmat.cut->row_ptr[fid + 1]; + for (unsigned bin_id = bin_begin; bin_id < bin_end; ++bin_id) { + bin2block[bin_id] = group_id; + } + } + } + std::vector> index_temp(nblock); + std::vector> row_ptr_temp(nblock); + for (size_t block_id = 0; block_id < nblock; ++block_id) { + row_ptr_temp[block_id].push_back(0); + } + for (size_t rid = 0; rid < nrow; ++rid) { + const size_t ibegin = static_cast(gmat.row_ptr[rid]); + const size_t iend = static_cast(gmat.row_ptr[rid + 1]); + for (size_t j = ibegin; j < iend; ++j) { + const size_t bin_id = gmat.index[j]; + const size_t block_id = bin2block[bin_id]; + index_temp[block_id].push_back(bin_id); + } + for (size_t block_id = 0; block_id < nblock; ++block_id) { + row_ptr_temp[block_id].push_back(index_temp[block_id].size()); + } + } + + /* step 3: concatenate CSR matrices into one (index, row_ptr) pair */ + std::vector index_blk_ptr; + std::vector row_ptr_blk_ptr; + index_blk_ptr.push_back(0); + row_ptr_blk_ptr.push_back(0); + for (size_t block_id = 0; block_id < nblock; ++block_id) { + index.insert(index.end(), index_temp[block_id].begin(), index_temp[block_id].end()); + row_ptr.insert(row_ptr.end(), row_ptr_temp[block_id].begin(), row_ptr_temp[block_id].end()); + index_blk_ptr.push_back(index.size()); + row_ptr_blk_ptr.push_back(row_ptr.size()); + } + + // save shortcut for each block + for (size_t block_id = 0; block_id < nblock; ++block_id) { + Block blk; + blk.index_begin = &index[index_blk_ptr[block_id]]; + blk.row_ptr_begin = &row_ptr[row_ptr_blk_ptr[block_id]]; + blk.index_end = &index[index_blk_ptr[block_id + 1]]; + blk.row_ptr_end = &row_ptr[row_ptr_blk_ptr[block_id + 1]]; + blocks.push_back(blk); + } +} + void GHistBuilder::BuildHist(const std::vector& gpair, const RowSetCollection::Elem row_indices, const GHistIndexMatrix& gmat, @@ -161,33 +403,12 @@ void GHistBuilder::BuildHist(const std::vector& gpair, GHistRow hist) { data_.resize(nbins_ * nthread_, GHistEntry()); std::fill(data_.begin(), data_.end(), GHistEntry()); - stat_buf_.resize(row_indices.size()); const int K = 8; // loop unrolling factor const bst_omp_uint nthread = static_cast(this->nthread_); const bst_omp_uint nrows = row_indices.end - row_indices.begin; const bst_omp_uint rest = nrows % K; - #pragma omp parallel for num_threads(nthread) schedule(static) - for (bst_omp_uint i = 0; i < nrows - rest; i += K) { - bst_uint rid[K]; - bst_gpair stat[K]; - for (int k = 0; k < K; ++k) { - rid[k] = row_indices.begin[i + k]; - } - for (int k = 0; k < K; ++k) { - stat[k] = gpair[rid[k]]; - } - for (int k = 0; k < K; ++k) { - stat_buf_[i + k] = stat[k]; - } - } - for (bst_omp_uint i = nrows - rest; i < nrows; ++i) { - const bst_uint rid = row_indices.begin[i]; - const bst_gpair stat = gpair[rid]; - stat_buf_[i] = stat; - } - #pragma omp parallel for num_threads(nthread) schedule(guided) for (bst_omp_uint i = 0; i < nrows - rest; i += K) { const bst_omp_uint tid = omp_get_thread_num(); @@ -204,7 +425,7 @@ void GHistBuilder::BuildHist(const std::vector& gpair, iend[k] = static_cast(gmat.row_ptr[rid[k] + 1]); } for (int k = 0; k < K; ++k) { - stat[k] = stat_buf_[i + k]; + stat[k] = gpair[rid[k]]; } for (int k = 0; k < K; ++k) { for (size_t j = ibegin[k]; j < iend[k]; ++j) { @@ -217,7 +438,7 @@ void GHistBuilder::BuildHist(const std::vector& gpair, const bst_uint rid = row_indices.begin[i]; const size_t ibegin = static_cast(gmat.row_ptr[rid]); const size_t iend = static_cast(gmat.row_ptr[rid + 1]); - const bst_gpair stat = stat_buf_[i]; + const bst_gpair stat = gpair[rid]; for (size_t j = ibegin; j < iend; ++j) { const size_t bin = gmat.index[j]; data_[bin].Add(stat); @@ -234,10 +455,60 @@ void GHistBuilder::BuildHist(const std::vector& gpair, } } +void GHistBuilder::BuildBlockHist(const std::vector& gpair, + const RowSetCollection::Elem row_indices, + const GHistIndexBlockMatrix& gmatb, + const std::vector& feat_set, + GHistRow hist) { + const int K = 8; // loop unrolling factor + const bst_omp_uint nthread = static_cast(this->nthread_); + const bst_omp_uint nblock = gmatb.GetNumBlock(); + const bst_omp_uint nrows = row_indices.end - row_indices.begin; + const bst_omp_uint rest = nrows % K; + + #pragma omp parallel for num_threads(nthread) schedule(guided) + for (bst_omp_uint bid = 0; bid < nblock; ++bid) { + auto gmat = gmatb[bid]; + + for (bst_omp_uint i = 0; i < nrows - rest; i += K) { + bst_uint rid[K]; + size_t ibegin[K]; + size_t iend[K]; + bst_gpair stat[K]; + for (int k = 0; k < K; ++k) { + rid[k] = row_indices.begin[i + k]; + } + for (int k = 0; k < K; ++k) { + ibegin[k] = static_cast(gmat.row_ptr[rid[k]]); + iend[k] = static_cast(gmat.row_ptr[rid[k] + 1]); + } + for (int k = 0; k < K; ++k) { + stat[k] = gpair[rid[k]]; + } + for (int k = 0; k < K; ++k) { + for (size_t j = ibegin[k]; j < iend[k]; ++j) { + const size_t bin = gmat.index[j]; + hist.begin[bin].Add(stat[k]); + } + } + } + for (bst_omp_uint i = nrows - rest; i < nrows; ++i) { + const bst_uint rid = row_indices.begin[i]; + const size_t ibegin = static_cast(gmat.row_ptr[rid]); + const size_t iend = static_cast(gmat.row_ptr[rid + 1]); + const bst_gpair stat = gpair[rid]; + for (size_t j = ibegin; j < iend; ++j) { + const size_t bin = gmat.index[j]; + hist.begin[bin].Add(stat); + } + } + } +} + void GHistBuilder::SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow parent) { const bst_omp_uint nthread = static_cast(this->nthread_); const bst_omp_uint nbins = static_cast(nbins_); - const int K = 8; + const int K = 8; // loop unrolling factor const bst_omp_uint rest = nbins % K; #pragma omp parallel for num_threads(nthread) schedule(static) for (bst_omp_uint bin_id = 0; bin_id < nbins - rest; bin_id += K) { diff --git a/src/common/hist_util.h b/src/common/hist_util.h index 0a9c74c26..9c58cca73 100644 --- a/src/common/hist_util.h +++ b/src/common/hist_util.h @@ -11,6 +11,9 @@ #include #include #include "row_set.h" +#include "../tree/fast_hist_param.h" + +using xgboost::tree::FastHistParam; namespace xgboost { namespace common { @@ -24,6 +27,10 @@ struct GHistEntry { GHistEntry() : sum_grad(0), sum_hess(0) {} + inline void Clear() { + sum_grad = sum_hess = 0; + } + /*! \brief add a bst_gpair to the sum */ inline void Add(const bst_gpair& e) { sum_grad += e.grad; @@ -125,6 +132,48 @@ struct GHistIndexMatrix { std::vector hit_count_tloc_; }; +struct GHistIndexBlock { + const unsigned* row_ptr; + const unsigned* index; + + inline GHistIndexBlock(const unsigned* row_ptr, const unsigned* index) + : row_ptr(row_ptr), index(index) {} + + // get i-th row + inline GHistIndexRow operator[](bst_uint i) const { + return GHistIndexRow(&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]); + } +}; + +class ColumnMatrix; + +class GHistIndexBlockMatrix { + public: + void Init(const GHistIndexMatrix& gmat, + const ColumnMatrix& colmat, + const FastHistParam& param); + + inline GHistIndexBlock operator[](bst_uint i) const { + return GHistIndexBlock(blocks[i].row_ptr_begin, blocks[i].index_begin); + } + + inline unsigned GetNumBlock() const { + return blocks.size(); + } + + private: + std::vector row_ptr; + std::vector index; + const HistCutMatrix* cut; + struct Block { + const unsigned* row_ptr_begin; + const unsigned* row_ptr_end; + const unsigned* index_begin; + const unsigned* index_end; + }; + std::vector blocks; +}; + /*! * \brief histogram of graident statistics for a single node. * Consists of multiple GHistEntry's, each entry showing total graident statistics @@ -206,6 +255,12 @@ class GHistBuilder { const GHistIndexMatrix& gmat, const std::vector& feat_set, GHistRow hist); + // same, with feature grouping + void BuildBlockHist(const std::vector& gpair, + const RowSetCollection::Elem row_indices, + const GHistIndexBlockMatrix& gmatb, + const std::vector& feat_set, + GHistRow hist); // construct a histogram via subtraction trick void SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow parent); @@ -215,7 +270,6 @@ class GHistBuilder { /*! \brief number of all bins over all features */ size_t nbins_; std::vector data_; - std::vector stat_buf_; }; diff --git a/src/tree/fast_hist_param.h b/src/tree/fast_hist_param.h new file mode 100644 index 000000000..7bd3be9f2 --- /dev/null +++ b/src/tree/fast_hist_param.h @@ -0,0 +1,64 @@ +/*! + * Copyright 2017 by Contributors + * \file updater_fast_hist.h + * \brief parameters for histogram-based training + * \author Philip Cho, Tianqi Chen + */ +#ifndef XGBOOST_TREE_FAST_HIST_PARAM_H_ +#define XGBOOST_TREE_FAST_HIST_PARAM_H_ + +namespace xgboost { +namespace tree { + +/*! \brief training parameters for histogram-based training */ +struct FastHistParam : public dmlc::Parameter { + // integral data type to be used with columnar data storage + enum class DataType { uint8 = 1, uint16 = 2, uint32 = 4 }; + int colmat_dtype; + // percentage threshold for treating a feature as sparse + // e.g. 0.2 indicates a feature with fewer than 20% nonzeros is considered sparse + double sparse_threshold; + // use feature grouping? (default yes) + int enable_feature_grouping; + // when grouping features, how many "conflicts" to allow. + // conflict is when an instance has nonzero values for two or more features + // default is 0, meaning features should be strictly complementary + double max_conflict_rate; + // when grouping features, how much effort to expend to prevent singleton groups + // we'll try to insert each feature into existing groups before creating a new group + // for that feature; to save time, only up to (max_search_group) of existing groups + // will be considered. If set to zero, ALL existing groups will be examined + unsigned max_search_group; + + // declare the parameters + DMLC_DECLARE_PARAMETER(FastHistParam) { + DMLC_DECLARE_FIELD(colmat_dtype) + .set_default(static_cast(DataType::uint32)) + .add_enum("uint8", static_cast(DataType::uint8)) + .add_enum("uint16", static_cast(DataType::uint16)) + .add_enum("uint32", static_cast(DataType::uint32)) + .describe("Integral data type to be used with columnar data storage." + "May carry marginal performance implications. Reserved for " + "advanced use"); + DMLC_DECLARE_FIELD(sparse_threshold).set_range(0, 1.0).set_default(0.2) + .describe("percentage threshold for treating a feature as sparse"); + DMLC_DECLARE_FIELD(enable_feature_grouping).set_lower_bound(0).set_default(1) + .describe("if >0, enable feature grouping to ameliorate work imbalance " + "among worker threads"); + DMLC_DECLARE_FIELD(max_conflict_rate).set_range(0, 1.0).set_default(0) + .describe("when grouping features, how many \"conflicts\" to allow." + "conflict is when an instance has nonzero values for two or more features." + "default is 0, meaning features should be strictly complementary."); + DMLC_DECLARE_FIELD(max_search_group).set_lower_bound(0).set_default(100) + .describe("when grouping features, how much effort to expend to prevent " + "singleton groups. We'll try to insert each feature into existing " + "groups before creating a new group for that feature; to save time, " + "only up to (max_search_group) of existing groups will be " + "considered. If set to zero, ALL existing groups will be examined."); + } +}; + +} // namespace tree +} // namespace xgboost + +#endif // XGBOOST_TREE_FAST_HIST_PARAM_H_ diff --git a/src/tree/param.h b/src/tree/param.h index 12baa1c1b..8995c9ee9 100644 --- a/src/tree/param.h +++ b/src/tree/param.h @@ -30,8 +30,6 @@ struct TrainParam : public dmlc::Parameter { int max_leaves; // if using histogram based algorithm, maximum number of bins per feature int max_bin; - enum class DataType { uint8 = 1, uint16 = 2, uint32 = 4 }; - int colmat_dtype; // growing policy enum TreeGrowPolicy { kDepthWise = 0, kLossGuide = 1 }; int grow_policy; @@ -111,14 +109,6 @@ struct TrainParam : public dmlc::Parameter { "Tree growing policy. 0: favor splitting at nodes closest to the node, " "i.e. grow depth-wise. 1: favor splitting at nodes with highest loss " "change. (cf. LightGBM)"); - DMLC_DECLARE_FIELD(colmat_dtype) - .set_default(static_cast(DataType::uint32)) - .add_enum("uint8", static_cast(DataType::uint8)) - .add_enum("uint16", static_cast(DataType::uint16)) - .add_enum("uint32", static_cast(DataType::uint32)) - .describe("Integral data type to be used with columnar data storage." - "May carry marginal performance implications. Reserved for " - "advanced use"); DMLC_DECLARE_FIELD(min_child_weight) .set_lower_bound(0.0f) .set_default(1.0f) diff --git a/src/tree/updater_fast_hist.cc b/src/tree/updater_fast_hist.cc index 37ba03736..95c2142a8 100644 --- a/src/tree/updater_fast_hist.cc +++ b/src/tree/updater_fast_hist.cc @@ -13,6 +13,7 @@ #include #include #include "./param.h" +#include "./fast_hist_param.h" #include "../common/random.h" #include "../common/bitmap.h" #include "../common/sync.h" @@ -25,6 +26,7 @@ namespace tree { using xgboost::common::HistCutMatrix; using xgboost::common::GHistIndexMatrix; +using xgboost::common::GHistIndexBlockMatrix; using xgboost::common::GHistIndexRow; using xgboost::common::GHistEntry; using xgboost::common::HistCollection; @@ -36,6 +38,8 @@ using xgboost::common::Column; DMLC_REGISTRY_FILE_TAG(updater_fast_hist); +DMLC_REGISTER_PARAMETER(FastHistParam); + /*! \brief construct a tree using quantized feature values */ template class FastHistMaker: public TreeUpdater { @@ -47,6 +51,7 @@ class FastHistMaker: public TreeUpdater { } pruner_->Init(args); param.InitAllowUnknown(args); + fhparam.InitAllowUnknown(args); is_gmat_initialized_ = false; } @@ -59,7 +64,10 @@ class FastHistMaker: public TreeUpdater { hmat_.Init(dmat, param.max_bin); gmat_.cut = &hmat_; gmat_.Init(dmat); - column_matrix_.Init(gmat_, static_cast(param.colmat_dtype)); + column_matrix_.Init(gmat_, fhparam); + if (fhparam.enable_feature_grouping > 0) { + gmatb_.Init(gmat_, column_matrix_, fhparam); + } is_gmat_initialized_ = true; if (param.debug_verbose > 0) { LOG(INFO) << "Generating gmat: " << dmlc::GetTime() - tstart << " sec"; @@ -71,10 +79,10 @@ class FastHistMaker: public TreeUpdater { TConstraint::Init(¶m, dmat->info().num_col); // build tree if (!builder_) { - builder_.reset(new Builder(param, std::move(pruner_))); + builder_.reset(new Builder(param, fhparam, std::move(pruner_))); } for (size_t i = 0; i < trees.size(); ++i) { - builder_->Update(gmat_, column_matrix_, gpair, dmat, trees[i]); + builder_->Update(gmat_, gmatb_, column_matrix_, gpair, dmat, trees[i]); } param.learning_rate = lr; } @@ -91,9 +99,13 @@ class FastHistMaker: public TreeUpdater { protected: // training parameter TrainParam param; + FastHistParam fhparam; // data sketch HistCutMatrix hmat_; + // quantized data matrix GHistIndexMatrix gmat_; + // (optional) data matrix with feature grouping + GHistIndexBlockMatrix gmatb_; // column accessor ColumnMatrix column_matrix_; bool is_gmat_initialized_; @@ -136,11 +148,13 @@ class FastHistMaker: public TreeUpdater { public: // constructor explicit Builder(const TrainParam& param, + const FastHistParam& fhparam, std::unique_ptr pruner) - : param(param), pruner_(std::move(pruner)), + : param(param), fhparam(fhparam), pruner_(std::move(pruner)), p_last_tree_(nullptr), p_last_fmat_(nullptr) {} // update one tree, growing virtual void Update(const GHistIndexMatrix& gmat, + const GHistIndexBlockMatrix& gmatb, const ColumnMatrix& column_matrix, const std::vector& gpair, DMatrix* p_fmat, @@ -168,7 +182,7 @@ class FastHistMaker: public TreeUpdater { for (int nid = 0; nid < p_tree->param.num_roots; ++nid) { tstart = dmlc::GetTime(); hist_.AddHistRow(nid); - builder_.BuildHist(gpair, row_set_collection_[nid], gmat, feat_set, hist_[nid]); + BuildHist(gpair, row_set_collection_[nid], gmat, gmatb, feat_set, hist_[nid]); time_build_hist += dmlc::GetTime() - tstart; tstart = dmlc::GetTime(); @@ -203,13 +217,11 @@ class FastHistMaker: public TreeUpdater { hist_.AddHistRow(cleft); hist_.AddHistRow(cright); if (row_set_collection_[cleft].size() < row_set_collection_[cright].size()) { - builder_.BuildHist(gpair, row_set_collection_[cleft], gmat, feat_set, - hist_[cleft]); - builder_.SubtractionTrick(hist_[cright], hist_[cleft], hist_[nid]); + BuildHist(gpair, row_set_collection_[cleft], gmat, gmatb, feat_set, hist_[cleft]); + SubtractionTrick(hist_[cright], hist_[cleft], hist_[nid]); } else { - builder_.BuildHist(gpair, row_set_collection_[cright], gmat, feat_set, - hist_[cright]); - builder_.SubtractionTrick(hist_[cleft], hist_[cright], hist_[nid]); + BuildHist(gpair, row_set_collection_[cright], gmat, gmatb, feat_set, hist_[cright]); + SubtractionTrick(hist_[cleft], hist_[cright], hist_[nid]); } time_build_hist += dmlc::GetTime() - tstart; @@ -280,6 +292,23 @@ class FastHistMaker: public TreeUpdater { } } + inline void BuildHist(const std::vector& gpair, + const RowSetCollection::Elem row_indices, + const GHistIndexMatrix& gmat, + const GHistIndexBlockMatrix& gmatb, + const std::vector& feat_set, + GHistRow hist) { + if (fhparam.enable_feature_grouping > 0) { + hist_builder_.BuildBlockHist(gpair, row_indices, gmatb, feat_set, hist); + } else { + hist_builder_.BuildHist(gpair, row_indices, gmat, feat_set, hist); + } + } + + inline void SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow parent) { + hist_builder_.SubtractionTrick(self, sibling, parent); + } + inline bool UpdatePredictionCache(const DMatrix* data, std::vector* p_out_preds) { std::vector& out_preds = *p_out_preds; @@ -351,7 +380,7 @@ class FastHistMaker: public TreeUpdater { { this->nthread = omp_get_num_threads(); } - builder_.Init(this->nthread, nbins); + hist_builder_.Init(this->nthread, nbins); CHECK_EQ(info.root_index.size(), 0U); std::vector& row_indices = row_set_collection_.row_indices_; @@ -885,6 +914,7 @@ class FastHistMaker: public TreeUpdater { // --data fields-- const TrainParam& param; + const FastHistParam& fhparam; // number of omp thread used during training int nthread; // Per feature: shuffle index of each feature index @@ -904,7 +934,7 @@ class FastHistMaker: public TreeUpdater { /*! \brief local prediction cache; maps node id to leaf value */ std::vector leaf_value_cache_; - GHistBuilder builder_; + GHistBuilder hist_builder_; std::unique_ptr pruner_; // back pointers to tree and data matrix