From 5cdaac00c1ca61da3abf19ec89f3ecb0968df264 Mon Sep 17 00:00:00 2001 From: ShvetsKS <33296480+ShvetsKS@users.noreply.github.com> Date: Wed, 2 Jun 2021 23:35:26 +0300 Subject: [PATCH] Remove feature grouping (#7018) Co-authored-by: Kirill Shvets --- src/common/hist_util.cc | 326 --------------------------- src/common/hist_util.h | 45 ---- src/tree/param.h | 5 - src/tree/updater_quantile_hist.cc | 23 +- src/tree/updater_quantile_hist.h | 16 +- tests/cpp/tree/test_quantile_hist.cc | 27 +-- 6 files changed, 22 insertions(+), 420 deletions(-) diff --git a/src/common/hist_util.cc b/src/common/hist_util.cc index 75096c654..d65d19f99 100644 --- a/src/common/hist_util.cc +++ b/src/common/hist_util.cc @@ -187,267 +187,6 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_bins) { } } -template -static size_t GetConflictCount(const std::vector& mark, - const Column& column_input, - size_t max_cnt) { - size_t ret = 0; - if (column_input.GetType() == xgboost::common::kDenseColumn) { - const DenseColumn& column - = static_cast& >(column_input); - for (size_t i = 0; i < column.Size(); ++i) { - if ((!column.IsMissing(i)) && mark[i]) { - ++ret; - if (ret > max_cnt) { - return max_cnt + 1; - } - } - } - } else { - const SparseColumn& column - = static_cast& >(column_input); - for (size_t i = 0; i < column.Size(); ++i) { - if (mark[column.GetRowIdx(i)]) { - ++ret; - if (ret > max_cnt) { - return max_cnt + 1; - } - } - } - } - return ret; -} - -template -inline void -MarkUsed(std::vector* p_mark, const Column& column_input) { - std::vector& mark = *p_mark; - if (column_input.GetType() == xgboost::common::kDenseColumn) { - const DenseColumn& column - = static_cast& >(column_input); - for (size_t i = 0; i < column.Size(); ++i) { - if (!column.IsMissing(i)) { - mark[i] = true; - } - } - } else { - const SparseColumn& column - = static_cast& >(column_input); - for (size_t i = 0; i < column.Size(); ++i) { - mark[column.GetRowIdx(i)] = true; - } - } -} - -template -inline void SetGroup(const unsigned fid, const Column& column, - const size_t max_conflict_cnt, const std::vector& search_groups, - std::vector* p_group_conflict_cnt, - std::vector>* p_conflict_marks, - std::vector>* p_groups, - std::vector* p_group_nnz, const size_t cur_fid_nnz, const size_t nrow) { - bool need_new_group = true; - std::vector& group_conflict_cnt = *p_group_conflict_cnt; - std::vector>& conflict_marks = *p_conflict_marks; - std::vector>& groups = *p_groups; - std::vector& group_nnz = *p_group_nnz; - - // examine each candidate group: is it okay to insert fid? - for (auto gid : search_groups) { - const size_t rest_max_cnt = max_conflict_cnt - group_conflict_cnt[gid]; - const size_t cnt = GetConflictCount(conflict_marks[gid], column, rest_max_cnt); - if (cnt <= rest_max_cnt) { - need_new_group = false; - groups[gid].push_back(fid); - group_conflict_cnt[gid] += cnt; - group_nnz[gid] += cur_fid_nnz - cnt; - MarkUsed(&conflict_marks[gid], column); - break; - } - } - // create new group if necessary - if (need_new_group) { - groups.emplace_back(); - groups.back().push_back(fid); - group_conflict_cnt.push_back(0); - conflict_marks.emplace_back(nrow, false); - MarkUsed(&conflict_marks.back(), column); - group_nnz.emplace_back(cur_fid_nnz); - } -} - -inline std::vector> -FindGroups(const std::vector& feature_list, - const std::vector& feature_nnz, - const ColumnMatrix& colmat, - size_t nrow, - const tree::TrainParam& param) { - /* Goal: Bundle features together that has little or no "overlap", i.e. 
- only a few data points should have nonzero values for - member features. - Note that one-hot encoded features will be grouped together. */ - - std::vector> groups; - std::vector> conflict_marks; - std::vector group_nnz; - std::vector group_conflict_cnt; - const auto max_conflict_cnt - = static_cast(param.max_conflict_rate * nrow); - - for (auto fid : feature_list) { - const size_t cur_fid_nnz = feature_nnz[fid]; - - // randomly choose some of existing groups as candidates - std::vector search_groups; - for (size_t gid = 0; gid < groups.size(); ++gid) { - if (group_nnz[gid] + cur_fid_nnz <= nrow + max_conflict_cnt) { - search_groups.push_back(gid); - } - } - std::shuffle(search_groups.begin(), search_groups.end(), common::GlobalRandom()); - if (param.max_search_group > 0 && search_groups.size() > param.max_search_group) { - search_groups.resize(param.max_search_group); - } - - BinTypeSize bins_type_size = colmat.GetTypeSize(); - if (bins_type_size == kUint8BinsTypeSize) { - const auto column = colmat.GetColumn(fid); - SetGroup(fid, *(column.get()), max_conflict_cnt, search_groups, - &group_conflict_cnt, &conflict_marks, &groups, &group_nnz, cur_fid_nnz, nrow); - } else if (bins_type_size == kUint16BinsTypeSize) { - const auto column = colmat.GetColumn(fid); - SetGroup(fid, *(column.get()), max_conflict_cnt, search_groups, - &group_conflict_cnt, &conflict_marks, &groups, &group_nnz, cur_fid_nnz, nrow); - } else { - CHECK_EQ(bins_type_size, kUint32BinsTypeSize); - const auto column = colmat.GetColumn(fid); - SetGroup(fid, *(column.get()), max_conflict_cnt, search_groups, - &group_conflict_cnt, &conflict_marks, &groups, &group_nnz, cur_fid_nnz, nrow); - } - } - return groups; -} - -inline std::vector> -FastFeatureGrouping(const GHistIndexMatrix& gmat, - const ColumnMatrix& colmat, - const tree::TrainParam& param) { - const size_t nrow = gmat.row_ptr.size() - 1; - const size_t nfeature = gmat.cut.Ptrs().size() - 1; - - std::vector feature_list(nfeature); - std::iota(feature_list.begin(), feature_list.end(), 0); - - // sort features by nonzero counts, descending order - std::vector feature_nnz(nfeature); - std::vector features_by_nnz(feature_list); - gmat.GetFeatureCounts(&feature_nnz[0]); - std::sort(features_by_nnz.begin(), features_by_nnz.end(), - [&feature_nnz](unsigned a, unsigned b) { - return feature_nnz[a] > feature_nnz[b]; - }); - - auto groups_alt1 = FindGroups(feature_list, feature_nnz, colmat, nrow, param); - auto groups_alt2 = FindGroups(features_by_nnz, feature_nnz, colmat, nrow, param); - auto& groups = (groups_alt1.size() > groups_alt2.size()) ? 
groups_alt2 : groups_alt1; - - // take apart small, sparse groups, as it won't help speed - { - std::vector> ret; - for (const auto& group : groups) { - if (group.size() <= 1 || group.size() >= 5) { - ret.push_back(group); // keep singleton groups and large (5+) groups - } else { - size_t nnz = 0; - for (auto fid : group) { - nnz += feature_nnz[fid]; - } - double nnz_rate = static_cast(nnz) / nrow; - // take apart small sparse group, due it will not gain on speed - if (nnz_rate <= param.sparse_threshold) { - for (auto fid : group) { - ret.emplace_back(); - ret.back().push_back(fid); - } - } else { - ret.push_back(group); - } - } - } - groups = std::move(ret); - } - - // shuffle groups - std::shuffle(groups.begin(), groups.end(), common::GlobalRandom()); - - return groups; -} - -void GHistIndexBlockMatrix::Init(const GHistIndexMatrix& gmat, - const ColumnMatrix& colmat, - const tree::TrainParam& param) { - cut_ = &gmat.cut; - - const size_t nrow = gmat.row_ptr.size() - 1; - const uint32_t nbins = gmat.cut.Ptrs().back(); - - /* step 1: form feature groups */ - auto groups = FastFeatureGrouping(gmat, colmat, param); - const auto nblock = static_cast(groups.size()); - - /* step 2: build a new CSR matrix for each feature group */ - std::vector bin2block(nbins); // lookup table [bin id] => [block id] - for (uint32_t group_id = 0; group_id < nblock; ++group_id) { - for (auto& fid : groups[group_id]) { - const uint32_t bin_begin = gmat.cut.Ptrs()[fid]; - const uint32_t bin_end = gmat.cut.Ptrs()[fid + 1]; - for (uint32_t bin_id = bin_begin; bin_id < bin_end; ++bin_id) { - bin2block[bin_id] = group_id; - } - } - } - - std::vector> index_temp(nblock); - std::vector> row_ptr_temp(nblock); - for (uint32_t block_id = 0; block_id < nblock; ++block_id) { - row_ptr_temp[block_id].push_back(0); - } - for (size_t rid = 0; rid < nrow; ++rid) { - const size_t ibegin = gmat.row_ptr[rid]; - const size_t iend = gmat.row_ptr[rid + 1]; - for (size_t j = ibegin; j < iend; ++j) { - const uint32_t bin_id = gmat.index[j]; - const uint32_t block_id = bin2block[bin_id]; - index_temp[block_id].push_back(bin_id); - } - for (uint32_t block_id = 0; block_id < nblock; ++block_id) { - row_ptr_temp[block_id].push_back(index_temp[block_id].size()); - } - } - - /* step 3: concatenate CSR matrices into one (index, row_ptr) pair */ - std::vector index_blk_ptr; - std::vector row_ptr_blk_ptr; - index_blk_ptr.push_back(0); - row_ptr_blk_ptr.push_back(0); - for (uint32_t block_id = 0; block_id < nblock; ++block_id) { - index_.insert(index_.end(), index_temp[block_id].begin(), index_temp[block_id].end()); - row_ptr_.insert(row_ptr_.end(), row_ptr_temp[block_id].begin(), row_ptr_temp[block_id].end()); - index_blk_ptr.push_back(index_.size()); - row_ptr_blk_ptr.push_back(row_ptr_.size()); - } - - // save shortcut for each block - for (uint32_t block_id = 0; block_id < nblock; ++block_id) { - Block blk; - blk.index_begin = &index_[index_blk_ptr[block_id]]; - blk.row_ptr_begin = &row_ptr_[row_ptr_blk_ptr[block_id]]; - blk.index_end = &index_[index_blk_ptr[block_id + 1]]; - blk.row_ptr_end = &row_ptr_[row_ptr_blk_ptr[block_id + 1]]; - blocks_.push_back(blk); - } -} - /*! 
* \brief fill a histogram by zeros in range [begin, end) */ @@ -703,71 +442,6 @@ void GHistBuilder::BuildHist(const std::vector& gpair, GHistRow hist, bool isDense); -template -void GHistBuilder::BuildBlockHist(const std::vector& gpair, - const RowSetCollection::Elem row_indices, - const GHistIndexBlockMatrix& gmatb, - GHistRowT hist) { - static constexpr int kUnroll = 8; // loop unrolling factor - const size_t nblock = gmatb.GetNumBlock(); - const size_t nrows = row_indices.end - row_indices.begin; - const size_t rest = nrows % kUnroll; -#if defined(_OPENMP) - const auto nthread = static_cast(this->nthread_); // NOLINT -#endif // defined(_OPENMP) - xgboost::detail::GradientPairInternal* p_hist = hist.data(); - - dmlc::OMPException exc; -#pragma omp parallel for num_threads(nthread) schedule(guided) - for (bst_omp_uint bid = 0; bid < nblock; ++bid) { - exc.Run([&]() { - auto gmat = gmatb[bid]; - - for (size_t i = 0; i < nrows - rest; i += kUnroll) { - size_t rid[kUnroll]; - size_t ibegin[kUnroll]; - size_t iend[kUnroll]; - GradientPair stat[kUnroll]; - - for (int k = 0; k < kUnroll; ++k) { - rid[k] = row_indices.begin[i + k]; - ibegin[k] = gmat.row_ptr[rid[k]]; - iend[k] = gmat.row_ptr[rid[k] + 1]; - stat[k] = gpair[rid[k]]; - } - for (int k = 0; k < kUnroll; ++k) { - for (size_t j = ibegin[k]; j < iend[k]; ++j) { - const uint32_t bin = gmat.index[j]; - p_hist[bin].Add(stat[k].GetGrad(), stat[k].GetHess()); - } - } - } - for (size_t i = nrows - rest; i < nrows; ++i) { - const size_t rid = row_indices.begin[i]; - const size_t ibegin = gmat.row_ptr[rid]; - const size_t iend = gmat.row_ptr[rid + 1]; - const GradientPair stat = gpair[rid]; - for (size_t j = ibegin; j < iend; ++j) { - const uint32_t bin = gmat.index[j]; - p_hist[bin].Add(stat.GetGrad(), stat.GetHess()); - } - } - }); - } - exc.Rethrow(); -} -template -void GHistBuilder::BuildBlockHist(const std::vector& gpair, - const RowSetCollection::Elem row_indices, - const GHistIndexBlockMatrix& gmatb, - GHistRow hist); -template -void GHistBuilder::BuildBlockHist(const std::vector& gpair, - const RowSetCollection::Elem row_indices, - const GHistIndexBlockMatrix& gmatb, - GHistRow hist); - - template void GHistBuilder::SubtractionTrick(GHistRowT self, GHistRowT sibling, diff --git a/src/common/hist_util.h b/src/common/hist_util.h index abfbbebcd..75cbe14d0 100644 --- a/src/common/hist_util.h +++ b/src/common/hist_util.h @@ -321,48 +321,8 @@ int32_t XGBOOST_HOST_DEV_INLINE BinarySearchBin(bst_uint begin, bst_uint end, return -1; } -struct GHistIndexBlock { - const size_t* row_ptr; - const uint32_t* index; - - inline GHistIndexBlock(const size_t* row_ptr, const uint32_t* index) - : row_ptr(row_ptr), index(index) {} - - // get i-th row - inline GHistIndexRow operator[](size_t i) const { - return {&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]}; - } -}; - class ColumnMatrix; -class GHistIndexBlockMatrix { - public: - void Init(const GHistIndexMatrix& gmat, - const ColumnMatrix& colmat, - const tree::TrainParam& param); - - inline GHistIndexBlock operator[](size_t i) const { - return {blocks_[i].row_ptr_begin, blocks_[i].index_begin}; - } - - inline size_t GetNumBlock() const { - return blocks_.size(); - } - - private: - std::vector row_ptr_; - std::vector index_; - const HistogramCuts* cut_; - struct Block { - const size_t* row_ptr_begin; - const size_t* row_ptr_end; - const uint32_t* index_begin; - const uint32_t* index_end; - }; - std::vector blocks_; -}; - template using GHistRow = Span >; @@ -672,11 +632,6 @@ class GHistBuilder { 
const GHistIndexMatrix& gmat, GHistRowT hist, bool isDense); - // same, with feature grouping - void BuildBlockHist(const std::vector& gpair, - const RowSetCollection::Elem row_indices, - const GHistIndexBlockMatrix& gmatb, - GHistRowT hist); // construct a histogram via subtraction trick void SubtractionTrick(GHistRowT self, GHistRowT sibling, diff --git a/src/tree/param.h b/src/tree/param.h index 0367dd2de..bebf5b8d6 100644 --- a/src/tree/param.h +++ b/src/tree/param.h @@ -80,8 +80,6 @@ struct TrainParam : public XGBoostParameter { // percentage threshold for treating a feature as sparse // e.g. 0.2 indicates a feature with fewer than 20% nonzeros is considered sparse double sparse_threshold; - // use feature grouping? (default yes) - int enable_feature_grouping; // when grouping features, how many "conflicts" to allow. // conflict is when an instance has nonzero values for two or more features // default is 0, meaning features should be strictly complementary @@ -199,9 +197,6 @@ struct TrainParam : public XGBoostParameter { // ------ From cpu quantile histogram -------. DMLC_DECLARE_FIELD(sparse_threshold).set_range(0, 1.0).set_default(0.2) .describe("percentage threshold for treating a feature as sparse"); - DMLC_DECLARE_FIELD(enable_feature_grouping).set_lower_bound(0).set_default(0) - .describe("if >0, enable feature grouping to ameliorate work imbalance " - "among worker threads"); DMLC_DECLARE_FIELD(max_conflict_rate).set_range(0, 1.0).set_default(0) .describe("when grouping features, how many \"conflicts\" to allow." "conflict is when an instance has nonzero values for two or more features." diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index 62080c5c8..a81c10965 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -71,7 +71,7 @@ void QuantileHistMaker::CallBuilderUpdate(const std::unique_ptr &trees) { for (auto tree : trees) { - builder->Update(gmat_, gmatb_, column_matrix_, gpair, dmat, tree); + builder->Update(gmat_, column_matrix_, gpair, dmat, tree); } } void QuantileHistMaker::Update(HostDeviceVector *gpair, @@ -81,9 +81,6 @@ void QuantileHistMaker::Update(HostDeviceVector *gpair, updater_monitor_.Start("GmatInitialization"); gmat_.Init(dmat, static_cast(param_.max_bin)); column_matrix_.Init(gmat_, param_.sparse_threshold); - if (param_.enable_feature_grouping > 0) { - gmatb_.Init(gmat_, column_matrix_, param_); - } updater_monitor_.Stop("GmatInitialization"); // A proper solution is puting cut matrix in DMatrix, see: // https://github.com/dmlc/xgboost/issues/5143 @@ -295,7 +292,6 @@ void QuantileHistMaker::Builder::SetHistRowsAdder( template void QuantileHistMaker::Builder::InitRoot( const GHistIndexMatrix &gmat, - const GHistIndexBlockMatrix &gmatb, const DMatrix& fmat, RegTree *p_tree, const std::vector &gpair_h, @@ -311,7 +307,7 @@ void QuantileHistMaker::Builder::InitRoot( int sync_count = 0; hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, p_tree); - BuildLocalHistograms(gmat, gmatb, p_tree, gpair_h); + BuildLocalHistograms(gmat, p_tree, gpair_h); hist_synchronizer_->SyncHistograms(this, starting_index, sync_count, p_tree); this->InitNewNode(CPUExpandEntry::kRootNid, gmat, gpair_h, fmat, *p_tree); @@ -325,7 +321,6 @@ void QuantileHistMaker::Builder::InitRoot( template void QuantileHistMaker::Builder::BuildLocalHistograms( const GHistIndexMatrix &gmat, - const GHistIndexBlockMatrix &gmatb, RegTree *p_tree, const std::vector &gpair_h) { 
builder_monitor_.Start("BuildLocalHistograms"); @@ -355,7 +350,7 @@ void QuantileHistMaker::Builder::BuildLocalHistograms( auto rid_set = RowSetCollection::Elem(start_of_row_set + r.begin(), start_of_row_set + r.end(), nid); - BuildHist(gpair_h, rid_set, gmat, gmatb, hist_buffer_.GetInitializedHist(tid, nid_in_set)); + BuildHist(gpair_h, rid_set, gmat, hist_buffer_.GetInitializedHist(tid, nid_in_set)); }); builder_monitor_.Stop("BuildLocalHistograms"); @@ -446,7 +441,6 @@ void QuantileHistMaker::Builder::BuildNodeStats( template void QuantileHistMaker::Builder::ExpandTree( const GHistIndexMatrix& gmat, - const GHistIndexBlockMatrix& gmatb, const ColumnMatrix& column_matrix, DMatrix* p_fmat, RegTree* p_tree, @@ -456,7 +450,7 @@ void QuantileHistMaker::Builder::ExpandTree( Driver driver(static_cast(param_.grow_policy)); std::vector expand; - InitRoot(gmat, gmatb, *p_fmat, p_tree, gpair_h, &num_leaves, &expand); + InitRoot(gmat, *p_fmat, p_tree, gpair_h, &num_leaves, &expand); driver.Push(expand[0]); int depth = 0; @@ -478,7 +472,7 @@ void QuantileHistMaker::Builder::ExpandTree( int sync_count = 0; hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, p_tree); if (depth < param_.max_depth) { - BuildLocalHistograms(gmat, gmatb, p_tree, gpair_h); + BuildLocalHistograms(gmat, p_tree, gpair_h); hist_synchronizer_->SyncHistograms(this, starting_index, sync_count, p_tree); } @@ -506,8 +500,9 @@ void QuantileHistMaker::Builder::ExpandTree( template void QuantileHistMaker::Builder::Update( - const GHistIndexMatrix &gmat, const GHistIndexBlockMatrix &gmatb, - const ColumnMatrix &column_matrix, HostDeviceVector *gpair, + const GHistIndexMatrix &gmat, + const ColumnMatrix &column_matrix, + HostDeviceVector *gpair, DMatrix *p_fmat, RegTree *p_tree) { builder_monitor_.Start("Update"); @@ -525,7 +520,7 @@ void QuantileHistMaker::Builder::Update( this->InitData(gmat, *p_fmat, *p_tree, gpair_ptr); - ExpandTree(gmat, gmatb, column_matrix, p_fmat, p_tree, *gpair_ptr); + ExpandTree(gmat, column_matrix, p_fmat, p_tree, *gpair_ptr); for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) { p_tree->Stat(nid).loss_chg = snode_[nid].best.loss_chg; diff --git a/src/tree/updater_quantile_hist.h b/src/tree/updater_quantile_hist.h index 2e848f739..dae0f845c 100644 --- a/src/tree/updater_quantile_hist.h +++ b/src/tree/updater_quantile_hist.h @@ -111,7 +111,6 @@ class MemStackAllocator { namespace tree { using xgboost::common::GHistIndexMatrix; -using xgboost::common::GHistIndexBlockMatrix; using xgboost::common::GHistIndexRow; using xgboost::common::HistCollection; using xgboost::common::RowSetCollection; @@ -245,8 +244,6 @@ class QuantileHistMaker: public TreeUpdater { TrainParam param_; // quantized data matrix GHistIndexMatrix gmat_; - // (optional) data matrix with feature grouping - GHistIndexBlockMatrix gmatb_; // column accessor ColumnMatrix column_matrix_; DMatrix const* p_last_dmat_ {nullptr}; @@ -289,7 +286,6 @@ class QuantileHistMaker: public TreeUpdater { } // update one tree, growing virtual void Update(const GHistIndexMatrix& gmat, - const GHistIndexBlockMatrix& gmatb, const ColumnMatrix& column_matrix, HostDeviceVector* gpair, DMatrix* p_fmat, @@ -298,14 +294,9 @@ class QuantileHistMaker: public TreeUpdater { inline void BuildHist(const std::vector& gpair, const RowSetCollection::Elem row_indices, const GHistIndexMatrix& gmat, - const GHistIndexBlockMatrix& gmatb, GHistRowT hist) { - if (param_.enable_feature_grouping > 0) { - hist_builder_.BuildBlockHist(gpair, row_indices, gmatb, hist); - } 
else { - hist_builder_.BuildHist(gpair, row_indices, gmat, hist, - data_layout_ != DataLayout::kSparseData); - } + hist_builder_.BuildHist(gpair, row_indices, gmat, hist, + data_layout_ != DataLayout::kSparseData); } inline void SubtractionTrick(GHistRowT self, @@ -386,12 +377,10 @@ class QuantileHistMaker: public TreeUpdater { bool SplitContainsMissingValues(const GradStats e, const NodeEntry& snode); void BuildLocalHistograms(const GHistIndexMatrix &gmat, - const GHistIndexBlockMatrix &gmatb, RegTree *p_tree, const std::vector &gpair_h); void InitRoot(const GHistIndexMatrix &gmat, - const GHistIndexBlockMatrix &gmatb, const DMatrix& fmat, RegTree *p_tree, const std::vector &gpair_h, @@ -415,7 +404,6 @@ class QuantileHistMaker: public TreeUpdater { const std::vector& nodes_for_apply_split, RegTree *p_tree); void ExpandTree(const GHistIndexMatrix& gmat, - const GHistIndexBlockMatrix& gmatb, const ColumnMatrix& column_matrix, DMatrix* p_fmat, RegTree* p_tree, diff --git a/tests/cpp/tree/test_quantile_hist.cc b/tests/cpp/tree/test_quantile_hist.cc index bee6d4ed1..0009cd238 100644 --- a/tests/cpp/tree/test_quantile_hist.cc +++ b/tests/cpp/tree/test_quantile_hist.cc @@ -307,11 +307,10 @@ class QuantileHistMock : public QuantileHistMaker { { {0.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {0.27f, 0.28f}, {0.27f, 0.29f}, {0.37f, 0.39f}, {0.47f, 0.49f}, {0.57f, 0.59f} }; RealImpl::InitData(gmat, fmat, tree, &gpair); - GHistIndexBlockMatrix dummy; this->hist_.AddHistRow(nid); this->hist_.AllocateAllData(); this->BuildHist(gpair, this->row_set_collection_[nid], - gmat, dummy, this->hist_[nid]); + gmat, this->hist_[nid]); // Check if number of histogram bins is correct ASSERT_EQ(this->hist_[nid].size(), gmat.cut.Ptrs().back()); @@ -337,8 +336,7 @@ class QuantileHistMock : public QuantileHistMaker { } } - void TestEvaluateSplit(const GHistIndexBlockMatrix& quantile_index_block, - const RegTree& tree) { + void TestEvaluateSplit(const RegTree& tree) { std::vector row_gpairs = { {1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f}, {0.27f, 0.29f}, {0.37f, 0.39f}, {-0.47f, 0.49f}, {0.57f, 0.59f} }; @@ -353,7 +351,7 @@ class QuantileHistMock : public QuantileHistMaker { this->hist_.AddHistRow(0); this->hist_.AllocateAllData(); this->BuildHist(row_gpairs, this->row_set_collection_[0], - gmat, quantile_index_block, this->hist_[0]); + gmat, this->hist_[0]); RealImpl::InitNewNode(0, gmat, row_gpairs, *dmat, tree); @@ -419,15 +417,13 @@ class QuantileHistMock : public QuantileHistMaker { ASSERT_EQ(this->snode_[0].best.split_value, gmat.cut.Values()[best_split_threshold]); } - void TestEvaluateSplitParallel(const GHistIndexBlockMatrix &quantile_index_block, - const RegTree &tree) { + void TestEvaluateSplitParallel(const RegTree &tree) { omp_set_num_threads(2); - TestEvaluateSplit(quantile_index_block, tree); + TestEvaluateSplit(tree); omp_set_num_threads(1); } - void TestApplySplit(const GHistIndexBlockMatrix& quantile_index_block, - const RegTree& tree) { + void TestApplySplit(const RegTree& tree) { std::vector row_gpairs = { {1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f}, {0.27f, 0.29f}, {0.37f, 0.39f}, {-0.47f, 0.49f}, {0.57f, 0.59f} }; @@ -632,9 +628,9 @@ class QuantileHistMock : public QuantileHistMaker { RegTree tree = RegTree(); tree.param.UpdateAllowUnknown(cfg_); if (double_builder_) { - double_builder_->TestEvaluateSplit(gmatb_, tree); + double_builder_->TestEvaluateSplit(tree); } else { - float_builder_->TestEvaluateSplit(gmatb_, tree); + 
float_builder_->TestEvaluateSplit(tree); } } @@ -642,9 +638,9 @@ class QuantileHistMock : public QuantileHistMaker { RegTree tree = RegTree(); tree.param.UpdateAllowUnknown(cfg_); if (double_builder_) { - double_builder_->TestApplySplit(gmatb_, tree); + double_builder_->TestApplySplit(tree); } else { - float_builder_->TestEvaluateSplit(gmatb_, tree); + float_builder_->TestEvaluateSplit(tree); } } }; @@ -714,8 +710,7 @@ TEST(QuantileHist, DistributedSyncHistograms) { TEST(QuantileHist, BuildHist) { // Don't enable feature grouping std::vector> cfg - {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())}, - {"enable_feature_grouping", std::to_string(0)}}; + {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())}}; QuantileHistMock maker(cfg); maker.TestBuildHist(); const bool single_precision_histogram = true;
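
For reference, below is a minimal, self-contained sketch of the greedy, conflict-bounded feature grouping that this patch deletes (the FindGroups / FastFeatureGrouping path in src/common/hist_util.cc). It is an illustration only, not the removed implementation: names are simplified, each group's occupancy is kept as a plain boolean row mask, and several details of the real code are omitted (shuffling and capping the candidate groups via max_search_group, separate dense/sparse column handling, trying both the natural and the nnz-sorted feature orders and keeping the smaller grouping, and breaking apart small sparse groups below sparse_threshold).

// Sketch of conflict-bounded greedy feature grouping (illustrative only).
#include <cstddef>
#include <iostream>
#include <vector>

// One mask per feature/group: mask[r] == true if row r has a nonzero value.
using RowMask = std::vector<bool>;

// Count rows where `feature` overlaps the group's mask, bailing out as soon
// as the remaining conflict budget is exhausted (mirrors GetConflictCount).
std::size_t ConflictCount(const RowMask& group_mask, const RowMask& feature,
                          std::size_t budget) {
  std::size_t cnt = 0;
  for (std::size_t r = 0; r < feature.size(); ++r) {
    if (feature[r] && group_mask[r] && ++cnt > budget) {
      return budget + 1;
    }
  }
  return cnt;
}

// Greedily place each feature into the first group whose conflict budget it
// fits; otherwise open a new group (mirrors SetGroup / FindGroups).
std::vector<std::vector<std::size_t>> FindGroupsSketch(
    const std::vector<RowMask>& features, double max_conflict_rate) {
  const std::size_t nrow = features.empty() ? 0 : features.front().size();
  const auto max_conflicts =
      static_cast<std::size_t>(max_conflict_rate * static_cast<double>(nrow));

  std::vector<std::vector<std::size_t>> groups;
  std::vector<RowMask> group_masks;
  std::vector<std::size_t> group_conflicts;

  for (std::size_t fid = 0; fid < features.size(); ++fid) {
    bool placed = false;
    for (std::size_t gid = 0; gid < groups.size(); ++gid) {
      const std::size_t budget = max_conflicts - group_conflicts[gid];
      const std::size_t cnt = ConflictCount(group_masks[gid], features[fid], budget);
      if (cnt <= budget) {
        groups[gid].push_back(fid);
        group_conflicts[gid] += cnt;
        for (std::size_t r = 0; r < nrow; ++r) {
          if (features[fid][r]) group_masks[gid][r] = true;  // MarkUsed
        }
        placed = true;
        break;
      }
    }
    if (!placed) {
      groups.push_back({fid});
      group_masks.push_back(features[fid]);
      group_conflicts.push_back(0);
    }
  }
  return groups;
}

int main() {
  // Three sparse features over six rows: f0 and f1 are complementary,
  // f2 conflicts with both of them.
  const std::vector<RowMask> features = {
      {true, false, true, false, false, false},
      {false, true, false, true, false, false},
      {true, true, false, false, true, true},
  };
  for (const auto& group : FindGroupsSketch(features, /*max_conflict_rate=*/0.0)) {
    for (auto fid : group) std::cout << 'f' << fid << ' ';
    std::cout << '\n';
  }
  // Prints "f0 f1" on one line and "f2" on the next.
  return 0;
}

After this patch that machinery is gone entirely: histogram construction always goes through GHistBuilder::BuildHist on the plain GHistIndexMatrix, with only the dense/sparse data-layout flag (data_layout_ != DataLayout::kSparseData) left to steer the kernel, and the enable_feature_grouping, max_conflict_rate and max_search_group knobs lose their former block-building role.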