Remove feature grouping (#7018)

Co-authored-by: Kirill Shvets <kirill.shvets@intel.com>
2021-06-02 23:35:26 +03:00 · 2021-06-02 23:35:26 +03:00 · 5cdaac00c1
commit 5cdaac00c1
parent 05db6a6c29
6 changed files with 22 additions and 420 deletions
--- a/src/common/hist_util.cc
+++ b/src/common/hist_util.cc
@ -187,267 +187,6 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_bins) {
  }
 }

-template <typename BinIdxType>
-static size_t GetConflictCount(const std::vector<bool>& mark,
-                               const Column<BinIdxType>& column_input,
-                               size_t max_cnt) {
-  size_t ret = 0;
-  if (column_input.GetType() == xgboost::common::kDenseColumn) {
-    const DenseColumn<BinIdxType>& column
-      = static_cast<const DenseColumn<BinIdxType>& >(column_input);
-    for (size_t i = 0; i < column.Size(); ++i) {
-      if ((!column.IsMissing(i)) && mark[i]) {
-        ++ret;
-        if (ret > max_cnt) {
-          return max_cnt + 1;
-        }
-      }
-    }
-  } else {
-    const SparseColumn<BinIdxType>& column
-      = static_cast<const SparseColumn<BinIdxType>& >(column_input);
-    for (size_t i = 0; i < column.Size(); ++i) {
-      if (mark[column.GetRowIdx(i)]) {
-        ++ret;
-        if (ret > max_cnt) {
-          return max_cnt + 1;
-        }
-      }
-    }
-  }
-  return ret;
-}
-
-template <typename BinIdxType>
-inline void
-MarkUsed(std::vector<bool>* p_mark, const Column<BinIdxType>& column_input) {
-  std::vector<bool>& mark = *p_mark;
-  if (column_input.GetType() == xgboost::common::kDenseColumn) {
-    const DenseColumn<BinIdxType>& column
-      = static_cast<const DenseColumn<BinIdxType>& >(column_input);
-    for (size_t i = 0; i < column.Size(); ++i) {
-      if (!column.IsMissing(i)) {
-        mark[i] = true;
-      }
-    }
-  } else {
-    const SparseColumn<BinIdxType>& column
-      = static_cast<const SparseColumn<BinIdxType>& >(column_input);
-    for (size_t i = 0; i < column.Size(); ++i) {
-      mark[column.GetRowIdx(i)] = true;
-    }
-  }
-}
-
-template <typename BinIdxType>
-inline void SetGroup(const unsigned fid, const Column<BinIdxType>& column,
-  const size_t max_conflict_cnt, const std::vector<size_t>& search_groups,
-  std::vector<size_t>* p_group_conflict_cnt,
-  std::vector<std::vector<bool>>* p_conflict_marks,
-  std::vector<std::vector<unsigned>>* p_groups,
-  std::vector<size_t>* p_group_nnz, const size_t cur_fid_nnz, const size_t nrow) {
-  bool need_new_group = true;
-  std::vector<size_t>& group_conflict_cnt = *p_group_conflict_cnt;
-  std::vector<std::vector<bool>>& conflict_marks = *p_conflict_marks;
-  std::vector<std::vector<unsigned>>& groups = *p_groups;
-  std::vector<size_t>& group_nnz = *p_group_nnz;
-
-  // examine each candidate group: is it okay to insert fid?
-  for (auto gid : search_groups) {
-    const size_t rest_max_cnt = max_conflict_cnt - group_conflict_cnt[gid];
-    const size_t cnt = GetConflictCount(conflict_marks[gid], column, rest_max_cnt);
-    if (cnt <= rest_max_cnt) {
-      need_new_group = false;
-      groups[gid].push_back(fid);
-      group_conflict_cnt[gid] += cnt;
-      group_nnz[gid] += cur_fid_nnz - cnt;
-      MarkUsed(&conflict_marks[gid], column);
-      break;
-    }
-  }
-  // create new group if necessary
-  if (need_new_group) {
-    groups.emplace_back();
-    groups.back().push_back(fid);
-    group_conflict_cnt.push_back(0);
-    conflict_marks.emplace_back(nrow, false);
-    MarkUsed(&conflict_marks.back(), column);
-    group_nnz.emplace_back(cur_fid_nnz);
-  }
-}
-
-inline std::vector<std::vector<unsigned>>
-FindGroups(const std::vector<unsigned>& feature_list,
-           const std::vector<size_t>& feature_nnz,
-           const ColumnMatrix& colmat,
-           size_t nrow,
-           const tree::TrainParam& param) {
-  /* Goal: Bundle features together that has little or no "overlap", i.e.
-           only a few data points should have nonzero values for
-           member features.
-           Note that one-hot encoded features will be grouped together. */
-
-  std::vector<std::vector<unsigned>> groups;
-  std::vector<std::vector<bool>> conflict_marks;
-  std::vector<size_t> group_nnz;
-  std::vector<size_t> group_conflict_cnt;
-  const auto max_conflict_cnt
-    = static_cast<size_t>(param.max_conflict_rate * nrow);
-
-  for (auto fid : feature_list) {
-    const size_t cur_fid_nnz = feature_nnz[fid];
-
-    // randomly choose some of existing groups as candidates
-    std::vector<size_t> search_groups;
-    for (size_t gid = 0; gid < groups.size(); ++gid) {
-      if (group_nnz[gid] + cur_fid_nnz <= nrow + max_conflict_cnt) {
-        search_groups.push_back(gid);
-      }
-    }
-    std::shuffle(search_groups.begin(), search_groups.end(), common::GlobalRandom());
-    if (param.max_search_group > 0 && search_groups.size() > param.max_search_group) {
-      search_groups.resize(param.max_search_group);
-    }
-
-    BinTypeSize bins_type_size = colmat.GetTypeSize();
-    if (bins_type_size == kUint8BinsTypeSize) {
-        const auto column = colmat.GetColumn<uint8_t>(fid);
-        SetGroup(fid, *(column.get()), max_conflict_cnt, search_groups,
-                 &group_conflict_cnt, &conflict_marks, &groups, &group_nnz, cur_fid_nnz, nrow);
-    } else if (bins_type_size == kUint16BinsTypeSize) {
-        const auto column = colmat.GetColumn<uint16_t>(fid);
-        SetGroup(fid, *(column.get()), max_conflict_cnt, search_groups,
-                 &group_conflict_cnt, &conflict_marks, &groups, &group_nnz, cur_fid_nnz, nrow);
-    } else {
-        CHECK_EQ(bins_type_size, kUint32BinsTypeSize);
-        const auto column = colmat.GetColumn<uint32_t>(fid);
-        SetGroup(fid, *(column.get()), max_conflict_cnt, search_groups,
-                 &group_conflict_cnt, &conflict_marks, &groups, &group_nnz, cur_fid_nnz, nrow);
-    }
-  }
-  return groups;
-}
-
-inline std::vector<std::vector<unsigned>>
-FastFeatureGrouping(const GHistIndexMatrix& gmat,
-                    const ColumnMatrix& colmat,
-                    const tree::TrainParam& param) {
-  const size_t nrow = gmat.row_ptr.size() - 1;
-  const size_t nfeature = gmat.cut.Ptrs().size() - 1;
-
-  std::vector<unsigned> feature_list(nfeature);
-  std::iota(feature_list.begin(), feature_list.end(), 0);
-
-  // sort features by nonzero counts, descending order
-  std::vector<size_t> feature_nnz(nfeature);
-  std::vector<unsigned> features_by_nnz(feature_list);
-  gmat.GetFeatureCounts(&feature_nnz[0]);
-  std::sort(features_by_nnz.begin(), features_by_nnz.end(),
-            [&feature_nnz](unsigned a, unsigned b) {
-    return feature_nnz[a] > feature_nnz[b];
-  });
-
-  auto groups_alt1 = FindGroups(feature_list, feature_nnz, colmat, nrow, param);
-  auto groups_alt2 = FindGroups(features_by_nnz, feature_nnz, colmat, nrow, param);
-  auto& groups = (groups_alt1.size() > groups_alt2.size()) ? groups_alt2 : groups_alt1;
-
-  // take apart small, sparse groups, as it won't help speed
-  {
-    std::vector<std::vector<unsigned>> ret;
-    for (const auto& group : groups) {
-      if (group.size() <= 1 || group.size() >= 5) {
-        ret.push_back(group);  // keep singleton groups and large (5+) groups
-      } else {
-        size_t nnz = 0;
-        for (auto fid : group) {
-          nnz += feature_nnz[fid];
-        }
-        double nnz_rate = static_cast<double>(nnz) / nrow;
-        // take apart small sparse group, due it will not gain on speed
-        if (nnz_rate <= param.sparse_threshold) {
-          for (auto fid : group) {
-            ret.emplace_back();
-            ret.back().push_back(fid);
-          }
-        } else {
-          ret.push_back(group);
-        }
-      }
-    }
-    groups = std::move(ret);
-  }
-
-  // shuffle groups
-  std::shuffle(groups.begin(), groups.end(), common::GlobalRandom());
-
-  return groups;
-}
-
-void GHistIndexBlockMatrix::Init(const GHistIndexMatrix& gmat,
-                                 const ColumnMatrix& colmat,
-                                 const tree::TrainParam& param) {
-  cut_ = &gmat.cut;
-
-  const size_t nrow = gmat.row_ptr.size() - 1;
-  const uint32_t nbins = gmat.cut.Ptrs().back();
-
-  /* step 1: form feature groups */
-  auto groups = FastFeatureGrouping(gmat, colmat, param);
-  const auto nblock = static_cast<uint32_t>(groups.size());
-
-  /* step 2: build a new CSR matrix for each feature group */
-  std::vector<uint32_t> bin2block(nbins);  // lookup table [bin id] => [block id]
-  for (uint32_t group_id = 0; group_id < nblock; ++group_id) {
-    for (auto& fid : groups[group_id]) {
-      const uint32_t bin_begin = gmat.cut.Ptrs()[fid];
-      const uint32_t bin_end = gmat.cut.Ptrs()[fid + 1];
-      for (uint32_t bin_id = bin_begin; bin_id < bin_end; ++bin_id) {
-        bin2block[bin_id] = group_id;
-      }
-    }
-  }
-
-  std::vector<std::vector<uint32_t>> index_temp(nblock);
-  std::vector<std::vector<size_t>> row_ptr_temp(nblock);
-  for (uint32_t block_id = 0; block_id < nblock; ++block_id) {
-    row_ptr_temp[block_id].push_back(0);
-  }
-  for (size_t rid = 0; rid < nrow; ++rid) {
-    const size_t ibegin = gmat.row_ptr[rid];
-    const size_t iend = gmat.row_ptr[rid + 1];
-    for (size_t j = ibegin; j < iend; ++j) {
-      const uint32_t bin_id = gmat.index[j];
-      const uint32_t block_id = bin2block[bin_id];
-      index_temp[block_id].push_back(bin_id);
-    }
-    for (uint32_t block_id = 0; block_id < nblock; ++block_id) {
-      row_ptr_temp[block_id].push_back(index_temp[block_id].size());
-    }
-  }
-
-  /* step 3: concatenate CSR matrices into one (index, row_ptr) pair */
-  std::vector<size_t> index_blk_ptr;
-  std::vector<size_t> row_ptr_blk_ptr;
-  index_blk_ptr.push_back(0);
-  row_ptr_blk_ptr.push_back(0);
-  for (uint32_t block_id = 0; block_id < nblock; ++block_id) {
-    index_.insert(index_.end(), index_temp[block_id].begin(), index_temp[block_id].end());
-    row_ptr_.insert(row_ptr_.end(), row_ptr_temp[block_id].begin(), row_ptr_temp[block_id].end());
-    index_blk_ptr.push_back(index_.size());
-    row_ptr_blk_ptr.push_back(row_ptr_.size());
-  }
-
-  // save shortcut for each block
-  for (uint32_t block_id = 0; block_id < nblock; ++block_id) {
-    Block blk;
-    blk.index_begin = &index_[index_blk_ptr[block_id]];
-    blk.row_ptr_begin = &row_ptr_[row_ptr_blk_ptr[block_id]];
-    blk.index_end = &index_[index_blk_ptr[block_id + 1]];
-    blk.row_ptr_end = &row_ptr_[row_ptr_blk_ptr[block_id + 1]];
-    blocks_.push_back(blk);
-  }
-}
-
 /*!
 * \brief fill a histogram by zeros in range [begin, end)
 */
@ -703,71 +442,6 @@ void GHistBuilder<double>::BuildHist(const std::vector<GradientPair>& gpair,
                             GHistRow<double> hist,
                             bool isDense);

-template<typename GradientSumT>
-void GHistBuilder<GradientSumT>::BuildBlockHist(const std::vector<GradientPair>& gpair,
-                                  const RowSetCollection::Elem row_indices,
-                                  const GHistIndexBlockMatrix& gmatb,
-                                  GHistRowT hist) {
-  static constexpr int kUnroll = 8;  // loop unrolling factor
-  const size_t nblock = gmatb.GetNumBlock();
-  const size_t nrows = row_indices.end - row_indices.begin;
-  const size_t rest = nrows % kUnroll;
-#if defined(_OPENMP)
-  const auto nthread = static_cast<bst_omp_uint>(this->nthread_);  // NOLINT
-#endif  // defined(_OPENMP)
-  xgboost::detail::GradientPairInternal<GradientSumT>* p_hist = hist.data();
-
-  dmlc::OMPException exc;
-#pragma omp parallel for num_threads(nthread) schedule(guided)
-  for (bst_omp_uint bid = 0; bid < nblock; ++bid) {
-    exc.Run([&]() {
-      auto gmat = gmatb[bid];
-
-      for (size_t i = 0; i < nrows - rest; i += kUnroll) {
-        size_t rid[kUnroll];
-        size_t ibegin[kUnroll];
-        size_t iend[kUnroll];
-        GradientPair stat[kUnroll];
-
-        for (int k = 0; k < kUnroll; ++k) {
-          rid[k] = row_indices.begin[i + k];
-          ibegin[k] = gmat.row_ptr[rid[k]];
-          iend[k] = gmat.row_ptr[rid[k] + 1];
-          stat[k] = gpair[rid[k]];
-        }
-        for (int k = 0; k < kUnroll; ++k) {
-          for (size_t j = ibegin[k]; j < iend[k]; ++j) {
-            const uint32_t bin = gmat.index[j];
-            p_hist[bin].Add(stat[k].GetGrad(), stat[k].GetHess());
-          }
-        }
-      }
-      for (size_t i = nrows - rest; i < nrows; ++i) {
-        const size_t rid = row_indices.begin[i];
-        const size_t ibegin = gmat.row_ptr[rid];
-        const size_t iend = gmat.row_ptr[rid + 1];
-        const GradientPair stat = gpair[rid];
-        for (size_t j = ibegin; j < iend; ++j) {
-          const uint32_t bin = gmat.index[j];
-          p_hist[bin].Add(stat.GetGrad(), stat.GetHess());
-        }
-      }
-    });
-  }
-  exc.Rethrow();
-}
-template
-void GHistBuilder<float>::BuildBlockHist(const std::vector<GradientPair>& gpair,
-                                  const RowSetCollection::Elem row_indices,
-                                  const GHistIndexBlockMatrix& gmatb,
-                                  GHistRow<float> hist);
-template
-void GHistBuilder<double>::BuildBlockHist(const std::vector<GradientPair>& gpair,
-                                  const RowSetCollection::Elem row_indices,
-                                  const GHistIndexBlockMatrix& gmatb,
-                                  GHistRow<double> hist);
-
-
 template<typename GradientSumT>
 void GHistBuilder<GradientSumT>::SubtractionTrick(GHistRowT self,
                                                  GHistRowT sibling,
--- a/src/common/hist_util.h
+++ b/src/common/hist_util.h
@ -321,48 +321,8 @@ int32_t XGBOOST_HOST_DEV_INLINE BinarySearchBin(bst_uint begin, bst_uint end,
  return -1;
 }

-struct GHistIndexBlock {
-  const size_t* row_ptr;
-  const uint32_t* index;
-
-  inline GHistIndexBlock(const size_t* row_ptr, const uint32_t* index)
-    : row_ptr(row_ptr), index(index) {}
-
-  // get i-th row
-  inline GHistIndexRow operator[](size_t i) const {
-    return {&index[0] + row_ptr[i], row_ptr[i + 1] - row_ptr[i]};
-  }
-};
-
 class ColumnMatrix;

-class GHistIndexBlockMatrix {
- public:
-  void Init(const GHistIndexMatrix& gmat,
-            const ColumnMatrix& colmat,
-            const tree::TrainParam& param);
-
-  inline GHistIndexBlock operator[](size_t i) const {
-    return {blocks_[i].row_ptr_begin, blocks_[i].index_begin};
-  }
-
-  inline size_t GetNumBlock() const {
-    return blocks_.size();
-  }
-
- private:
-  std::vector<size_t> row_ptr_;
-  std::vector<uint32_t> index_;
-  const HistogramCuts* cut_;
-  struct Block {
-    const size_t* row_ptr_begin;
-    const size_t* row_ptr_end;
-    const uint32_t* index_begin;
-    const uint32_t* index_end;
-  };
-  std::vector<Block> blocks_;
-};
-
 template<typename GradientSumT>
 using GHistRow = Span<xgboost::detail::GradientPairInternal<GradientSumT> >;

@ -672,11 +632,6 @@ class GHistBuilder {
                 const GHistIndexMatrix& gmat,
                 GHistRowT hist,
                 bool isDense);
-  // same, with feature grouping
-  void BuildBlockHist(const std::vector<GradientPair>& gpair,
-                      const RowSetCollection::Elem row_indices,
-                      const GHistIndexBlockMatrix& gmatb,
-                      GHistRowT hist);
  // construct a histogram via subtraction trick
  void SubtractionTrick(GHistRowT self,
                        GHistRowT sibling,
--- a/src/tree/param.h
+++ b/src/tree/param.h
@ -80,8 +80,6 @@ struct TrainParam : public XGBoostParameter<TrainParam> {
  // percentage threshold for treating a feature as sparse
  // e.g. 0.2 indicates a feature with fewer than 20% nonzeros is considered sparse
  double sparse_threshold;
-  // use feature grouping? (default yes)
-  int enable_feature_grouping;
  // when grouping features, how many "conflicts" to allow.
  // conflict is when an instance has nonzero values for two or more features
  // default is 0, meaning features should be strictly complementary
@ -199,9 +197,6 @@ struct TrainParam : public XGBoostParameter<TrainParam> {
    // ------ From cpu quantile histogram -------.
    DMLC_DECLARE_FIELD(sparse_threshold).set_range(0, 1.0).set_default(0.2)
        .describe("percentage threshold for treating a feature as sparse");
-    DMLC_DECLARE_FIELD(enable_feature_grouping).set_lower_bound(0).set_default(0)
-        .describe("if >0, enable feature grouping to ameliorate work imbalance "
-                  "among worker threads");
    DMLC_DECLARE_FIELD(max_conflict_rate).set_range(0, 1.0).set_default(0)
        .describe("when grouping features, how many \"conflicts\" to allow."
       "conflict is when an instance has nonzero values for two or more features."
--- a/src/tree/updater_quantile_hist.cc
+++ b/src/tree/updater_quantile_hist.cc
@ -71,7 +71,7 @@ void QuantileHistMaker::CallBuilderUpdate(const std::unique_ptr<Builder<Gradient
                                          DMatrix *dmat,
                                          const std::vector<RegTree *> &trees) {
  for (auto tree : trees) {
-    builder->Update(gmat_, gmatb_, column_matrix_, gpair, dmat, tree);
+    builder->Update(gmat_, column_matrix_, gpair, dmat, tree);
  }
 }
 void QuantileHistMaker::Update(HostDeviceVector<GradientPair> *gpair,
@ -81,9 +81,6 @@ void QuantileHistMaker::Update(HostDeviceVector<GradientPair> *gpair,
    updater_monitor_.Start("GmatInitialization");
    gmat_.Init(dmat, static_cast<uint32_t>(param_.max_bin));
    column_matrix_.Init(gmat_, param_.sparse_threshold);
-    if (param_.enable_feature_grouping > 0) {
-      gmatb_.Init(gmat_, column_matrix_, param_);
-    }
    updater_monitor_.Stop("GmatInitialization");
    // A proper solution is puting cut matrix in DMatrix, see:
    // https://github.com/dmlc/xgboost/issues/5143
@ -295,7 +292,6 @@ void QuantileHistMaker::Builder<GradientSumT>::SetHistRowsAdder(
 template <typename GradientSumT>
 void QuantileHistMaker::Builder<GradientSumT>::InitRoot(
    const GHistIndexMatrix &gmat,
-    const GHistIndexBlockMatrix &gmatb,
    const DMatrix& fmat,
    RegTree *p_tree,
    const std::vector<GradientPair> &gpair_h,
@ -311,7 +307,7 @@ void QuantileHistMaker::Builder<GradientSumT>::InitRoot(
  int sync_count = 0;

  hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, p_tree);
-  BuildLocalHistograms(gmat, gmatb, p_tree, gpair_h);
+  BuildLocalHistograms(gmat, p_tree, gpair_h);
  hist_synchronizer_->SyncHistograms(this, starting_index, sync_count, p_tree);

  this->InitNewNode(CPUExpandEntry::kRootNid, gmat, gpair_h, fmat, *p_tree);
@ -325,7 +321,6 @@ void QuantileHistMaker::Builder<GradientSumT>::InitRoot(
 template<typename GradientSumT>
 void QuantileHistMaker::Builder<GradientSumT>::BuildLocalHistograms(
    const GHistIndexMatrix &gmat,
-    const GHistIndexBlockMatrix &gmatb,
    RegTree *p_tree,
    const std::vector<GradientPair> &gpair_h) {
  builder_monitor_.Start("BuildLocalHistograms");
@ -355,7 +350,7 @@ void QuantileHistMaker::Builder<GradientSumT>::BuildLocalHistograms(
    auto rid_set = RowSetCollection::Elem(start_of_row_set + r.begin(),
                                      start_of_row_set + r.end(),
                                      nid);
-    BuildHist(gpair_h, rid_set, gmat, gmatb, hist_buffer_.GetInitializedHist(tid, nid_in_set));
+    BuildHist(gpair_h, rid_set, gmat, hist_buffer_.GetInitializedHist(tid, nid_in_set));
  });

  builder_monitor_.Stop("BuildLocalHistograms");
@ -446,7 +441,6 @@ void QuantileHistMaker::Builder<GradientSumT>::BuildNodeStats(
 template<typename GradientSumT>
 void QuantileHistMaker::Builder<GradientSumT>::ExpandTree(
    const GHistIndexMatrix& gmat,
-    const GHistIndexBlockMatrix& gmatb,
    const ColumnMatrix& column_matrix,
    DMatrix* p_fmat,
    RegTree* p_tree,
@ -456,7 +450,7 @@ void QuantileHistMaker::Builder<GradientSumT>::ExpandTree(

  Driver<CPUExpandEntry> driver(static_cast<TrainParam::TreeGrowPolicy>(param_.grow_policy));
  std::vector<CPUExpandEntry> expand;
-  InitRoot(gmat, gmatb, *p_fmat, p_tree, gpair_h, &num_leaves, &expand);
+  InitRoot(gmat, *p_fmat, p_tree, gpair_h, &num_leaves, &expand);
  driver.Push(expand[0]);

  int depth = 0;
@ -478,7 +472,7 @@ void QuantileHistMaker::Builder<GradientSumT>::ExpandTree(
      int sync_count = 0;
      hist_rows_adder_->AddHistRows(this, &starting_index, &sync_count, p_tree);
      if (depth < param_.max_depth) {
-        BuildLocalHistograms(gmat, gmatb, p_tree, gpair_h);
+        BuildLocalHistograms(gmat, p_tree, gpair_h);
        hist_synchronizer_->SyncHistograms(this, starting_index, sync_count, p_tree);
      }

@ -506,8 +500,9 @@ void QuantileHistMaker::Builder<GradientSumT>::ExpandTree(

 template <typename GradientSumT>
 void QuantileHistMaker::Builder<GradientSumT>::Update(
-    const GHistIndexMatrix &gmat, const GHistIndexBlockMatrix &gmatb,
-    const ColumnMatrix &column_matrix, HostDeviceVector<GradientPair> *gpair,
+    const GHistIndexMatrix &gmat,
+    const ColumnMatrix &column_matrix,
+    HostDeviceVector<GradientPair> *gpair,
    DMatrix *p_fmat, RegTree *p_tree) {
  builder_monitor_.Start("Update");

@ -525,7 +520,7 @@ void QuantileHistMaker::Builder<GradientSumT>::Update(

  this->InitData(gmat, *p_fmat, *p_tree, gpair_ptr);

-  ExpandTree(gmat, gmatb, column_matrix, p_fmat, p_tree, *gpair_ptr);
+  ExpandTree(gmat, column_matrix, p_fmat, p_tree, *gpair_ptr);

  for (int nid = 0; nid < p_tree->param.num_nodes; ++nid) {
    p_tree->Stat(nid).loss_chg = snode_[nid].best.loss_chg;
--- a/src/tree/updater_quantile_hist.h
+++ b/src/tree/updater_quantile_hist.h
@ -111,7 +111,6 @@ class MemStackAllocator {
 namespace tree {

 using xgboost::common::GHistIndexMatrix;
-using xgboost::common::GHistIndexBlockMatrix;
 using xgboost::common::GHistIndexRow;
 using xgboost::common::HistCollection;
 using xgboost::common::RowSetCollection;
@ -245,8 +244,6 @@ class QuantileHistMaker: public TreeUpdater {
  TrainParam param_;
  // quantized data matrix
  GHistIndexMatrix gmat_;
-  // (optional) data matrix with feature grouping
-  GHistIndexBlockMatrix gmatb_;
  // column accessor
  ColumnMatrix column_matrix_;
  DMatrix const* p_last_dmat_ {nullptr};
@ -289,7 +286,6 @@ class QuantileHistMaker: public TreeUpdater {
    }
    // update one tree, growing
    virtual void Update(const GHistIndexMatrix& gmat,
-                        const GHistIndexBlockMatrix& gmatb,
                        const ColumnMatrix& column_matrix,
                        HostDeviceVector<GradientPair>* gpair,
                        DMatrix* p_fmat,
@ -298,15 +294,10 @@ class QuantileHistMaker: public TreeUpdater {
    inline void BuildHist(const std::vector<GradientPair>& gpair,
                          const RowSetCollection::Elem row_indices,
                          const GHistIndexMatrix& gmat,
-                          const GHistIndexBlockMatrix& gmatb,
                          GHistRowT hist) {
-      if (param_.enable_feature_grouping > 0) {
-        hist_builder_.BuildBlockHist(gpair, row_indices, gmatb, hist);
-      } else {
      hist_builder_.BuildHist(gpair, row_indices, gmat, hist,
                              data_layout_ != DataLayout::kSparseData);
    }
-    }

    inline void SubtractionTrick(GHistRowT self,
                                 GHistRowT sibling,
@ -386,12 +377,10 @@ class QuantileHistMaker: public TreeUpdater {
    bool SplitContainsMissingValues(const GradStats e, const NodeEntry& snode);

    void BuildLocalHistograms(const GHistIndexMatrix &gmat,
-                              const GHistIndexBlockMatrix &gmatb,
                              RegTree *p_tree,
                              const std::vector<GradientPair> &gpair_h);

    void InitRoot(const GHistIndexMatrix &gmat,
-                  const GHistIndexBlockMatrix &gmatb,
                  const DMatrix& fmat,
                  RegTree *p_tree,
                  const std::vector<GradientPair> &gpair_h,
@ -415,7 +404,6 @@ class QuantileHistMaker: public TreeUpdater {
                        const std::vector<CPUExpandEntry>& nodes_for_apply_split, RegTree *p_tree);

    void ExpandTree(const GHistIndexMatrix& gmat,
-                    const GHistIndexBlockMatrix& gmatb,
                    const ColumnMatrix& column_matrix,
                    DMatrix* p_fmat,
                    RegTree* p_tree,
--- a/tests/cpp/tree/test_quantile_hist.cc
+++ b/tests/cpp/tree/test_quantile_hist.cc
@ -307,11 +307,10 @@ class QuantileHistMock : public QuantileHistMaker {
          { {0.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {0.27f, 0.28f},
            {0.27f, 0.29f}, {0.37f, 0.39f}, {0.47f, 0.49f}, {0.57f, 0.59f} };
      RealImpl::InitData(gmat, fmat, tree, &gpair);
-      GHistIndexBlockMatrix dummy;
      this->hist_.AddHistRow(nid);
      this->hist_.AllocateAllData();
      this->BuildHist(gpair, this->row_set_collection_[nid],
-                gmat, dummy, this->hist_[nid]);
+                      gmat, this->hist_[nid]);

      // Check if number of histogram bins is correct
      ASSERT_EQ(this->hist_[nid].size(), gmat.cut.Ptrs().back());
@ -337,8 +336,7 @@ class QuantileHistMock : public QuantileHistMaker {
      }
    }

-    void TestEvaluateSplit(const GHistIndexBlockMatrix& quantile_index_block,
-                           const RegTree& tree) {
+    void TestEvaluateSplit(const RegTree& tree) {
      std::vector<GradientPair> row_gpairs =
          { {1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f},
            {0.27f, 0.29f}, {0.37f, 0.39f}, {-0.47f, 0.49f}, {0.57f, 0.59f} };
@ -353,7 +351,7 @@ class QuantileHistMock : public QuantileHistMaker {
      this->hist_.AddHistRow(0);
      this->hist_.AllocateAllData();
      this->BuildHist(row_gpairs, this->row_set_collection_[0],
-                      gmat, quantile_index_block, this->hist_[0]);
+                      gmat, this->hist_[0]);

      RealImpl::InitNewNode(0, gmat, row_gpairs, *dmat, tree);

@ -419,15 +417,13 @@ class QuantileHistMock : public QuantileHistMaker {
      ASSERT_EQ(this->snode_[0].best.split_value, gmat.cut.Values()[best_split_threshold]);
    }

-    void TestEvaluateSplitParallel(const GHistIndexBlockMatrix &quantile_index_block,
-                                   const RegTree &tree) {
+    void TestEvaluateSplitParallel(const RegTree &tree) {
      omp_set_num_threads(2);
-      TestEvaluateSplit(quantile_index_block, tree);
+      TestEvaluateSplit(tree);
      omp_set_num_threads(1);
    }

-    void TestApplySplit(const GHistIndexBlockMatrix& quantile_index_block,
-                        const RegTree& tree) {
+    void TestApplySplit(const RegTree& tree) {
      std::vector<GradientPair> row_gpairs =
          { {1.23f, 0.24f}, {0.24f, 0.25f}, {0.26f, 0.27f}, {2.27f, 0.28f},
            {0.27f, 0.29f}, {0.37f, 0.39f}, {-0.47f, 0.49f}, {0.57f, 0.59f} };
@ -632,9 +628,9 @@ class QuantileHistMock : public QuantileHistMaker {
    RegTree tree = RegTree();
    tree.param.UpdateAllowUnknown(cfg_);
    if (double_builder_) {
-      double_builder_->TestEvaluateSplit(gmatb_, tree);
+      double_builder_->TestEvaluateSplit(tree);
    } else {
-      float_builder_->TestEvaluateSplit(gmatb_, tree);
+      float_builder_->TestEvaluateSplit(tree);
    }
  }

@ -642,9 +638,9 @@ class QuantileHistMock : public QuantileHistMaker {
    RegTree tree = RegTree();
    tree.param.UpdateAllowUnknown(cfg_);
    if (double_builder_) {
-      double_builder_->TestApplySplit(gmatb_, tree);
+      double_builder_->TestApplySplit(tree);
    } else {
-      float_builder_->TestEvaluateSplit(gmatb_, tree);
+      float_builder_->TestEvaluateSplit(tree);
    }
  }
 };
@ -714,8 +710,7 @@ TEST(QuantileHist, DistributedSyncHistograms) {
 TEST(QuantileHist, BuildHist) {
  // Don't enable feature grouping
  std::vector<std::pair<std::string, std::string>> cfg
-      {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())},
-       {"enable_feature_grouping", std::to_string(0)}};
+      {{"num_feature", std::to_string(QuantileHistMock::GetNumColumns())}};
  QuantileHistMock maker(cfg);
  maker.TestBuildHist();
  const bool single_precision_histogram = true;