Support CPU quantile sketch with column-wise data split (#8742)
This commit is contained in:
@@ -802,6 +802,7 @@ class SketchContainerImpl {
|
||||
std::vector<bst_row_t> columns_size_;
|
||||
int32_t max_bins_;
|
||||
bool use_group_ind_{false};
|
||||
bool col_split_;
|
||||
int32_t n_threads_;
|
||||
bool has_categorical_{false};
|
||||
Monitor monitor_;
|
||||
@@ -814,7 +815,7 @@ class SketchContainerImpl {
|
||||
* \param use_group whether groups are assigned to data instances.
|
||||
*/
|
||||
SketchContainerImpl(std::vector<bst_row_t> columns_size, int32_t max_bins,
|
||||
common::Span<FeatureType const> feature_types, bool use_group,
|
||||
common::Span<FeatureType const> feature_types, bool use_group, bool col_split,
|
||||
int32_t n_threads);
|
||||
|
||||
static bool UseGroup(MetaInfo const &info) {
|
||||
@@ -896,6 +897,10 @@ class SketchContainerImpl {
|
||||
void PushRowPage(SparsePage const &page, MetaInfo const &info, Span<float const> hessian = {});
|
||||
|
||||
void MakeCuts(HistogramCuts* cuts);
|
||||
|
||||
private:
|
||||
// Merge all categories from other workers.
|
||||
void AllreduceCategories();
|
||||
};
|
||||
|
||||
class HostSketchContainer : public SketchContainerImpl<WQuantileSketch<float, float>> {
|
||||
@@ -904,7 +909,8 @@ class HostSketchContainer : public SketchContainerImpl<WQuantileSketch<float, fl
|
||||
|
||||
public:
|
||||
HostSketchContainer(int32_t max_bins, common::Span<FeatureType const> ft,
|
||||
std::vector<size_t> columns_size, bool use_group, int32_t n_threads);
|
||||
std::vector<size_t> columns_size, bool use_group, bool col_split,
|
||||
int32_t n_threads);
|
||||
|
||||
template <typename Batch>
|
||||
void PushAdapterBatch(Batch const &batch, size_t base_rowid, MetaInfo const &info, float missing);
|
||||
@@ -1000,9 +1006,9 @@ class SortedSketchContainer : public SketchContainerImpl<WXQuantileSketch<float,
|
||||
|
||||
public:
|
||||
explicit SortedSketchContainer(int32_t max_bins, common::Span<FeatureType const> ft,
|
||||
std::vector<size_t> columns_size, bool use_group,
|
||||
std::vector<size_t> columns_size, bool use_group, bool col_split,
|
||||
int32_t n_threads)
|
||||
: SketchContainerImpl{columns_size, max_bins, ft, use_group, n_threads} {
|
||||
: SketchContainerImpl{columns_size, max_bins, ft, use_group, col_split, n_threads} {
|
||||
monitor_.Init(__func__);
|
||||
sketches_.resize(columns_size.size());
|
||||
size_t i = 0;
|
||||
|
||||
Reference in New Issue
Block a user