Implement sketching with adapter. (#8019)
This commit is contained in:
@@ -152,41 +152,8 @@ class HistogramCuts {
|
||||
* \param use_sorted Whether we should use SortedCSC for sketching; it's more efficient
|
||||
* but consumes more memory.
|
||||
*/
|
||||
inline HistogramCuts SketchOnDMatrix(DMatrix* m, int32_t max_bins, int32_t n_threads,
                                     bool use_sorted = false, Span<float> const hessian = {}) {
  // Two passes over the matrix: the first sums the per-column sizes reported for each
  // SparsePage, the second pushes every page into a sketch container constructed from
  // those totals and then asks the container to produce the cuts.
  HistogramCuts out;
  auto const& info = m->Info();

  // Per-column totals accumulated across all SparsePage batches.
  std::vector<bst_row_t> reduced(info.num_col_, 0);
  for (auto const& page : m->GetBatches<SparsePage>()) {
    auto const& entries_per_column =
        HostSketchContainer::CalcColumnSize(page, info.num_col_, n_threads);
    for (size_t i = 0; i < entries_per_column.size(); ++i) {
      reduced[i] += entries_per_column[i];
    }
  }

  if (!use_sorted) {
    // Row-wise path: feed SparsePage batches to the host sketch container.
    HostSketchContainer container(max_bins, info, reduced, HostSketchContainer::UseGroup(info),
                                  n_threads);
    for (auto const& page : m->GetBatches<SparsePage>()) {
      container.PushRowPage(page, info, hessian);
    }
    container.MakeCuts(&out);
  } else {
    // Column-wise path: feed SortedCSCPage batches to the sorted sketch container.
    SortedSketchContainer container{max_bins, info, reduced,
                                    HostSketchContainer::UseGroup(info), n_threads};
    for (auto const& page : m->GetBatches<SortedCSCPage>()) {
      container.PushColPage(page, info, hessian);
    }
    container.MakeCuts(&out);
  }

  return out;
}
|
||||
HistogramCuts SketchOnDMatrix(DMatrix* m, int32_t max_bins, int32_t n_threads,
|
||||
bool use_sorted = false, Span<float> const hessian = {});
|
||||
|
||||
enum BinTypeSize : uint8_t {
|
||||
kUint8BinsTypeSize = 1,
|
||||
|
||||
Reference in New Issue
Block a user