Optimize CPU sketch allreduce for sparse data. (#6009)

* Bypass the RABIT serialization reducer and use custom allgather-based merging.
This commit is contained in:
Jiaming Yuan
2020-08-19 10:03:45 +08:00
committed by GitHub
parent 90355b4f00
commit 29b7fea572
10 changed files with 357 additions and 87 deletions

View File

@@ -162,7 +162,7 @@ inline void ValidateColumn(const HistogramCuts& cuts, int column_idx,
// Check all cut points are unique
EXPECT_EQ(std::set<float>(cuts_begin, cuts_end).size(),
cuts_end - cuts_begin);
static_cast<size_t>(cuts_end - cuts_begin));
auto unique = std::set<float>(sorted_column.begin(), sorted_column.end());
if (unique.size() <= num_bins) {
@@ -189,7 +189,7 @@ inline void ValidateCuts(const HistogramCuts& cuts, DMatrix* dmat,
// Collect data into columns
std::vector<std::vector<float>> columns(dmat->Info().num_col_);
for (auto& batch : dmat->GetBatches<SparsePage>()) {
ASSERT_GT(batch.Size(), 0);
ASSERT_GT(batch.Size(), 0ul);
for (auto i = 0ull; i < batch.Size(); i++) {
for (auto e : batch[i]) {
columns[e.index].push_back(e.fvalue);