Fuse gpu_hist all-reduce calls where possible (#7867)

This commit is contained in:
Rory Mitchell
2022-05-17 13:27:50 +02:00
committed by GitHub
parent b41cf92dc2
commit 71d3b2e036
9 changed files with 234 additions and 185 deletions

View File

@@ -103,7 +103,7 @@ class GPUHistEvaluator {
}
/**
* \brief Get sorted index storage based on the left node of inputs .
* \brief Get sorted index storage based on the left node of inputs.
*/
auto SortedIdx(EvaluateSplitInputs<GradientSumT> left) {
if (left.nidx == RegTree::kRoot && !cat_sorted_idx_.empty()) {

View File

@@ -247,15 +247,6 @@ void BuildGradientHistogram(EllpackDeviceAccessor const& matrix,
dh::safe_cuda(cudaGetLastError());
}
template void BuildGradientHistogram<GradientPair>(
EllpackDeviceAccessor const& matrix,
FeatureGroupsAccessor const& feature_groups,
common::Span<GradientPair const> gpair,
common::Span<const uint32_t> ridx,
common::Span<GradientPair> histogram,
HistRounding<GradientPair> rounding,
bool force_global_memory);
template void BuildGradientHistogram<GradientPairPrecise>(
EllpackDeviceAccessor const& matrix,
FeatureGroupsAccessor const& feature_groups,