Fuse gpu_hist all-reduce calls where possible (#7867)
This commit is contained in:
@@ -103,7 +103,7 @@ class GPUHistEvaluator {
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Get sorted index storage based on the left node of inputs .
|
||||
* \brief Get sorted index storage based on the left node of inputs.
|
||||
*/
|
||||
auto SortedIdx(EvaluateSplitInputs<GradientSumT> left) {
|
||||
if (left.nidx == RegTree::kRoot && !cat_sorted_idx_.empty()) {
|
||||
|
||||
@@ -247,15 +247,6 @@ void BuildGradientHistogram(EllpackDeviceAccessor const& matrix,
|
||||
dh::safe_cuda(cudaGetLastError());
|
||||
}
|
||||
|
||||
template void BuildGradientHistogram<GradientPair>(
|
||||
EllpackDeviceAccessor const& matrix,
|
||||
FeatureGroupsAccessor const& feature_groups,
|
||||
common::Span<GradientPair const> gpair,
|
||||
common::Span<const uint32_t> ridx,
|
||||
common::Span<GradientPair> histogram,
|
||||
HistRounding<GradientPair> rounding,
|
||||
bool force_global_memory);
|
||||
|
||||
template void BuildGradientHistogram<GradientPairPrecise>(
|
||||
EllpackDeviceAccessor const& matrix,
|
||||
FeatureGroupsAccessor const& feature_groups,
|
||||
|
||||
Reference in New Issue
Block a user