try direct instantiation of EvaluateSplitsKernel

This commit is contained in:
Hendrik Groove 2024-10-21 21:22:57 +02:00
parent ee17a5a26c
commit 94ffd57641

View File

@@ -332,6 +332,14 @@ __global__ __launch_bounds__(kBlockSize) void EvaluateSplitsKernel(
}
}
// Explicit instantiation definition of EvaluateSplitsKernel for a template
// argument of 64. This forces the compiler to emit the device code for this
// specialization in this translation unit instead of relying on implicit
// instantiation at a launch site.
//
// The parameter list below must stay in sync with the primary template's
// signature; if the template's parameters change, this declaration has to be
// updated as well or it will no longer name the template.
//
// NOTE(review): 64 is presumably the block size used when the kernel is
// launched (the template is declared with __launch_bounds__(kBlockSize)) --
// confirm against the launch site, since an instantiation for a value that is
// never launched only adds compile time and binary size.
// NOTE(review): __launch_bounds__ is not repeated here; it is expected to come
// from the template definition -- verify with the target compiler.
template __global__ void EvaluateSplitsKernel<64>(
bst_feature_t max_active_features,
common::Span<const EvaluateSplitInputs> d_inputs,
const EvaluateSplitSharedInputs shared_inputs,
common::Span<bst_feature_t> sorted_idx,
const TreeEvaluator::SplitEvaluator<GPUTrainingParam> evaluator,
common::Span<DeviceSplitCandidate> out_candidates);
__device__ DeviceSplitCandidate operator+(const DeviceSplitCandidate &a,
const DeviceSplitCandidate &b) {
return b.loss_chg > a.loss_chg ? b : a;