Enable compiling with system cub. (#7232)

- Tested with all CUDA 11.x.
- Work around the cub scan by using a discard iterator in AUC.
- Limit the size of Argsort when compiled with CUDA cub.
This commit is contained in:
Jiaming Yuan
2021-09-17 14:28:18 +08:00
committed by GitHub
parent b18f5f61b0
commit c311a8c1d8
6 changed files with 67 additions and 26 deletions

View File

@@ -50,9 +50,6 @@ struct WriteResultsFunctor {
}
};
// Change the value type of thrust discard iterator so we can use it with cub
using DiscardOverload = thrust::discard_iterator<IndexFlagTuple>;
// Implement partitioning via single scan operation using transform output to
// write the result
void RowPartitioner::SortPosition(common::Span<bst_node_t> position,
@@ -64,7 +61,7 @@ void RowPartitioner::SortPosition(common::Span<bst_node_t> position,
WriteResultsFunctor write_results{left_nidx, position, position_out,
ridx, ridx_out, d_left_count};
auto discard_write_iterator =
thrust::make_transform_output_iterator(DiscardOverload(), write_results);
thrust::make_transform_output_iterator(dh::TypedDiscard<IndexFlagTuple>(), write_results);
auto counting = thrust::make_counting_iterator(0llu);
auto input_iterator = dh::MakeTransformIterator<IndexFlagTuple>(
counting, [=] __device__(size_t idx) {