[EM] Enable prediction cache for GPU. (#10707)

- Use `UpdatePosition` for all nodes and skip `FinalizePosition` when external memory is used.
- Create `encode/decode` for node position; this is just a refactor.
- Reuse code between update position and finalization.
This commit is contained in:
Jiaming Yuan
2024-08-15 21:41:59 +08:00
committed by GitHub
parent 0def8e0bae
commit 582ea104b5
20 changed files with 378 additions and 327 deletions

View File

@@ -67,9 +67,9 @@ void TestSortPositionBatch(const std::vector<int>& ridx_in, const std::vector<Se
h_batch_info.size() * sizeof(PerNodeData<int>), cudaMemcpyDefault,
nullptr));
dh::device_vector<int8_t> tmp;
SortPositionBatch<uint32_t, decltype(op), int>(dh::ToSpan(d_batch_info), dh::ToSpan(ridx),
dh::ToSpan(ridx_tmp), dh::ToSpan(counts),
total_rows, op, &tmp);
SortPositionBatch<decltype(op), int>(dh::ToSpan(d_batch_info), dh::ToSpan(ridx),
dh::ToSpan(ridx_tmp), dh::ToSpan(counts), total_rows, op,
&tmp);
auto op_without_data = [=] __device__(auto ridx) { return ridx % 2 == 0; };
for (size_t i = 0; i < segments.size(); i++) {