[EM] Enable prediction cache for GPU. (#10707)

- Use `UpdatePosition` for all nodes and skip `FinalizePosition` when external memory is used.
- Create `encode/decode` for node position; this is purely a refactor.
- Reuse code between update position and finalization.
2024-08-15 21:41:59 +08:00
parent 0def8e0bae
commit 582ea104b5
20 changed files with 378 additions and 327 deletions
--- a/tests/cpp/tree/gpu_hist/test_row_partitioner.cu
+++ b/tests/cpp/tree/gpu_hist/test_row_partitioner.cu
@@ -67,9 +67,9 @@ void TestSortPositionBatch(const std::vector<int>& ridx_in, const std::vector<Se
                                h_batch_info.size() * sizeof(PerNodeData<int>), cudaMemcpyDefault,
                                nullptr));
  dh::device_vector<int8_t> tmp;
-  SortPositionBatch<uint32_t, decltype(op), int>(dh::ToSpan(d_batch_info), dh::ToSpan(ridx),
-                                                 dh::ToSpan(ridx_tmp), dh::ToSpan(counts),
-                                                 total_rows, op, &tmp);
+  SortPositionBatch<decltype(op), int>(dh::ToSpan(d_batch_info), dh::ToSpan(ridx),
+                                       dh::ToSpan(ridx_tmp), dh::ToSpan(counts), total_rows, op,
+                                       &tmp);

  auto op_without_data = [=] __device__(auto ridx) { return ridx % 2 == 0; };
  for (size_t i = 0; i < segments.size(); i++) {