Re-introduce double buffer in UpdatePosition, to fix perf regression in gpu_hist (#6757)

* Revert "gpu_hist performance tweaks (#5707)"

This reverts commit f779980f7e.

* Address reviewer's comment

* Fix build error
This commit is contained in:
Philip Hyunsu Cho
2021-03-18 13:56:10 -07:00
committed by GitHub
parent e2d8a99413
commit 4230dcb614
3 changed files with 63 additions and 21 deletions

View File

@@ -47,7 +47,17 @@ class RowPartitioner {
/*! \brief Range of row index for each node, pointers into ridx below. */
std::vector<Segment> ridx_segments_;
dh::TemporaryArray<RowIndexT> ridx_a_;
dh::TemporaryArray<RowIndexT> ridx_b_;
dh::TemporaryArray<bst_node_t> position_a_;
dh::TemporaryArray<bst_node_t> position_b_;
/*! \brief mapping for node id -> rows.
* This looks like:
* node id | 1 | 2 |
* rows idx | 3, 5, 1 | 13, 31 |
*/
dh::DoubleBuffer<RowIndexT> ridx_;
/*! \brief mapping for row -> node id. */
dh::DoubleBuffer<bst_node_t> position_;
dh::caching_device_vector<int64_t>
left_counts_; // Useful to keep a bunch of zeroed memory for sort position
std::vector<cudaStream_t> streams_;
@@ -100,8 +110,8 @@ class RowPartitioner {
void UpdatePosition(bst_node_t nidx, bst_node_t left_nidx,
bst_node_t right_nidx, UpdatePositionOpT op) {
Segment segment = ridx_segments_.at(nidx); // rows belongs to node nidx
auto d_ridx = dh::ToSpan(ridx_a_);
auto d_position = dh::ToSpan(position_a_);
auto d_ridx = ridx_.CurrentSpan();
auto d_position = position_.CurrentSpan();
if (left_counts_.size() <= nidx) {
left_counts_.resize((nidx * 2) + 1);
thrust::fill(left_counts_.begin(), left_counts_.end(), 0);
@@ -148,9 +158,9 @@ class RowPartitioner {
*/
template <typename FinalisePositionOpT>
void FinalisePosition(FinalisePositionOpT op) {
auto d_position = position_a_.data().get();
const auto d_ridx = ridx_a_.data().get();
dh::LaunchN(device_idx_, position_a_.size(), [=] __device__(size_t idx) {
auto d_position = position_.Current();
const auto d_ridx = ridx_.Current();
dh::LaunchN(device_idx_, position_.Size(), [=] __device__(size_t idx) {
auto position = d_position[idx];
RowIndexT ridx = d_ridx[idx];
bst_node_t new_position = op(ridx, position);