Use heuristic to select histogram node, avoid rabit call (#4951)
This commit is contained in:
parent
185e3f1916
commit
60748b2071
22
src/tree/updater_gpu_hist.cu
Normal file → Executable file
22
src/tree/updater_gpu_hist.cu
Normal file → Executable file
@ -776,21 +776,15 @@ struct GPUHistMakerDevice {
|
||||
/**
|
||||
* \brief Build GPU local histograms for the left and right child of some parent node
|
||||
*/
|
||||
void BuildHistLeftRight(int nidx_parent, int nidx_left, int nidx_right, dh::AllReducer* reducer) {
|
||||
void BuildHistLeftRight(const ExpandEntry &candidate, int nidx_left,
|
||||
int nidx_right, dh::AllReducer* reducer) {
|
||||
auto build_hist_nidx = nidx_left;
|
||||
auto subtraction_trick_nidx = nidx_right;
|
||||
|
||||
auto left_node_rows = row_partitioner->GetRows(nidx_left).size();
|
||||
auto right_node_rows = row_partitioner->GetRows(nidx_right).size();
|
||||
|
||||
// Decide whether to build the left histogram or right histogram
|
||||
// Find the largest number of training instances on any given device
|
||||
// Assume this will be the bottleneck and avoid building this node if
|
||||
// possible
|
||||
std::vector<size_t> max_reduce;
|
||||
max_reduce.push_back(left_node_rows);
|
||||
max_reduce.push_back(right_node_rows);
|
||||
reducer->HostMaxAllReduce(&max_reduce);
|
||||
bool fewer_right = max_reduce[1] < max_reduce[0];
|
||||
// Use sum of Hessian as a heuristic to select node with fewest training instances
|
||||
bool fewer_right = candidate.split.right_sum.GetHess() < candidate.split.left_sum.GetHess();
|
||||
if (fewer_right) {
|
||||
std::swap(build_hist_nidx, subtraction_trick_nidx);
|
||||
}
|
||||
@ -800,11 +794,11 @@ struct GPUHistMakerDevice {
|
||||
|
||||
// Check whether we can use the subtraction trick to calculate the other
|
||||
bool do_subtraction_trick = this->CanDoSubtractionTrick(
|
||||
nidx_parent, build_hist_nidx, subtraction_trick_nidx);
|
||||
candidate.nid, build_hist_nidx, subtraction_trick_nidx);
|
||||
|
||||
if (do_subtraction_trick) {
|
||||
// Calculate other histogram using subtraction trick
|
||||
this->SubtractionTrick(nidx_parent, build_hist_nidx,
|
||||
this->SubtractionTrick(candidate.nid, build_hist_nidx,
|
||||
subtraction_trick_nidx);
|
||||
} else {
|
||||
// Calculate other histogram manually
|
||||
@ -917,7 +911,7 @@ struct GPUHistMakerDevice {
|
||||
monitor.StopCuda("UpdatePosition");
|
||||
|
||||
monitor.StartCuda("BuildHist");
|
||||
this->BuildHistLeftRight(candidate.nid, left_child_nidx, right_child_nidx, reducer);
|
||||
this->BuildHistLeftRight(candidate, left_child_nidx, right_child_nidx, reducer);
|
||||
monitor.StopCuda("BuildHist");
|
||||
|
||||
monitor.StartCuda("EvaluateSplits");
|
||||
|
||||
19
tests/cpp/common/test_device_helpers.cu
Normal file → Executable file
19
tests/cpp/common/test_device_helpers.cu
Normal file → Executable file
@ -84,22 +84,3 @@ void TestAllocator() {
|
||||
TEST(bulkAllocator, Test) {
|
||||
TestAllocator();
|
||||
}
|
||||
|
||||
// Test thread safe max reduction
|
||||
#if defined(XGBOOST_USE_NCCL)
|
||||
TEST(AllReducer, MGPU_HostMaxAllReduce) {
|
||||
dh::AllReducer reducer;
|
||||
size_t num_threads = 50;
|
||||
std::vector<std::vector<size_t>> thread_data(num_threads);
|
||||
#pragma omp parallel num_threads(num_threads)
|
||||
{
|
||||
int tid = omp_get_thread_num();
|
||||
thread_data[tid] = {size_t(tid)};
|
||||
reducer.HostMaxAllReduce(&thread_data[tid]);
|
||||
}
|
||||
|
||||
for (auto data : thread_data) {
|
||||
ASSERT_EQ(data.front(), num_threads - 1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user