Use heuristic to select histogram node, avoid rabit call (#4951)

This commit is contained in:
Rory Mitchell 2019-10-18 11:33:54 +13:00 committed by GitHub
parent 185e3f1916
commit 60748b2071
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 8 additions and 33 deletions

22
src/tree/updater_gpu_hist.cu Normal file → Executable file
View File

@ -776,21 +776,15 @@ struct GPUHistMakerDevice {
/** /**
* \brief Build GPU local histograms for the left and right child of some parent node * \brief Build GPU local histograms for the left and right child of some parent node
*/ */
void BuildHistLeftRight(int nidx_parent, int nidx_left, int nidx_right, dh::AllReducer* reducer) { void BuildHistLeftRight(const ExpandEntry &candidate, int nidx_left,
int nidx_right, dh::AllReducer* reducer) {
auto build_hist_nidx = nidx_left; auto build_hist_nidx = nidx_left;
auto subtraction_trick_nidx = nidx_right; auto subtraction_trick_nidx = nidx_right;
auto left_node_rows = row_partitioner->GetRows(nidx_left).size();
auto right_node_rows = row_partitioner->GetRows(nidx_right).size();
// Decide whether to build the left histogram or right histogram // Decide whether to build the left histogram or right histogram
// Find the largest number of training instances on any given device // Use sum of Hessian as a heuristic to select node with fewest training instances
// Assume this will be the bottleneck and avoid building this node if bool fewer_right = candidate.split.right_sum.GetHess() < candidate.split.left_sum.GetHess();
// possible
std::vector<size_t> max_reduce;
max_reduce.push_back(left_node_rows);
max_reduce.push_back(right_node_rows);
reducer->HostMaxAllReduce(&max_reduce);
bool fewer_right = max_reduce[1] < max_reduce[0];
if (fewer_right) { if (fewer_right) {
std::swap(build_hist_nidx, subtraction_trick_nidx); std::swap(build_hist_nidx, subtraction_trick_nidx);
} }
@ -800,11 +794,11 @@ struct GPUHistMakerDevice {
// Check whether we can use the subtraction trick to calculate the other // Check whether we can use the subtraction trick to calculate the other
bool do_subtraction_trick = this->CanDoSubtractionTrick( bool do_subtraction_trick = this->CanDoSubtractionTrick(
nidx_parent, build_hist_nidx, subtraction_trick_nidx); candidate.nid, build_hist_nidx, subtraction_trick_nidx);
if (do_subtraction_trick) { if (do_subtraction_trick) {
// Calculate other histogram using subtraction trick // Calculate other histogram using subtraction trick
this->SubtractionTrick(nidx_parent, build_hist_nidx, this->SubtractionTrick(candidate.nid, build_hist_nidx,
subtraction_trick_nidx); subtraction_trick_nidx);
} else { } else {
// Calculate other histogram manually // Calculate other histogram manually
@ -917,7 +911,7 @@ struct GPUHistMakerDevice {
monitor.StopCuda("UpdatePosition"); monitor.StopCuda("UpdatePosition");
monitor.StartCuda("BuildHist"); monitor.StartCuda("BuildHist");
this->BuildHistLeftRight(candidate.nid, left_child_nidx, right_child_nidx, reducer); this->BuildHistLeftRight(candidate, left_child_nidx, right_child_nidx, reducer);
monitor.StopCuda("BuildHist"); monitor.StopCuda("BuildHist");
monitor.StartCuda("EvaluateSplits"); monitor.StartCuda("EvaluateSplits");

19
tests/cpp/common/test_device_helpers.cu Normal file → Executable file
View File

@ -84,22 +84,3 @@ void TestAllocator() {
TEST(bulkAllocator, Test) { TEST(bulkAllocator, Test) {
TestAllocator(); TestAllocator();
} }
// Test thread safe max reduction
#if defined(XGBOOST_USE_NCCL)
TEST(AllReducer, MGPU_HostMaxAllReduce) {
dh::AllReducer reducer;
size_t num_threads = 50;
std::vector<std::vector<size_t>> thread_data(num_threads);
#pragma omp parallel num_threads(num_threads)
{
int tid = omp_get_thread_num();
thread_data[tid] = {size_t(tid)};
reducer.HostMaxAllReduce(&thread_data[tid]);
}
for (auto data : thread_data) {
ASSERT_EQ(data.front(), num_threads - 1);
}
}
#endif