Use heuristic to select histogram node, avoid rabit call (#4951)

This commit is contained in:
Rory Mitchell 2019-10-18 11:33:54 +13:00 committed by GitHub
parent 185e3f1916
commit 60748b2071
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 8 additions and 33 deletions

22
src/tree/updater_gpu_hist.cu Normal file → Executable file
View File

@ -776,21 +776,15 @@ struct GPUHistMakerDevice {
/**
* \brief Build GPU local histograms for the left and right child of some parent node
*/
void BuildHistLeftRight(int nidx_parent, int nidx_left, int nidx_right, dh::AllReducer* reducer) {
void BuildHistLeftRight(const ExpandEntry &candidate, int nidx_left,
int nidx_right, dh::AllReducer* reducer) {
auto build_hist_nidx = nidx_left;
auto subtraction_trick_nidx = nidx_right;
auto left_node_rows = row_partitioner->GetRows(nidx_left).size();
auto right_node_rows = row_partitioner->GetRows(nidx_right).size();
// Decide whether to build the left histogram or right histogram
// Find the largest number of training instances on any given device
// Assume this will be the bottleneck and avoid building this node if
// possible
std::vector<size_t> max_reduce;
max_reduce.push_back(left_node_rows);
max_reduce.push_back(right_node_rows);
reducer->HostMaxAllReduce(&max_reduce);
bool fewer_right = max_reduce[1] < max_reduce[0];
// Use sum of Hessian as a heuristic to select node with fewest training instances
bool fewer_right = candidate.split.right_sum.GetHess() < candidate.split.left_sum.GetHess();
if (fewer_right) {
std::swap(build_hist_nidx, subtraction_trick_nidx);
}
@ -800,11 +794,11 @@ struct GPUHistMakerDevice {
// Check whether we can use the subtraction trick to calculate the other
bool do_subtraction_trick = this->CanDoSubtractionTrick(
nidx_parent, build_hist_nidx, subtraction_trick_nidx);
candidate.nid, build_hist_nidx, subtraction_trick_nidx);
if (do_subtraction_trick) {
// Calculate other histogram using subtraction trick
this->SubtractionTrick(nidx_parent, build_hist_nidx,
this->SubtractionTrick(candidate.nid, build_hist_nidx,
subtraction_trick_nidx);
} else {
// Calculate other histogram manually
@ -917,7 +911,7 @@ struct GPUHistMakerDevice {
monitor.StopCuda("UpdatePosition");
monitor.StartCuda("BuildHist");
this->BuildHistLeftRight(candidate.nid, left_child_nidx, right_child_nidx, reducer);
this->BuildHistLeftRight(candidate, left_child_nidx, right_child_nidx, reducer);
monitor.StopCuda("BuildHist");
monitor.StartCuda("EvaluateSplits");

19
tests/cpp/common/test_device_helpers.cu Normal file → Executable file
View File

@ -84,22 +84,3 @@ void TestAllocator() {
// Registers TestAllocator() as the `Test` case of the `bulkAllocator` suite;
// the test body does nothing beyond invoking that helper.
TEST(bulkAllocator, Test) {
TestAllocator();
}
// Test thread safe max reduction.
// Each OpenMP thread stores its own thread id in a private slot and passes
// that slot to HostMaxAllReduce on a single shared reducer. Afterwards every
// slot is expected to hold the largest thread id, i.e. num_threads - 1.
// Only compiled when XGBOOST_USE_NCCL is defined.
#if defined(XGBOOST_USE_NCCL)
TEST(AllReducer, MGPU_HostMaxAllReduce) {
  dh::AllReducer reducer;
  size_t num_threads = 50;
  // One result vector per requested thread; all start out empty.
  std::vector<std::vector<size_t>> thread_data(num_threads);
#pragma omp parallel num_threads(num_threads)
  {
    int tid = omp_get_thread_num();
    thread_data[tid] = {static_cast<size_t>(tid)};
    reducer.HostMaxAllReduce(&thread_data[tid]);
  }
  // Iterate by const reference so the per-thread vectors are not copied.
  for (const auto& data : thread_data) {
    // num_threads(...) is only a request: if the runtime supplied fewer
    // threads, the untouched slots are still empty. Fail cleanly here rather
    // than calling front() on an empty vector, which is undefined behaviour.
    ASSERT_FALSE(data.empty());
    ASSERT_EQ(data.front(), num_threads - 1);
  }
}
#endif