Smarter choice of histogram construction for distributed gpu_hist (#4519)

* Smarter choice of histogram construction for distributed gpu_hist

* Limit omp team size in ExecuteShards
This commit is contained in:
Rory Mitchell
2019-05-31 14:11:34 +12:00
committed by GitHub
parent dd60fc23e6
commit fbbae3386a
3 changed files with 67 additions and 10 deletions

View File

@@ -95,3 +95,20 @@ void TestAllocator() {
// Smoke test: exercise the bulk device allocator end-to-end.
TEST(bulkAllocator, Test) { TestAllocator(); }
// Test thread-safe max reduction.
// Each of num_threads OpenMP threads contributes a single-element vector
// {tid}; after HostMaxAllReduce completes, every thread's vector must hold
// the global maximum, i.e. num_threads - 1.
// NOTE(review): assumes the OpenMP runtime actually grants all requested
// threads; if fewer are spawned, untouched thread_data slots stay empty and
// data.front() would be UB — confirm the runtime honours num_threads here.
TEST(AllReducer, HostMaxAllReduce) {
  dh::AllReducer reducer;
  const size_t num_threads = 50;
  std::vector<std::vector<size_t>> thread_data(num_threads);
  // The num_threads clause takes an int expression; cast explicitly to avoid
  // an implicit size_t -> int narrowing conversion.
#pragma omp parallel num_threads(static_cast<int>(num_threads))
  {
    int tid = omp_get_thread_num();
    thread_data[tid] = {size_t(tid)};
    reducer.HostMaxAllReduce(&thread_data[tid]);
  }
  // const& — iterating by value would copy each vector per iteration.
  for (const auto& data : thread_data) {
    ASSERT_EQ(data.front(), num_threads - 1);
  }
}