Single precision histograms on GPU (#3965)

* Allow single precision histogram summation in gpu_hist

* Add python test, reduce run-time of gpu_hist tests

* Update documentation
This commit is contained in:
Rory Mitchell
2018-12-10 10:55:30 +13:00
committed by GitHub
parent 9af6b689d6
commit 93f9ce9ef9
10 changed files with 351 additions and 212 deletions

View File

@@ -944,6 +944,32 @@ class AllReducer {
#endif
}
/**
* \brief Allreduce. Use in exactly the same way as NCCL but without needing
* streams or comms.
*
* \param communication_group_idx Zero-based index of the communication group.
* \param sendbuff The sendbuff.
* \param recvbuff The recvbuff.
* \param count Number of elements.
*/
void AllReduceSum(int communication_group_idx, const float *sendbuff,
float *recvbuff, int count) {
#ifdef XGBOOST_USE_NCCL
CHECK(initialised_);
dh::safe_cuda(cudaSetDevice(device_ordinals.at(communication_group_idx)));
dh::safe_nccl(ncclAllReduce(sendbuff, recvbuff, count, ncclFloat, ncclSum,
comms.at(communication_group_idx),
streams.at(communication_group_idx)));
if(communication_group_idx == 0)
{
allreduce_bytes_ += count * sizeof(float);
allreduce_calls_ += 1;
}
#endif
}
/**
* \brief Allreduce. Use in exactly the same way as NCCL but without needing streams or comms.
*