From 9837b09b208f76ef7dc9039d4f0c21b4581f7b24 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Thu, 7 Mar 2019 15:22:12 -0800 Subject: [PATCH] support cuda 10.1 (#4223) * support cuda 10.1 * add cuda 10.1 to jenkins build matrix --- Jenkinsfile | 2 +- cmake/Utils.cmake | 4 +++- src/common/host_device_vector.cu | 3 ++- src/tree/updater_gpu.cu | 4 ++++ 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 215e9102e..e60e0240e 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -14,7 +14,7 @@ def dockerRun = 'tests/ci_build/ci_build.sh' def utils def buildMatrix = [ - [ "enabled": true, "os" : "linux", "withGpu": true, "withNccl": true, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "9.2", "multiGpu": true], + [ "enabled": true, "os" : "linux", "withGpu": true, "withNccl": true, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "10.1", "multiGpu": true], [ "enabled": true, "os" : "linux", "withGpu": true, "withNccl": true, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "9.2" ], [ "enabled": true, "os" : "linux", "withGpu": true, "withNccl": true, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "8.0" ], [ "enabled": true, "os" : "linux", "withGpu": true, "withNccl": false, "withOmp": true, "pythonVersion": "2.7", "cudaVersion": "8.0" ], diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake index c0fcb9391..2bc9fb40b 100644 --- a/cmake/Utils.cmake +++ b/cmake/Utils.cmake @@ -59,7 +59,9 @@ endfunction(set_default_configuration_release) function(format_gencode_flags flags out) # Set up architecture flags if(NOT flags) - if((CUDA_VERSION_MAJOR EQUAL 9) OR (CUDA_VERSION_MAJOR GREATER 9)) + if((CUDA_VERSION_MAJOR EQUAL 10) OR (CUDA_VERSION_MAJOR GREATER 10)) + set(flags "35;50;52;60;61;70;75") + elseif(CUDA_VERSION_MAJOR EQUAL 9) set(flags "35;50;52;60;61;70") else() set(flags "35;50;52;60;61") diff --git a/src/common/host_device_vector.cu b/src/common/host_device_vector.cu index bdccbf348..6e33adfcb 100644 --- a/src/common/host_device_vector.cu +++ b/src/common/host_device_vector.cu @@ -53,7 +53,8 @@ struct HostDeviceVectorImpl { if (vec_ == nullptr) { vec_ = vec; } CHECK_EQ(vec, vec_); device_ = device; - LazyResize(vec_->Size()); + // TODO(rongou): remove pointer dereference once CUDA 10.1 is fixed. + LazyResize((*vec_).Size()); perm_d_ = vec_->perm_h_.Complementary(); } diff --git a/src/tree/updater_gpu.cu b/src/tree/updater_gpu.cu index cfbefa89e..4464f63fa 100644 --- a/src/tree/updater_gpu.cu +++ b/src/tree/updater_gpu.cu @@ -140,7 +140,11 @@ __global__ void CubScanByKeyL1( // in order to pass on the partial scan values. // this statement MUST appear before the checks below! // else, the result of this shuffle operation will be undefined +#if (__CUDACC_VER_MAJOR__ >= 9) + int previousKey = __shfl_up_sync(0xFFFFFFFF, myKey, 1); +#else int previousKey = __shfl_up(myKey, 1); +#endif // Collectively compute the block-wide exclusive prefix sum BlockScan(temp_storage) .ExclusiveScan(threadData, threadData, rootPair, AddByKey());