diff --git a/CMakeLists.txt b/CMakeLists.txt index 460327385..3608e5670 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,15 +2,14 @@ cmake_minimum_required(VERSION 3.18 FATAL_ERROR) project(xgboost LANGUAGES CXX C VERSION 2.1.0) include(cmake/Utils.cmake) list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules") -cmake_policy(SET CMP0022 NEW) -cmake_policy(SET CMP0079 NEW) -cmake_policy(SET CMP0076 NEW) -set(CMAKE_POLICY_DEFAULT_CMP0063 NEW) -cmake_policy(SET CMP0063 NEW) -if((${CMAKE_VERSION} VERSION_GREATER 3.13) OR (${CMAKE_VERSION} VERSION_EQUAL 3.13)) - cmake_policy(SET CMP0077 NEW) -endif() +# These policies are already set from 3.18 but we still need to set the policy +# default variables here for lower minimum versions in the submodules +set(CMAKE_POLICY_DEFAULT_CMP0063 NEW) +set(CMAKE_POLICY_DEFAULT_CMP0069 NEW) +set(CMAKE_POLICY_DEFAULT_CMP0076 NEW) +set(CMAKE_POLICY_DEFAULT_CMP0077 NEW) +set(CMAKE_POLICY_DEFAULT_CMP0079 NEW) message(STATUS "CMake version ${CMAKE_VERSION}") @@ -41,6 +40,8 @@ write_version() set_default_configuration_release() #-- Options +include(CMakeDependentOption) + ## User options option(BUILD_C_DOC "Build documentation for C APIs using Doxygen." OFF) option(USE_OPENMP "Build with OpenMP support." ON) @@ -69,8 +70,24 @@ option(USE_CUDA "Build with GPU acceleration" OFF) option(USE_PER_THREAD_DEFAULT_STREAM "Build with per-thread default stream" ON) option(USE_NCCL "Build with NCCL to enable distributed GPU support." OFF) option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF) -set(GPU_COMPUTE_VER "" CACHE STRING - "Semicolon separated list of compute versions to be built against, e.g. '35;61'") +if(USE_CUDA) + if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES AND NOT DEFINED ENV{CUDAARCHS}) + set(GPU_COMPUTE_VER "" CACHE STRING + "Semicolon separated list of compute versions to be built against, e.g. 
'35;61'") + else() + # Clear any cached values from previous runs + unset(GPU_COMPUTE_VER) + unset(GPU_COMPUTE_VER CACHE) + endif() +endif() +# CUDA device LTO was introduced in CMake v3.25 and requires host LTO to also be enabled but can still +# be explicitly disabled allowing for LTO on host only, host and device, or neither, but device-only LTO +# is not a supported configuration +cmake_dependent_option(USE_CUDA_LTO + "Enable link-time optimization for CUDA device code" + "${CMAKE_INTERPROCEDURAL_OPTIMIZATION}" + "CMAKE_VERSION VERSION_GREATER_EQUAL 3.25;USE_CUDA;CMAKE_INTERPROCEDURAL_OPTIMIZATION" + OFF) ## Sanitizers option(USE_SANITIZER "Use santizer flags" OFF) option(SANITIZER_PATH "Path to sanitizes.") @@ -168,15 +185,24 @@ endif() if(USE_CUDA) set(USE_OPENMP ON CACHE BOOL "CUDA requires OpenMP" FORCE) # `export CXX=' is ignored by CMake CUDA. - set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER}) - message(STATUS "Configured CUDA host compiler: ${CMAKE_CUDA_HOST_COMPILER}") + if(NOT DEFINED CMAKE_CUDA_HOST_COMPILER AND NOT DEFINED ENV{CUDAHOSTCXX}) + set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} CACHE FILEPATH + "The compiler executable to use when compiling host code for CUDA or HIP language files.") + mark_as_advanced(CMAKE_CUDA_HOST_COMPILER) + message(STATUS "Configured CUDA host compiler: ${CMAKE_CUDA_HOST_COMPILER}") + endif() + + if(NOT DEFINED CMAKE_CUDA_RUNTIME_LIBRARY) + set(CMAKE_CUDA_RUNTIME_LIBRARY Static) + endif() enable_language(CUDA) if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 11.0) message(FATAL_ERROR "CUDA version must be at least 11.0!") endif() - set(GEN_CODE "") - format_gencode_flags("${GPU_COMPUTE_VER}" GEN_CODE) + if(DEFINED GPU_COMPUTE_VER) + compute_cmake_cuda_archs("${GPU_COMPUTE_VER}") + endif() add_subdirectory(${PROJECT_SOURCE_DIR}/gputreeshap) find_package(CUDAToolkit REQUIRED) diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake index 586e32ee5..eafd829fc 100644 --- a/cmake/Utils.cmake +++ b/cmake/Utils.cmake @@ 
-82,46 +82,35 @@ function(set_default_configuration_release) endif() endfunction() -# Generate nvcc compiler flags given a list of architectures +# Generate CMAKE_CUDA_ARCHITECTURES from a list of architectures (sorted in natural/numeric order so that e.g. 100 ranks after 90) # Also generates PTX for the most recent architecture for forwards compatibility -function(format_gencode_flags flags out) +function(compute_cmake_cuda_archs archs) if(CMAKE_CUDA_COMPILER_VERSION MATCHES "^([0-9]+\\.[0-9]+)") set(CUDA_VERSION "${CMAKE_MATCH_1}") endif() - # Set up architecture flags - if(NOT flags) + list(SORT archs COMPARE NATURAL) + unset(CMAKE_CUDA_ARCHITECTURES CACHE) + set(CMAKE_CUDA_ARCHITECTURES ${archs}) + + # Set up defaults based on CUDA version + if(NOT CMAKE_CUDA_ARCHITECTURES) if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8") - set(flags "50;60;70;80;90") + set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80 90) elseif(CUDA_VERSION VERSION_GREATER_EQUAL "11.0") - set(flags "50;60;70;80") + set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80) elseif(CUDA_VERSION VERSION_GREATER_EQUAL "10.0") - set(flags "35;50;60;70") + set(CMAKE_CUDA_ARCHITECTURES 35 50 60 70) elseif(CUDA_VERSION VERSION_GREATER_EQUAL "9.0") - set(flags "35;50;60;70") + set(CMAKE_CUDA_ARCHITECTURES 35 50 60 70) else() - set(flags "35;50;60") + set(CMAKE_CUDA_ARCHITECTURES 35 50 60) endif() endif() - if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.18") - cmake_policy(SET CMP0104 NEW) - list(GET flags -1 latest_arch) - list(TRANSFORM flags APPEND "-real") - list(APPEND flags ${latest_arch}) - set(CMAKE_CUDA_ARCHITECTURES ${flags}) - set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" PARENT_SCOPE) - message(STATUS "CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}") - else() - # Generate SASS - foreach(ver ${flags}) - set(${out} "${${out}}--generate-code=arch=compute_${ver},code=sm_${ver};") - endforeach() - # Generate PTX for last architecture - list(GET flags -1 ver) - set(${out} 
"${${out}}--generate-code=arch=compute_${ver},code=compute_${ver};") - set(${out} "${${out}}" PARENT_SCOPE) - 
message(STATUS "CUDA GEN_CODE: ${GEN_CODE}") - endif() + list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real") + list(TRANSFORM CMAKE_CUDA_ARCHITECTURES REPLACE "([0-9]+)-real" "\\0;\\1-virtual" AT -1) + set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" PARENT_SCOPE) + message(STATUS "CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}") endfunction() # Set CUDA related flags to target. Must be used after code `format_gencode_flags`. @@ -129,7 +118,6 @@ function(xgboost_set_cuda_flags target) target_compile_options(${target} PRIVATE $<$:--expt-extended-lambda> $<$:--expt-relaxed-constexpr> - $<$:${GEN_CODE}> $<$:-Xcompiler=${OpenMP_CXX_FLAGS}> $<$:-Xfatbin=-compress-all>) @@ -138,10 +126,6 @@ function(xgboost_set_cuda_flags target) $<$:--default-stream per-thread>) endif() - if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.18") - set_property(TARGET ${target} PROPERTY CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES}) - endif() - if(FORCE_COLORED_OUTPUT) if(FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR @@ -176,9 +160,15 @@ function(xgboost_set_cuda_flags target) set_target_properties(${target} PROPERTIES CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - CUDA_SEPARABLE_COMPILATION OFF - CUDA_RUNTIME_LIBRARY Static) + CUDA_STANDARD_REQUIRED ON) + if(USE_CUDA_LTO) + set_target_properties(${target} PROPERTIES + INTERPROCEDURAL_OPTIMIZATION ON + CUDA_SEPARABLE_COMPILATION ON) + else() + set_target_properties(${target} PROPERTIES + CUDA_SEPARABLE_COMPILATION OFF) + endif() endfunction() macro(xgboost_link_nccl target) diff --git a/tests/ci_build/prune_libnccl.sh b/tests/ci_build/prune_libnccl.sh index a81d6e4ac..c5a0d8123 100755 --- a/tests/ci_build/prune_libnccl.sh +++ b/tests/ci_build/prune_libnccl.sh @@ -19,10 +19,8 @@ cmake_policy(SET CMP0104 NEW) set(CMAKE_CUDA_HOST_COMPILER \${CMAKE_CXX_COMPILER}) enable_language(CUDA) include(../cmake/Utils.cmake) -set(GEN_CODE "") -format_gencode_flags("" GEN_CODE) 
+compute_cmake_cuda_archs("") add_library(test OBJECT test.cu) -set_property(TARGET test PROPERTY CUDA_ARCHITECTURES \${CMAKE_CUDA_ARCHITECTURES}) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) EOF