CMake LTO and CUDA arch (#9677)

This commit is contained in:
Chuck Atkins 2023-10-20 01:01:37 -04:00 committed by GitHub
parent 3b86260b50
commit 83cdf14b2c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 67 additions and 53 deletions

View File

@ -2,15 +2,14 @@ cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
project(xgboost LANGUAGES CXX C VERSION 2.1.0) project(xgboost LANGUAGES CXX C VERSION 2.1.0)
include(cmake/Utils.cmake) include(cmake/Utils.cmake)
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules") list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
cmake_policy(SET CMP0022 NEW)
cmake_policy(SET CMP0079 NEW)
cmake_policy(SET CMP0076 NEW)
set(CMAKE_POLICY_DEFAULT_CMP0063 NEW)
cmake_policy(SET CMP0063 NEW)
if((${CMAKE_VERSION} VERSION_GREATER 3.13) OR (${CMAKE_VERSION} VERSION_EQUAL 3.13)) # These policies are already set from 3.18 but we still need to set the policy
cmake_policy(SET CMP0077 NEW) # default variables here for lower minimum versions in the submodules
endif() set(CMAKE_POLICY_DEFAULT_CMP0063 NEW)
set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
set(CMAKE_POLICY_DEFAULT_CMP0076 NEW)
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
set(CMAKE_POLICY_DEFAULT_CMP0079 NEW)
message(STATUS "CMake version ${CMAKE_VERSION}") message(STATUS "CMake version ${CMAKE_VERSION}")
@ -41,6 +40,8 @@ write_version()
set_default_configuration_release() set_default_configuration_release()
#-- Options #-- Options
include(CMakeDependentOption)
## User options ## User options
option(BUILD_C_DOC "Build documentation for C APIs using Doxygen." OFF) option(BUILD_C_DOC "Build documentation for C APIs using Doxygen." OFF)
option(USE_OPENMP "Build with OpenMP support." ON) option(USE_OPENMP "Build with OpenMP support." ON)
@ -69,8 +70,24 @@ option(USE_CUDA "Build with GPU acceleration" OFF)
option(USE_PER_THREAD_DEFAULT_STREAM "Build with per-thread default stream" ON) option(USE_PER_THREAD_DEFAULT_STREAM "Build with per-thread default stream" ON)
option(USE_NCCL "Build with NCCL to enable distributed GPU support." OFF) option(USE_NCCL "Build with NCCL to enable distributed GPU support." OFF)
option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF) option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF)
if(USE_CUDA)
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES AND NOT DEFINED ENV{CUDAARCHS})
set(GPU_COMPUTE_VER "" CACHE STRING set(GPU_COMPUTE_VER "" CACHE STRING
"Semicolon separated list of compute versions to be built against, e.g. '35;61'") "Semicolon separated list of compute versions to be built against, e.g. '35;61'")
else()
# Clear any cached values from previous runs
unset(GPU_COMPUTE_VER)
unset(GPU_COMPUTE_VER CACHE)
endif()
endif()
# CUDA device LTO was introduced in CMake v3.25 and requires host LTO to also be enabled but can still
# be explicitly disabled allowing for LTO on host only, host and device, or neither, but device-only LTO
# is not a supported configuration
cmake_dependent_option(USE_CUDA_LTO
"Enable link-time optimization for CUDA device code"
"${CMAKE_INTERPROCEDURAL_OPTIMIZATION}"
"CMAKE_VERSION VERSION_GREATER_EQUAL 3.25;USE_CUDA;CMAKE_INTERPROCEDURAL_OPTIMIZATION"
OFF)
## Sanitizers ## Sanitizers
option(USE_SANITIZER "Use santizer flags" OFF) option(USE_SANITIZER "Use santizer flags" OFF)
option(SANITIZER_PATH "Path to sanitizes.") option(SANITIZER_PATH "Path to sanitizes.")
@ -168,15 +185,24 @@ endif()
if(USE_CUDA) if(USE_CUDA)
set(USE_OPENMP ON CACHE BOOL "CUDA requires OpenMP" FORCE) set(USE_OPENMP ON CACHE BOOL "CUDA requires OpenMP" FORCE)
# `export CXX=' is ignored by CMake CUDA. # `export CXX=' is ignored by CMake CUDA.
set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER}) if(NOT DEFINED CMAKE_CUDA_HOST_COMPILER AND NOT DEFINED ENV{CUDAHOSTCXX})
set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} CACHE FILEPATH
"The compiler executable to use when compiling host code for CUDA or HIP language files.")
mark_as_advanced(CMAKE_CUDA_HOST_COMPILER)
message(STATUS "Configured CUDA host compiler: ${CMAKE_CUDA_HOST_COMPILER}") message(STATUS "Configured CUDA host compiler: ${CMAKE_CUDA_HOST_COMPILER}")
endif()
if(NOT DEFINED CMAKE_CUDA_RUNTIME_LIBRARY)
set(CMAKE_CUDA_RUNTIME_LIBRARY Static)
endif()
enable_language(CUDA) enable_language(CUDA)
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 11.0) if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 11.0)
message(FATAL_ERROR "CUDA version must be at least 11.0!") message(FATAL_ERROR "CUDA version must be at least 11.0!")
endif() endif()
set(GEN_CODE "") if(DEFINED GPU_COMPUTE_VER)
format_gencode_flags("${GPU_COMPUTE_VER}" GEN_CODE) compute_cmake_cuda_archs("${GPU_COMPUTE_VER}")
endif()
add_subdirectory(${PROJECT_SOURCE_DIR}/gputreeshap) add_subdirectory(${PROJECT_SOURCE_DIR}/gputreeshap)
find_package(CUDAToolkit REQUIRED) find_package(CUDAToolkit REQUIRED)

View File

@ -82,46 +82,35 @@ function(set_default_configuration_release)
endif() endif()
endfunction() endfunction()
# Generate nvcc compiler flags given a list of architectures # Generate CMAKE_CUDA_ARCHITECTURES from a list of architectures
# Also generates PTX for the most recent architecture for forwards compatibility # Also generates PTX for the most recent architecture for forwards compatibility
function(format_gencode_flags flags out) function(compute_cmake_cuda_archs archs)
if(CMAKE_CUDA_COMPILER_VERSION MATCHES "^([0-9]+\\.[0-9]+)") if(CMAKE_CUDA_COMPILER_VERSION MATCHES "^([0-9]+\\.[0-9]+)")
set(CUDA_VERSION "${CMAKE_MATCH_1}") set(CUDA_VERSION "${CMAKE_MATCH_1}")
endif() endif()
# Set up architecture flags list(SORT archs)
if(NOT flags) unset(CMAKE_CUDA_ARCHITECTURES CACHE)
set(CMAKE_CUDA_ARCHITECTURES ${archs})
# Set up defaults based on CUDA version
if(NOT CMAKE_CUDA_ARCHITECTURES)
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8") if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8")
set(flags "50;60;70;80;90") set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80 90)
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "11.0") elseif(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
set(flags "50;60;70;80") set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80)
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "10.0") elseif(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
set(flags "35;50;60;70") set(CMAKE_CUDA_ARCHITECTURES 35 50 60 70)
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "9.0") elseif(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
set(flags "35;50;60;70") set(CMAKE_CUDA_ARCHITECTURES 35 50 60 70)
else() else()
set(flags "35;50;60") set(CMAKE_CUDA_ARCHITECTURES 35 50 60)
endif() endif()
endif() endif()
if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.18") list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real")
cmake_policy(SET CMP0104 NEW) list(TRANSFORM CMAKE_CUDA_ARCHITECTURES REPLACE "([0-9]+)-real" "\\0;\\1-virtual" AT -1)
list(GET flags -1 latest_arch)
list(TRANSFORM flags APPEND "-real")
list(APPEND flags ${latest_arch})
set(CMAKE_CUDA_ARCHITECTURES ${flags})
set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" PARENT_SCOPE) set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" PARENT_SCOPE)
message(STATUS "CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}") message(STATUS "CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}")
else()
# Generate SASS
foreach(ver ${flags})
set(${out} "${${out}}--generate-code=arch=compute_${ver},code=sm_${ver};")
endforeach()
# Generate PTX for last architecture
list(GET flags -1 ver)
set(${out} "${${out}}--generate-code=arch=compute_${ver},code=compute_${ver};")
set(${out} "${${out}}" PARENT_SCOPE)
message(STATUS "CUDA GEN_CODE: ${GEN_CODE}")
endif()
endfunction() endfunction()
# Set CUDA related flags to target. Must be used after code `format_gencode_flags`. # Set CUDA related flags to target. Must be used after code `format_gencode_flags`.
@ -129,7 +118,6 @@ function(xgboost_set_cuda_flags target)
target_compile_options(${target} PRIVATE target_compile_options(${target} PRIVATE
$<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda> $<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda>
$<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr> $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>
$<$<COMPILE_LANGUAGE:CUDA>:${GEN_CODE}>
$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=${OpenMP_CXX_FLAGS}> $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=${OpenMP_CXX_FLAGS}>
$<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all>) $<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all>)
@ -138,10 +126,6 @@ function(xgboost_set_cuda_flags target)
$<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread>) $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread>)
endif() endif()
if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
set_property(TARGET ${target} PROPERTY CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES})
endif()
if(FORCE_COLORED_OUTPUT) if(FORCE_COLORED_OUTPUT)
if(FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND if(FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR
@ -176,9 +160,15 @@ function(xgboost_set_cuda_flags target)
set_target_properties(${target} PROPERTIES set_target_properties(${target} PROPERTIES
CUDA_STANDARD 17 CUDA_STANDARD 17
CUDA_STANDARD_REQUIRED ON CUDA_STANDARD_REQUIRED ON)
CUDA_SEPARABLE_COMPILATION OFF if(USE_CUDA_LTO)
CUDA_RUNTIME_LIBRARY Static) set_target_properties(${target} PROPERTIES
INTERPROCEDURAL_OPTIMIZATION ON
CUDA_SEPARABLE_COMPILATION ON)
else()
set_target_properties(${target} PROPERTIES
CUDA_SEPARABLE_COMPILATION OFF)
endif()
endfunction() endfunction()
macro(xgboost_link_nccl target) macro(xgboost_link_nccl target)

View File

@ -19,10 +19,8 @@ cmake_policy(SET CMP0104 NEW)
set(CMAKE_CUDA_HOST_COMPILER \${CMAKE_CXX_COMPILER}) set(CMAKE_CUDA_HOST_COMPILER \${CMAKE_CXX_COMPILER})
enable_language(CUDA) enable_language(CUDA)
include(../cmake/Utils.cmake) include(../cmake/Utils.cmake)
set(GEN_CODE "") compute_cmake_cuda_archs("")
format_gencode_flags("" GEN_CODE)
add_library(test OBJECT test.cu) add_library(test OBJECT test.cu)
set_property(TARGET test PROPERTY CUDA_ARCHITECTURES \${CMAKE_CUDA_ARCHITECTURES})
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
EOF EOF