CMake LTO and CUDA arch (#9677)
This commit is contained in:
parent
3b86260b50
commit
83cdf14b2c
@ -2,15 +2,14 @@ cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
|
||||
project(xgboost LANGUAGES CXX C VERSION 2.1.0)
|
||||
include(cmake/Utils.cmake)
|
||||
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
|
||||
cmake_policy(SET CMP0022 NEW)
|
||||
cmake_policy(SET CMP0079 NEW)
|
||||
cmake_policy(SET CMP0076 NEW)
|
||||
set(CMAKE_POLICY_DEFAULT_CMP0063 NEW)
|
||||
cmake_policy(SET CMP0063 NEW)
|
||||
|
||||
if((${CMAKE_VERSION} VERSION_GREATER 3.13) OR (${CMAKE_VERSION} VERSION_EQUAL 3.13))
|
||||
cmake_policy(SET CMP0077 NEW)
|
||||
endif()
|
||||
# These policies are already set from 3.18 but we still need to set the policy
|
||||
# default variables here for lower minimum versions in the submodules
|
||||
set(CMAKE_POLICY_DEFAULT_CMP0063 NEW)
|
||||
set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
|
||||
set(CMAKE_POLICY_DEFAULT_CMP0076 NEW)
|
||||
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
|
||||
set(CMAKE_POLICY_DEFAULT_CMP0079 NEW)
|
||||
|
||||
message(STATUS "CMake version ${CMAKE_VERSION}")
|
||||
|
||||
@ -41,6 +40,8 @@ write_version()
|
||||
set_default_configuration_release()
|
||||
|
||||
#-- Options
|
||||
include(CMakeDependentOption)
|
||||
|
||||
## User options
|
||||
option(BUILD_C_DOC "Build documentation for C APIs using Doxygen." OFF)
|
||||
option(USE_OPENMP "Build with OpenMP support." ON)
|
||||
@ -69,8 +70,24 @@ option(USE_CUDA "Build with GPU acceleration" OFF)
|
||||
option(USE_PER_THREAD_DEFAULT_STREAM "Build with per-thread default stream" ON)
|
||||
option(USE_NCCL "Build with NCCL to enable distributed GPU support." OFF)
|
||||
option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF)
|
||||
set(GPU_COMPUTE_VER "" CACHE STRING
|
||||
if(USE_CUDA)
|
||||
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES AND NOT DEFINED ENV{CUDAARCHS})
|
||||
set(GPU_COMPUTE_VER "" CACHE STRING
|
||||
"Semicolon separated list of compute versions to be built against, e.g. '35;61'")
|
||||
else()
|
||||
# Clear any cached values from previous runs
|
||||
unset(GPU_COMPUTE_VER)
|
||||
unset(GPU_COMPUTE_VER CACHE)
|
||||
endif()
|
||||
endif()
|
||||
# CUDA device LTO was introduced in CMake v3.25 and requires host LTO to also be enabled but can still
|
||||
# be explicitly disabled allowing for LTO on host only, host and device, or neither, but device-only LTO
|
||||
# is not a supported configuration
|
||||
cmake_dependent_option(USE_CUDA_LTO
|
||||
"Enable link-time optimization for CUDA device code"
|
||||
"${CMAKE_INTERPROCEDURAL_OPTIMIZATION}"
|
||||
"CMAKE_VERSION VERSION_GREATER_EQUAL 3.25;USE_CUDA;CMAKE_INTERPROCEDURAL_OPTIMIZATION"
|
||||
OFF)
|
||||
## Sanitizers
|
||||
option(USE_SANITIZER "Use santizer flags" OFF)
|
||||
option(SANITIZER_PATH "Path to sanitizes.")
|
||||
@ -168,15 +185,24 @@ endif()
|
||||
if(USE_CUDA)
|
||||
set(USE_OPENMP ON CACHE BOOL "CUDA requires OpenMP" FORCE)
|
||||
# `export CXX=' is ignored by CMake CUDA.
|
||||
set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
|
||||
if(NOT DEFINED CMAKE_CUDA_HOST_COMPILER AND NOT DEFINED ENV{CUDAHOSTCXX})
|
||||
set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} CACHE FILEPATH
|
||||
"The compiler executable to use when compiling host code for CUDA or HIP language files.")
|
||||
mark_as_advanced(CMAKE_CUDA_HOST_COMPILER)
|
||||
message(STATUS "Configured CUDA host compiler: ${CMAKE_CUDA_HOST_COMPILER}")
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED CMAKE_CUDA_RUNTIME_LIBRARY)
|
||||
set(CMAKE_CUDA_RUNTIME_LIBRARY Static)
|
||||
endif()
|
||||
|
||||
enable_language(CUDA)
|
||||
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 11.0)
|
||||
message(FATAL_ERROR "CUDA version must be at least 11.0!")
|
||||
endif()
|
||||
set(GEN_CODE "")
|
||||
format_gencode_flags("${GPU_COMPUTE_VER}" GEN_CODE)
|
||||
if(DEFINED GPU_COMPUTE_VER)
|
||||
compute_cmake_cuda_archs("${GPU_COMPUTE_VER}")
|
||||
endif()
|
||||
add_subdirectory(${PROJECT_SOURCE_DIR}/gputreeshap)
|
||||
|
||||
find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
@ -82,46 +82,35 @@ function(set_default_configuration_release)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
# Generate nvcc compiler flags given a list of architectures
|
||||
# Generate CMAKE_CUDA_ARCHITECTURES from a list of architectures
|
||||
# Also generates PTX for the most recent architecture for forwards compatibility
|
||||
function(format_gencode_flags flags out)
|
||||
function(compute_cmake_cuda_archs archs)
|
||||
if(CMAKE_CUDA_COMPILER_VERSION MATCHES "^([0-9]+\\.[0-9]+)")
|
||||
set(CUDA_VERSION "${CMAKE_MATCH_1}")
|
||||
endif()
|
||||
# Set up architecture flags
|
||||
if(NOT flags)
|
||||
list(SORT archs)
|
||||
unset(CMAKE_CUDA_ARCHITECTURES CACHE)
|
||||
set(CMAKE_CUDA_ARCHITECTURES ${archs})
|
||||
|
||||
# Set up defaults based on CUDA version
|
||||
if(NOT CMAKE_CUDA_ARCHITECTURES)
|
||||
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8")
|
||||
set(flags "50;60;70;80;90")
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80 90)
|
||||
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
|
||||
set(flags "50;60;70;80")
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80)
|
||||
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
|
||||
set(flags "35;50;60;70")
|
||||
set(CMAKE_CUDA_ARCHITECTURES 35 50 60 70)
|
||||
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
|
||||
set(flags "35;50;60;70")
|
||||
set(CMAKE_CUDA_ARCHITECTURES 35 50 60 70)
|
||||
else()
|
||||
set(flags "35;50;60")
|
||||
set(CMAKE_CUDA_ARCHITECTURES 35 50 60)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
|
||||
cmake_policy(SET CMP0104 NEW)
|
||||
list(GET flags -1 latest_arch)
|
||||
list(TRANSFORM flags APPEND "-real")
|
||||
list(APPEND flags ${latest_arch})
|
||||
set(CMAKE_CUDA_ARCHITECTURES ${flags})
|
||||
list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real")
|
||||
list(TRANSFORM CMAKE_CUDA_ARCHITECTURES REPLACE "([0-9]+)-real" "\\0;\\1-virtual" AT -1)
|
||||
set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" PARENT_SCOPE)
|
||||
message(STATUS "CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}")
|
||||
else()
|
||||
# Generate SASS
|
||||
foreach(ver ${flags})
|
||||
set(${out} "${${out}}--generate-code=arch=compute_${ver},code=sm_${ver};")
|
||||
endforeach()
|
||||
# Generate PTX for last architecture
|
||||
list(GET flags -1 ver)
|
||||
set(${out} "${${out}}--generate-code=arch=compute_${ver},code=compute_${ver};")
|
||||
set(${out} "${${out}}" PARENT_SCOPE)
|
||||
message(STATUS "CUDA GEN_CODE: ${GEN_CODE}")
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
# Set CUDA related flags to target. Must be used after `compute_cmake_cuda_archs`.
|
||||
@ -129,7 +118,6 @@ function(xgboost_set_cuda_flags target)
|
||||
target_compile_options(${target} PRIVATE
|
||||
$<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda>
|
||||
$<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>
|
||||
$<$<COMPILE_LANGUAGE:CUDA>:${GEN_CODE}>
|
||||
$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=${OpenMP_CXX_FLAGS}>
|
||||
$<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all>)
|
||||
|
||||
@ -138,10 +126,6 @@ function(xgboost_set_cuda_flags target)
|
||||
$<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread>)
|
||||
endif()
|
||||
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
|
||||
set_property(TARGET ${target} PROPERTY CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES})
|
||||
endif()
|
||||
|
||||
if(FORCE_COLORED_OUTPUT)
|
||||
if(FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
|
||||
((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR
|
||||
@ -176,9 +160,15 @@ function(xgboost_set_cuda_flags target)
|
||||
|
||||
set_target_properties(${target} PROPERTIES
|
||||
CUDA_STANDARD 17
|
||||
CUDA_STANDARD_REQUIRED ON
|
||||
CUDA_SEPARABLE_COMPILATION OFF
|
||||
CUDA_RUNTIME_LIBRARY Static)
|
||||
CUDA_STANDARD_REQUIRED ON)
|
||||
if(USE_CUDA_LTO)
|
||||
set_target_properties(${target} PROPERTIES
|
||||
INTERPROCEDURAL_OPTIMIZATION ON
|
||||
CUDA_SEPARABLE_COMPILATION ON)
|
||||
else()
|
||||
set_target_properties(${target} PROPERTIES
|
||||
CUDA_SEPARABLE_COMPILATION OFF)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
macro(xgboost_link_nccl target)
|
||||
|
||||
@ -19,10 +19,8 @@ cmake_policy(SET CMP0104 NEW)
|
||||
set(CMAKE_CUDA_HOST_COMPILER \${CMAKE_CXX_COMPILER})
|
||||
enable_language(CUDA)
|
||||
include(../cmake/Utils.cmake)
|
||||
set(GEN_CODE "")
|
||||
format_gencode_flags("" GEN_CODE)
|
||||
compute_cmake_cuda_archs("")
|
||||
add_library(test OBJECT test.cu)
|
||||
set_property(TARGET test PROPERTY CUDA_ARCHITECTURES \${CMAKE_CUDA_ARCHITECTURES})
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
EOF
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user