CMake LTO and CUDA arch (#9677)
This commit is contained in:
parent
3b86260b50
commit
83cdf14b2c
@ -2,15 +2,14 @@ cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
|
|||||||
project(xgboost LANGUAGES CXX C VERSION 2.1.0)
|
project(xgboost LANGUAGES CXX C VERSION 2.1.0)
|
||||||
include(cmake/Utils.cmake)
|
include(cmake/Utils.cmake)
|
||||||
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
|
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
|
||||||
cmake_policy(SET CMP0022 NEW)
|
|
||||||
cmake_policy(SET CMP0079 NEW)
|
|
||||||
cmake_policy(SET CMP0076 NEW)
|
|
||||||
set(CMAKE_POLICY_DEFAULT_CMP0063 NEW)
|
|
||||||
cmake_policy(SET CMP0063 NEW)
|
|
||||||
|
|
||||||
if((${CMAKE_VERSION} VERSION_GREATER 3.13) OR (${CMAKE_VERSION} VERSION_EQUAL 3.13))
|
# These policies are already set from 3.18 but we still need to set the policy
|
||||||
cmake_policy(SET CMP0077 NEW)
|
# default variables here for lower minimum versions in the submodules
|
||||||
endif()
|
set(CMAKE_POLICY_DEFAULT_CMP0063 NEW)
|
||||||
|
set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
|
||||||
|
set(CMAKE_POLICY_DEFAULT_CMP0076 NEW)
|
||||||
|
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
|
||||||
|
set(CMAKE_POLICY_DEFAULT_CMP0079 NEW)
|
||||||
|
|
||||||
message(STATUS "CMake version ${CMAKE_VERSION}")
|
message(STATUS "CMake version ${CMAKE_VERSION}")
|
||||||
|
|
||||||
@ -41,6 +40,8 @@ write_version()
|
|||||||
set_default_configuration_release()
|
set_default_configuration_release()
|
||||||
|
|
||||||
#-- Options
|
#-- Options
|
||||||
|
include(CMakeDependentOption)
|
||||||
|
|
||||||
## User options
|
## User options
|
||||||
option(BUILD_C_DOC "Build documentation for C APIs using Doxygen." OFF)
|
option(BUILD_C_DOC "Build documentation for C APIs using Doxygen." OFF)
|
||||||
option(USE_OPENMP "Build with OpenMP support." ON)
|
option(USE_OPENMP "Build with OpenMP support." ON)
|
||||||
@ -69,8 +70,24 @@ option(USE_CUDA "Build with GPU acceleration" OFF)
|
|||||||
option(USE_PER_THREAD_DEFAULT_STREAM "Build with per-thread default stream" ON)
|
option(USE_PER_THREAD_DEFAULT_STREAM "Build with per-thread default stream" ON)
|
||||||
option(USE_NCCL "Build with NCCL to enable distributed GPU support." OFF)
|
option(USE_NCCL "Build with NCCL to enable distributed GPU support." OFF)
|
||||||
option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF)
|
option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF)
|
||||||
set(GPU_COMPUTE_VER "" CACHE STRING
|
if(USE_CUDA)
|
||||||
"Semicolon separated list of compute versions to be built against, e.g. '35;61'")
|
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES AND NOT DEFINED ENV{CUDAARCHS})
|
||||||
|
set(GPU_COMPUTE_VER "" CACHE STRING
|
||||||
|
"Semicolon separated list of compute versions to be built against, e.g. '35;61'")
|
||||||
|
else()
|
||||||
|
# Clear any cached values from previous runs
|
||||||
|
unset(GPU_COMPUTE_VER)
|
||||||
|
unset(GPU_COMPUTE_VER CACHE)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
# CUDA device LTO was introduced in CMake v3.25 and requires host LTO to also be enabled but can still
|
||||||
|
# be explicitly disabled allowing for LTO on host only, host and device, or neither, but device-only LTO
|
||||||
|
# is not a supported configuration
|
||||||
|
cmake_dependent_option(USE_CUDA_LTO
|
||||||
|
"Enable link-time optimization for CUDA device code"
|
||||||
|
"${CMAKE_INTERPROCEDURAL_OPTIMIZATION}"
|
||||||
|
"CMAKE_VERSION VERSION_GREATER_EQUAL 3.25;USE_CUDA;CMAKE_INTERPROCEDURAL_OPTIMIZATION"
|
||||||
|
OFF)
|
||||||
## Sanitizers
|
## Sanitizers
|
||||||
option(USE_SANITIZER "Use santizer flags" OFF)
|
option(USE_SANITIZER "Use santizer flags" OFF)
|
||||||
option(SANITIZER_PATH "Path to sanitizes.")
|
option(SANITIZER_PATH "Path to sanitizes.")
|
||||||
@ -168,15 +185,24 @@ endif()
|
|||||||
if(USE_CUDA)
|
if(USE_CUDA)
|
||||||
set(USE_OPENMP ON CACHE BOOL "CUDA requires OpenMP" FORCE)
|
set(USE_OPENMP ON CACHE BOOL "CUDA requires OpenMP" FORCE)
|
||||||
# `export CXX=' is ignored by CMake CUDA.
|
# `export CXX=' is ignored by CMake CUDA.
|
||||||
set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
|
if(NOT DEFINED CMAKE_CUDA_HOST_COMPILER AND NOT DEFINED ENV{CUDAHOSTCXX})
|
||||||
message(STATUS "Configured CUDA host compiler: ${CMAKE_CUDA_HOST_COMPILER}")
|
set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} CACHE FILEPATH
|
||||||
|
"The compiler executable to use when compiling host code for CUDA or HIP language files.")
|
||||||
|
mark_as_advanced(CMAKE_CUDA_HOST_COMPILER)
|
||||||
|
message(STATUS "Configured CUDA host compiler: ${CMAKE_CUDA_HOST_COMPILER}")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(NOT DEFINED CMAKE_CUDA_RUNTIME_LIBRARY)
|
||||||
|
set(CMAKE_CUDA_RUNTIME_LIBRARY Static)
|
||||||
|
endif()
|
||||||
|
|
||||||
enable_language(CUDA)
|
enable_language(CUDA)
|
||||||
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 11.0)
|
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 11.0)
|
||||||
message(FATAL_ERROR "CUDA version must be at least 11.0!")
|
message(FATAL_ERROR "CUDA version must be at least 11.0!")
|
||||||
endif()
|
endif()
|
||||||
set(GEN_CODE "")
|
if(DEFINED GPU_COMPUTE_VER)
|
||||||
format_gencode_flags("${GPU_COMPUTE_VER}" GEN_CODE)
|
compute_cmake_cuda_archs("${GPU_COMPUTE_VER}")
|
||||||
|
endif()
|
||||||
add_subdirectory(${PROJECT_SOURCE_DIR}/gputreeshap)
|
add_subdirectory(${PROJECT_SOURCE_DIR}/gputreeshap)
|
||||||
|
|
||||||
find_package(CUDAToolkit REQUIRED)
|
find_package(CUDAToolkit REQUIRED)
|
||||||
|
|||||||
@ -82,46 +82,35 @@ function(set_default_configuration_release)
|
|||||||
endif()
|
endif()
|
||||||
endfunction()
|
endfunction()
|
||||||
|
|
||||||
# Generate nvcc compiler flags given a list of architectures
|
# Generate CMAKE_CUDA_ARCHITECTURES from a list of architectures
|
||||||
# Also generates PTX for the most recent architecture for forwards compatibility
|
# Also generates PTX for the most recent architecture for forwards compatibility
|
||||||
function(format_gencode_flags flags out)
|
function(compute_cmake_cuda_archs archs)
|
||||||
if(CMAKE_CUDA_COMPILER_VERSION MATCHES "^([0-9]+\\.[0-9]+)")
|
if(CMAKE_CUDA_COMPILER_VERSION MATCHES "^([0-9]+\\.[0-9]+)")
|
||||||
set(CUDA_VERSION "${CMAKE_MATCH_1}")
|
set(CUDA_VERSION "${CMAKE_MATCH_1}")
|
||||||
endif()
|
endif()
|
||||||
# Set up architecture flags
|
list(SORT archs)
|
||||||
if(NOT flags)
|
unset(CMAKE_CUDA_ARCHITECTURES CACHE)
|
||||||
|
set(CMAKE_CUDA_ARCHITECTURES ${archs})
|
||||||
|
|
||||||
|
# Set up defaults based on CUDA version
|
||||||
|
if(NOT CMAKE_CUDA_ARCHITECTURES)
|
||||||
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8")
|
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8")
|
||||||
set(flags "50;60;70;80;90")
|
set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80 90)
|
||||||
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
|
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
|
||||||
set(flags "50;60;70;80")
|
set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80)
|
||||||
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
|
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
|
||||||
set(flags "35;50;60;70")
|
set(CMAKE_CUDA_ARCHITECTURES 35 50 60 70)
|
||||||
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
|
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
|
||||||
set(flags "35;50;60;70")
|
set(CMAKE_CUDA_ARCHITECTURES 35 50 60 70)
|
||||||
else()
|
else()
|
||||||
set(flags "35;50;60")
|
set(CMAKE_CUDA_ARCHITECTURES 35 50 60)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
|
list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real")
|
||||||
cmake_policy(SET CMP0104 NEW)
|
list(TRANSFORM CMAKE_CUDA_ARCHITECTURES REPLACE "([0-9]+)-real" "\\0;\\1-virtual" AT -1)
|
||||||
list(GET flags -1 latest_arch)
|
set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" PARENT_SCOPE)
|
||||||
list(TRANSFORM flags APPEND "-real")
|
message(STATUS "CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}")
|
||||||
list(APPEND flags ${latest_arch})
|
|
||||||
set(CMAKE_CUDA_ARCHITECTURES ${flags})
|
|
||||||
set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" PARENT_SCOPE)
|
|
||||||
message(STATUS "CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}")
|
|
||||||
else()
|
|
||||||
# Generate SASS
|
|
||||||
foreach(ver ${flags})
|
|
||||||
set(${out} "${${out}}--generate-code=arch=compute_${ver},code=sm_${ver};")
|
|
||||||
endforeach()
|
|
||||||
# Generate PTX for last architecture
|
|
||||||
list(GET flags -1 ver)
|
|
||||||
set(${out} "${${out}}--generate-code=arch=compute_${ver},code=compute_${ver};")
|
|
||||||
set(${out} "${${out}}" PARENT_SCOPE)
|
|
||||||
message(STATUS "CUDA GEN_CODE: ${GEN_CODE}")
|
|
||||||
endif()
|
|
||||||
endfunction()
|
endfunction()
|
||||||
|
|
||||||
# Set CUDA related flags to target. Must be used after code `format_gencode_flags`.
|
# Set CUDA related flags to target. Must be used after calling `compute_cmake_cuda_archs`.
|
||||||
@ -129,7 +118,6 @@ function(xgboost_set_cuda_flags target)
|
|||||||
target_compile_options(${target} PRIVATE
|
target_compile_options(${target} PRIVATE
|
||||||
$<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda>
|
$<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda>
|
||||||
$<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>
|
$<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>
|
||||||
$<$<COMPILE_LANGUAGE:CUDA>:${GEN_CODE}>
|
|
||||||
$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=${OpenMP_CXX_FLAGS}>
|
$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=${OpenMP_CXX_FLAGS}>
|
||||||
$<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all>)
|
$<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all>)
|
||||||
|
|
||||||
@ -138,10 +126,6 @@ function(xgboost_set_cuda_flags target)
|
|||||||
$<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread>)
|
$<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread>)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
|
|
||||||
set_property(TARGET ${target} PROPERTY CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES})
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(FORCE_COLORED_OUTPUT)
|
if(FORCE_COLORED_OUTPUT)
|
||||||
if(FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
|
if(FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
|
||||||
((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR
|
((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR
|
||||||
@ -176,9 +160,15 @@ function(xgboost_set_cuda_flags target)
|
|||||||
|
|
||||||
set_target_properties(${target} PROPERTIES
|
set_target_properties(${target} PROPERTIES
|
||||||
CUDA_STANDARD 17
|
CUDA_STANDARD 17
|
||||||
CUDA_STANDARD_REQUIRED ON
|
CUDA_STANDARD_REQUIRED ON)
|
||||||
CUDA_SEPARABLE_COMPILATION OFF
|
if(USE_CUDA_LTO)
|
||||||
CUDA_RUNTIME_LIBRARY Static)
|
set_target_properties(${target} PROPERTIES
|
||||||
|
INTERPROCEDURAL_OPTIMIZATION ON
|
||||||
|
CUDA_SEPARABLE_COMPILATION ON)
|
||||||
|
else()
|
||||||
|
set_target_properties(${target} PROPERTIES
|
||||||
|
CUDA_SEPARABLE_COMPILATION OFF)
|
||||||
|
endif()
|
||||||
endfunction()
|
endfunction()
|
||||||
|
|
||||||
macro(xgboost_link_nccl target)
|
macro(xgboost_link_nccl target)
|
||||||
|
|||||||
@ -19,10 +19,8 @@ cmake_policy(SET CMP0104 NEW)
|
|||||||
set(CMAKE_CUDA_HOST_COMPILER \${CMAKE_CXX_COMPILER})
|
set(CMAKE_CUDA_HOST_COMPILER \${CMAKE_CXX_COMPILER})
|
||||||
enable_language(CUDA)
|
enable_language(CUDA)
|
||||||
include(../cmake/Utils.cmake)
|
include(../cmake/Utils.cmake)
|
||||||
set(GEN_CODE "")
|
compute_cmake_cuda_archs("")
|
||||||
format_gencode_flags("" GEN_CODE)
|
|
||||||
add_library(test OBJECT test.cu)
|
add_library(test OBJECT test.cu)
|
||||||
set_property(TARGET test PROPERTY CUDA_ARCHITECTURES \${CMAKE_CUDA_ARCHITECTURES})
|
|
||||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user