[GPU-Plugin] Change GPU plugin to use tree_method parameter, bump cmake version to 3.5 for GPU plugin, add compute architecture 3.5, remove unused cmake files (#2455)
This commit is contained in:
parent
88488fdbb9
commit
48f3003302
@ -3,9 +3,10 @@ project (xgboost)
|
||||
find_package(OpenMP)
|
||||
|
||||
option(PLUGIN_UPDATER_GPU "Build GPU accelerated tree construction plugin")
|
||||
set(GPU_COMPUTE_VER 50;52;60;61 CACHE STRING
|
||||
set(GPU_COMPUTE_VER 35;50;52;60;61 CACHE STRING
|
||||
"Space separated list of compute versions to be built against")
|
||||
if(PLUGIN_UPDATER_GPU)
|
||||
cmake_minimum_required (VERSION 3.5)
|
||||
find_package(CUDA REQUIRED)
|
||||
endif()
|
||||
|
||||
|
||||
289
cmake/Cuda.cmake
289
cmake/Cuda.cmake
@ -1,289 +0,0 @@
|
||||
|
||||
include(CheckCXXCompilerFlag)
|
||||
check_cxx_compiler_flag("-std=c++11" SUPPORT_CXX11)
|
||||
|
||||
################################################################################################
# Detects the compute capabilities of the GPUs installed on the build machine.
# A tiny CUDA probe is written to ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu and executed
# through `nvcc --run`; the probe prints "<major>.<minor> " for every visible device.
# Usage:
#   xgboost_detect_installed_gpus(out_variable)
# Result:
#   out_variable - list of detected capabilities (e.g. "3.5;6.1"). When the probe cannot be
#                  built/run or prints nothing usable, ${xgboost_known_gpu_archs} is returned
#                  instead and a warning is emitted.
# Side effects:
#   The detected list is also stored in the INTERNAL cache entry CUDA_gpu_detect_output.
function(xgboost_detect_installed_gpus out_variable)
  # Outcome of this run only. It is tracked in a local variable so the success check below
  # does not depend on how normal-vs-cache variable lookup resolves after the cache write.
  set(__detected_archs "")

  set(__cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu)

  file(WRITE ${__cufile} ""
    "#include <cstdio>\n"
    "int main()\n"
    "{\n"
    " int count = 0;\n"
    " if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n"
    " if (count == 0) return -1;\n"
    " for (int device = 0; device < count; ++device)\n"
    " {\n"
    " cudaDeviceProp prop;\n"
    " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n"
    " std::printf(\"%d.%d \", prop.major, prop.minor);\n"
    " }\n"
    " return 0;\n"
    "}\n")

  if(MSVC)
    # Find vcvarsall.bat next to the compiler and run it first to set up the MSVC environment
    get_filename_component(MY_COMPILER_DIR ${CMAKE_CXX_COMPILER} DIRECTORY)
    find_file(MY_VCVARSALL_BAT vcvarsall.bat "${MY_COMPILER_DIR}/.." "${MY_COMPILER_DIR}/../..")
    execute_process(COMMAND ${MY_VCVARSALL_BAT} && ${CUDA_NVCC_EXECUTABLE} -arch sm_30 --run ${__cufile}
                    WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
                    RESULT_VARIABLE __nvcc_res OUTPUT_VARIABLE __nvcc_out
                    ERROR_QUIET
                    OUTPUT_STRIP_TRAILING_WHITESPACE)
  else()
    if(CUDA_LIBRARY_PATH)
      set(CUDA_LINK_LIBRARY_PATH "-L${CUDA_LIBRARY_PATH}")
    endif()
    execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} -arch sm_30 --run ${__cufile} ${CUDA_LINK_LIBRARY_PATH}
                    WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
                    RESULT_VARIABLE __nvcc_res OUTPUT_VARIABLE __nvcc_out
                    ERROR_QUIET
                    OUTPUT_STRIP_TRAILING_WHITESPACE)
  endif()

  if(__nvcc_res EQUAL 0)
    # nvcc outputs text containing line breaks when building with MSVC. Extracting only the
    # "<major>.<minor>" tokens keeps line breaks out of the cache. MATCHALL (with an escaped
    # dot) keeps every reported device instead of only the first one.
    string(REGEX MATCHALL "[1-9][0-9]*\\.[0-9]+" __detected_archs "${__nvcc_out}")
    if(NOT "${__detected_archs}" STREQUAL "")
      list(REMOVE_DUPLICATES __detected_archs)
    endif()
    string(REPLACE "2.1" "2.1(2.0)" __detected_archs "${__detected_archs}")
    set(CUDA_gpu_detect_output ${__detected_archs} CACHE INTERNAL "Returned GPU architetures from xgboost_detect_gpus tool" FORCE)
  else()
    message(WARNING "Running GPU detection script with nvcc failed: ${__nvcc_out}")
  endif()

  if("${__detected_archs}" STREQUAL "")
    message(WARNING "Automatic GPU detection failed. Building for all known architectures (${xgboost_known_gpu_archs}).")
    set(${out_variable} ${xgboost_known_gpu_archs} PARENT_SCOPE)
  else()
    set(${out_variable} ${__detected_archs} PARENT_SCOPE)
  endif()
endfunction()
|
||||
|
||||
|
||||
################################################################################################
# Selects the nvcc -gencode flags according to the CUDA_ARCH_NAME cache entry.
# Usage:
#   xgboost_select_nvcc_arch_flags(out_variable)
# Result:
#   out_variable          - list of "-gencode arch=...,code=..." arguments for nvcc
#   out_variable_readable - space separated summary (e.g. "sm_50 sm_60 compute_50")
# Cache entries:
#   CUDA_ARCH_NAME                 - one of Fermi, Kepler, Maxwell, Pascal, All, Manual, and
#                                    (when not cross-compiling) Auto, which is then the default
#   CUDA_ARCH_BIN / CUDA_ARCH_PTX  - only exist while CUDA_ARCH_NAME is "Manual"
function(xgboost_select_nvcc_arch_flags out_variable)
  # List of arch names
  set(__archs_names "Fermi" "Kepler" "Maxwell" "Pascal" "All" "Manual")
  set(__archs_name_default "All")
  if(NOT CMAKE_CROSSCOMPILING)
    list(APPEND __archs_names "Auto")
    set(__archs_name_default "Auto")
  endif()

  # set CUDA_ARCH_NAME strings (so it will be seen as a drop-down in CMake-Gui)
  set(CUDA_ARCH_NAME ${__archs_name_default} CACHE STRING "Select target NVIDIA GPU achitecture.")
  set_property( CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${__archs_names} )
  mark_as_advanced(CUDA_ARCH_NAME)

  # verify CUDA_ARCH_NAME value; an exact list lookup is used so that the user-supplied
  # value is never interpreted as a regular expression
  list(FIND __archs_names "${CUDA_ARCH_NAME}" __arch_name_index)
  if(__arch_name_index EQUAL -1)
    string(REPLACE ";" ", " __archs_names "${__archs_names}")
    message(FATAL_ERROR "Only ${__archs_names} architeture names are supported.")
  endif()

  if("${CUDA_ARCH_NAME}" STREQUAL "Manual")
    set(CUDA_ARCH_BIN ${xgboost_known_gpu_archs} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
    set(CUDA_ARCH_PTX "50" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")
    mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX)
  else()
    unset(CUDA_ARCH_BIN CACHE)
    unset(CUDA_ARCH_PTX CACHE)
  endif()

  if("${CUDA_ARCH_NAME}" STREQUAL "Fermi")
    set(__cuda_arch_bin "20 21(20)")
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Kepler")
    set(__cuda_arch_bin "30 35")
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Maxwell")
    set(__cuda_arch_bin "50")
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Pascal")
    set(__cuda_arch_bin "60 61")
  elseif("${CUDA_ARCH_NAME}" STREQUAL "All")
    set(__cuda_arch_bin ${xgboost_known_gpu_archs})
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Auto")
    xgboost_detect_installed_gpus(__cuda_arch_bin)
  else() # (${CUDA_ARCH_NAME} STREQUAL "Manual")
    set(__cuda_arch_bin ${CUDA_ARCH_BIN})
  endif()

  # remove dots and convert to lists
  string(REGEX REPLACE "\\." "" __cuda_arch_bin "${__cuda_arch_bin}")
  string(REGEX REPLACE "\\." "" __cuda_arch_ptx "${CUDA_ARCH_PTX}")
  string(REGEX MATCHALL "[0-9()]+" __cuda_arch_bin "${__cuda_arch_bin}")
  string(REGEX MATCHALL "[0-9]+" __cuda_arch_ptx "${__cuda_arch_ptx}")
  xgboost_list_unique(__cuda_arch_bin __cuda_arch_ptx)

  set(__nvcc_flags "")
  set(__nvcc_archs_readable "")

  # Tell NVCC to add binaries for the specified GPUs
  foreach(__arch ${__cuda_arch_bin})
    if(__arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
      # User explicitly specified PTX for the concrete BIN
      list(APPEND __nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
      list(APPEND __nvcc_archs_readable sm_${CMAKE_MATCH_1})
    else()
      # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
      list(APPEND __nvcc_flags -gencode arch=compute_${__arch},code=sm_${__arch})
      list(APPEND __nvcc_archs_readable sm_${__arch})
    endif()
  endforeach()

  # Tell NVCC to add PTX intermediate code for the specified architectures
  foreach(__arch ${__cuda_arch_ptx})
    list(APPEND __nvcc_flags -gencode arch=compute_${__arch},code=compute_${__arch})
    list(APPEND __nvcc_archs_readable compute_${__arch})
  endforeach()

  string(REPLACE ";" " " __nvcc_archs_readable "${__nvcc_archs_readable}")
  set(${out_variable} ${__nvcc_flags} PARENT_SCOPE)
  set(${out_variable}_readable ${__nvcc_archs_readable} PARENT_SCOPE)
endfunction()
|
||||
|
||||
################################################################################################
# Short command for CUDA compilation: wraps FindCUDA's cuda_compile().
# Usage:
#   xgboost_cuda_compile(<objlist_variable> <cuda_files>)
# Result:
#   objlist_variable - receives the object list produced by cuda_compile()
# Notes:
#   This is a macro, so it runs in the caller's scope: the flags appended to CUDA_NVCC_FLAGS
#   and CUDA_NVCC_FLAGS_DEBUG stay in effect after the call. CMAKE_CXX_FLAGS,
#   CMAKE_CXX_FLAGS_RELEASE and CMAKE_CXX_FLAGS_DEBUG are restored before returning.
macro(xgboost_cuda_compile objlist_variable)
  foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
    set(${var}_backup_in_cuda_compile_ "${${var}}")

    # we remove /EHa as it generates warnings under windows
    string(REPLACE "/EHa" "" ${var} "${${var}}")
  endforeach()

  if(UNIX OR APPLE)
    list(APPEND CUDA_NVCC_FLAGS -Xcompiler -fPIC)
  endif()

  if(APPLE)
    list(APPEND CUDA_NVCC_FLAGS -Xcompiler -Wno-unused-function)
  endif()

  set(CUDA_NVCC_FLAGS_DEBUG "${CUDA_NVCC_FLAGS_DEBUG} -G -lineinfo")

  if(MSVC)
    # disable noisy warnings:
    # 4819: The file contains a character that cannot be represented in the current code page (number).
    list(APPEND CUDA_NVCC_FLAGS -Xcompiler "/wd4819")
    # switch the C++ runtime selection from /MD to /MT in every per-config flag set
    foreach(flag_var
        CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
        CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
      if(${flag_var} MATCHES "/MD")
        string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
      endif()
    endforeach()
  endif()

  # If the build system is a container, make sure the nvcc intermediate files
  # go into the build output area rather than in /tmp, which may run out of space
  if(IS_CONTAINER_BUILD)
    set(CUDA_NVCC_INTERMEDIATE_DIR "${CMAKE_CURRENT_BINARY_DIR}")
    message(STATUS "Container build enabled, so nvcc intermediate files in: ${CUDA_NVCC_INTERMEDIATE_DIR}")
    # CUDA_NVCC_FLAGS is a list with one nvcc argument per element, so the option and its
    # value are appended as separate elements rather than as one space-joined string.
    list(APPEND CUDA_NVCC_FLAGS --keep --keep-dir "${CUDA_NVCC_INTERMEDIATE_DIR}")
  endif()

  cuda_compile(cuda_objcs ${ARGN})

  foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
    set(${var} "${${var}_backup_in_cuda_compile_}")
    unset(${var}_backup_in_cuda_compile_)
  endforeach()

  set(${objlist_variable} ${cuda_objcs})
endmacro()
|
||||
|
||||
|
||||
################################################################################################
### Non macro section
################################################################################################

# Try to prime CUDA_TOOLKIT_ROOT_DIR by looking for libcudart.so
if(NOT CUDA_TOOLKIT_ROOT_DIR)
  find_library(CUDA_LIBRARY_PATH libcudart.so PATHS ENV LD_LIBRARY_PATH PATH_SUFFIXES lib lib64)
  if(CUDA_LIBRARY_PATH)
    get_filename_component(CUDA_LIBRARY_PATH ${CUDA_LIBRARY_PATH} DIRECTORY)
    set(CUDA_TOOLKIT_ROOT_DIR "${CUDA_LIBRARY_PATH}/..")
  endif()
endif()

find_package(CUDA 5.5 QUIET REQUIRED)
find_cuda_helper_libs(curand)  # cmake 2.8.7 compatibility which doesn't search for curand

# NOTE(review): find_package() above is REQUIRED, so configuration already stops when CUDA
# is missing and this guard is not expected to trigger; kept as a safety net.
if(NOT CUDA_FOUND)
  return()
endif()

set(HAVE_CUDA TRUE)
message(STATUS "CUDA detected: " ${CUDA_VERSION})
include_directories(SYSTEM ${CUDA_INCLUDE_DIRS})
list(APPEND xgboost_LINKER_LIBS ${CUDA_CUDART_LIBRARY}
                                ${CUDA_curand_LIBRARY} ${CUDA_CUBLAS_LIBRARIES})

# Known NVIDIA GPU architectures xgboost can be compiled for.
# This list will be used for CUDA_ARCH_NAME = All option
if(CUDA_ARCH_ALL)
  set(xgboost_known_gpu_archs "${CUDA_ARCH_ALL}")
else()
  # compare as a version, not as a floating point number
  if(CUDA_VERSION VERSION_GREATER 7.5)
    set(xgboost_known_gpu_archs "30 35 50 52 60 61")
  else()
    set(xgboost_known_gpu_archs "30 35 50 52")
  endif()
endif()

# cudnn detection
if(USE_CUDNN)
  detect_cuDNN()
  if(HAVE_CUDNN)
    add_definitions(-DUSE_CUDNN)
    include_directories(SYSTEM ${CUDNN_INCLUDE})
    list(APPEND xgboost_LINKER_LIBS ${CUDNN_LIBRARY})
  endif()
endif()

# setting nvcc arch flags
xgboost_select_nvcc_arch_flags(NVCC_FLAGS_EXTRA)
list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA})
message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA_readable}")

# Boost 1.55 workaround, see https://svn.boost.org/trac/boost/ticket/9392 or
# https://github.com/ComputationalRadiationPhysics/picongpu/blob/master/src/picongpu/CMakeLists.txt
if(Boost_VERSION EQUAL 105500)
  message(STATUS "Cuda + Boost 1.55: Applying noinline work around")
  # avoid warning for CMake >= 2.8.12
  set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} \"-DBOOST_NOINLINE=__attribute__((noinline))\" ")
endif()

# disable some nvcc diagnostics that appear in boost, glog, gflags, opencv, etc.
foreach(diag cc_clobber_ignored integer_sign_change useless_using_declaration set_but_not_used)
  list(APPEND CUDA_NVCC_FLAGS -Xcudafe --diag_suppress=${diag})
endforeach()

# setting default testing device
if(NOT CUDA_TEST_DEVICE)
  set(CUDA_TEST_DEVICE -1)
endif()

mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD)
mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION)

# Handle clang/libc++ issue
if(APPLE)
  xgboost_detect_darwin_version(OSX_VERSION)

  # OSX 10.9 and higher uses clang/libc++ by default which is incompatible with old CUDA toolkits
  if(OSX_VERSION VERSION_GREATER 10.8)
    # enabled by default if and only if CUDA version is less than 7.0
    xgboost_option(USE_libstdcpp "Use libstdc++ instead of libc++" (CUDA_VERSION VERSION_LESS 7.0))
  endif()
endif()
|
||||
@ -1,398 +0,0 @@
|
||||
################################################################################################
# Debug-message shorthand: forwards all of its arguments to message(STATUS ...).
# Usage:
#   dmsg(<message>)
function(dmsg)
  # No named parameters are declared, so ARGV carries exactly the arguments passed in.
  message(STATUS ${ARGV})
endfunction()
|
||||
|
||||
################################################################################################
# Removes duplicates from list(s), in place.
# Usage:
#   xgboost_list_unique(<list_variable> [<list_variable>] [...])
# Notes:
#   Each argument is the NAME of a list variable in the caller's scope. Undefined or empty
#   variables are skipped, because list(REMOVE_DUPLICATES) requires an existing list.
macro(xgboost_list_unique)
  foreach(__lst ${ARGN})
    # Test for "non-empty" explicitly. A plain if(<variable>) truthiness test would also
    # skip lists whose string value looks false to CMake (e.g. one ending in "-NOTFOUND").
    if(NOT "${${__lst}}" STREQUAL "")
      list(REMOVE_DUPLICATES ${__lst})
    endif()
  endforeach()
endmacro()
|
||||
|
||||
################################################################################################
# Unsets every variable whose name is passed in (normal variables of the calling scope).
# Usage:
#   xgboost_clear_vars(<variables_list>)
macro(xgboost_clear_vars)
  foreach(_var IN ITEMS ${ARGN})
    unset(${_var})
  endforeach()
endmacro()
|
||||
|
||||
################################################################################################
# Removes duplicate words from a space separated string.
# Usage:
#   xgboost_string_unique(<string_variable>)
# Result:
#   <string_variable> is rewritten in the caller's scope with each word kept once, in order
#   of first appearance, joined by single spaces. An empty/undefined variable is left alone.
function(xgboost_string_unique __string)
  if(NOT "${${__string}}" STREQUAL "")
    set(__list ${${__string}})
    separate_arguments(__list)
    list(REMOVE_DUPLICATES __list)
    # Join with single spaces. Building the result from the list (rather than appending to
    # an accumulator) avoids both a leading blank and picking up a same-named variable that
    # a function scope inherits from its caller.
    string(REPLACE ";" " " __str "${__list}")
    set(${__string} "${__str}" PARENT_SCOPE)
  endif()
endfunction()
|
||||
|
||||
################################################################################################
# Prints every argument on its own STATUS line.
# Usage:
#   xgboost_print_list(<list>)
function(xgboost_print_list)
  foreach(__item IN ITEMS ${ARGN})
    message(STATUS ${__item})
  endforeach()
endfunction()
|
||||
|
||||
################################################################################################
# Merges lists of compiler flags into a single space separated string.
# Usage:
#   xgboost_merge_flag_lists(out_variable <list1> [<list2>] [<list3>] ...)
# Arguments:
#   out_var - name of the variable that receives the merged string in the caller's scope
#   ARGN    - NAMES of list variables; their elements are stripped and joined in order
# Result:
#   An empty string when no flags were supplied.
function(xgboost_merge_flag_lists out_var)
  set(__result "")
  foreach(__list ${ARGN})
    foreach(__flag ${${__list}})
      string(STRIP "${__flag}" __flag)
      set(__result "${__result} ${__flag}")
    endforeach()
  endforeach()
  # Quoted so that an empty accumulator is still passed as one argument to string(STRIP)
  string(STRIP "${__result}" __result)
  set(${out_var} "${__result}" PARENT_SCOPE)
endfunction()
|
||||
|
||||
################################################################################################
# Converts all paths in a list to absolute paths, in place.
# Usage:
#   xgboost_convert_absolute_paths(<list_variable>)
# Arguments:
#   variable - NAME of the list variable; it is overwritten in the caller's scope
function(xgboost_convert_absolute_paths variable)
  # Snapshot the input first, then start from an explicitly empty result. A function scope
  # inherits the caller's variables, so an uninitialised accumulator could carry stale entries.
  set(__input_paths ${${variable}})
  set(__list "")
  foreach(__s ${__input_paths})
    get_filename_component(__abspath "${__s}" ABSOLUTE)
    list(APPEND __list "${__abspath}")
  endforeach()
  set(${variable} ${__list} PARENT_SCOPE)
endfunction()
|
||||
|
||||
################################################################################################
# Reads a set of numeric "#define <name> <number>" values from a header file.
# Usage:
#   xgboost_parse_header(<file> <lines_variable> <define1> [<define2> ...] [CACHE | PARENT_SCOPE])
# Arguments:
#   FILENAME - header to scan
#   FILE_VAR - name of the variable that receives the matching "#define" lines
#   ARGN     - names of the defines to extract; each becomes a variable of the same name.
#              The keyword CACHE stores the values as INTERNAL cache entries, PARENT_SCOPE
#              sets them in the parent scope (CACHE wins when both are given).
# Notes:
#   This is a macro: vars_regex, __parnet_scope and __add_cache remain set in the caller.
#   When no line was read, the cache entry of each requested define is removed.
macro(xgboost_parse_header FILENAME FILE_VAR)
  set(__add_cache OFF)
  set(__parnet_scope OFF)
  set(vars_regex "")

  # Separate the keywords from the define names; the names are OR-ed into one regex
  foreach(name ${ARGN})
    if("${name}" STREQUAL "CACHE")
      set(__add_cache ON)
    elseif("${name}" STREQUAL "PARENT_SCOPE")
      set(__parnet_scope ON)
    elseif(NOT vars_regex)
      set(vars_regex "${name}")
    else()
      set(vars_regex "${vars_regex}|${name}")
    endif()
  endforeach()

  if(NOT EXISTS "${FILENAME}")
    unset(${FILE_VAR})
  else()
    file(STRINGS "${FILENAME}" ${FILE_VAR} REGEX "#define[ \t]+(${vars_regex})[ \t]+[0-9]+" )
  endif()

  foreach(name ${ARGN})
    if("${name}" STREQUAL "PARENT_SCOPE" OR "${name}" STREQUAL "CACHE")
      # keyword, not a define name
    elseif(NOT ${FILE_VAR})
      unset(${name} CACHE)
    else()
      if(${FILE_VAR} MATCHES ".+[ \t]${name}[ \t]+([0-9]+).*")
        # the successful match above left the number in CMAKE_MATCH_1
        set(${name} "${CMAKE_MATCH_1}")
      else()
        set(${name} "")
      endif()
      if(__add_cache)
        set(${name} ${${name}} CACHE INTERNAL "${name} parsed from ${FILENAME}" FORCE)
      elseif(__parnet_scope)
        set(${name} "${${name}}" PARENT_SCOPE)
      endif()
    endif()
  endforeach()
endmacro()
|
||||
|
||||
################################################################################################
# Reads a single quoted version define (e.g. #define FOO_VERSION "1.2.3") from a header file
# and splits it into components.
# Usage:
#   xgboost_parse_header_single_define(<library_name> <file> <define_name>)
# Result (set in the caller's scope, only when the define is found):
#   <library_name>_VERSION_MAJOR / _MINOR / _PATCH
#   <library_name>_VERSION_TWEAK   - only when a fourth component is present
#   <library_name>_VERSION_STRING  - "major.minor.patch[.tweak]"
# Notes:
#   Extra arguments (ARGN) are appended to the exported values exactly as before.
function(xgboost_parse_header_single_define LIBNAME HDR_PATH VARNAME)
  set(${LIBNAME}_H "")
  if(EXISTS "${HDR_PATH}")
    file(STRINGS "${HDR_PATH}" ${LIBNAME}_H REGEX "^#define[ \t]+${VARNAME}[ \t]+\"[^\"]*\".*$" LIMIT_COUNT 1)
  endif()

  if(${LIBNAME}_H)
    # Compute everything in local variables first. PARENT_SCOPE assignments do not change
    # this function's own variables, so the exported names cannot be read back here.
    string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"([0-9]+).*$" "\\1" __major "${${LIBNAME}_H}")
    string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.([0-9]+).*$" "\\1" __minor "${${LIBNAME}_H}")
    string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.[0-9]+\\.([0-9]+).*$" "\\1" __patch "${${LIBNAME}_H}")
    set(__version "${__major}.${__minor}.${__patch}")

    set(${LIBNAME}_VERSION_MAJOR ${__major} ${ARGN} PARENT_SCOPE)
    set(${LIBNAME}_VERSION_MINOR ${__minor} ${ARGN} PARENT_SCOPE)
    set(${LIBNAME}_VERSION_PATCH ${__patch} ${ARGN} PARENT_SCOPE)

    # append a TWEAK version if it exists:
    if("${${LIBNAME}_H}" MATCHES "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.[0-9]+\\.[0-9]+\\.([0-9]+).*$")
      set(__tweak "${CMAKE_MATCH_1}")
      set(${LIBNAME}_VERSION_TWEAK "${__tweak}" ${ARGN} PARENT_SCOPE)
      if(__tweak)
        set(__version "${__version}.${__tweak}")
      endif()
    endif()

    set(${LIBNAME}_VERSION_STRING "${__version}" ${ARGN} PARENT_SCOPE)
  endif()
endfunction()
|
||||
|
||||
########################################################################################################
# Declares a user-selectable option whose availability can depend on a condition.
# Usage:
#   xgboost_option(<option_variable> "doc string" <initial value or boolean expression> [IF <condition>])
# Behaviour:
#   - Arguments after the initial value are appended to the initial-value expression until
#     IF (or if) is met; everything after it forms the condition.
#   - When the condition holds (it defaults to "2 GREATER 1"), option() is called with:
#       ON/OFF  if the initial value is a multi-token expression or names a defined
#               variable, evaluated with if();
#       the initial value itself otherwise.
#   - When the condition does not hold, the cache entry <option_variable> is removed.
function(xgboost_option variable description value)
  set(__default_expr ${value})
  set(__guard_expr "")

  # Route the remaining tokens to the expression currently being collected
  set(__sink "__default_expr")
  foreach(__token ${ARGN})
    if(__token STREQUAL "IF" OR __token STREQUAL "if")
      set(__sink "__guard_expr")
    else()
      list(APPEND ${__sink} ${__token})
    endif()
  endforeach()
  unset(__sink)

  if("${__guard_expr}" STREQUAL "")
    set(__guard_expr 2 GREATER 1)
  endif()

  if(${__guard_expr})
    if("${__default_expr}" MATCHES ";")
      # multi-token boolean expression
      if(${__default_expr})
        set(__initial ON)
      else()
        set(__initial OFF)
      endif()
    elseif(DEFINED ${__default_expr})
      # name of an existing variable
      if(${__default_expr})
        set(__initial ON)
      else()
        set(__initial OFF)
      endif()
    else()
      # plain value, handed to option() unchanged
      set(__initial ${__default_expr})
    endif()
    option(${variable} "${description}" ${__initial})
  else()
    unset(${variable} CACHE)
  endif()
endfunction()
|
||||
|
||||
################################################################################################
# Utility for comparing two lists irrespective of element order. Used for CMake debugging.
# Usage:
#   xgboost_compare_lists(<list_variable> <list2_variable> [description])
# Arguments:
#   list1, list2 - NAMES of the list variables to compare
#   description  - optional text appended to the error message
# Behaviour:
#   Stops with FATAL_ERROR when the lists differ in length or, after sorting, in any
#   element. Two empty lists compare equal.
function(xgboost_compare_lists list1 list2)
  # The description is documented as optional, so it is taken from the extra arguments
  set(desc "${ARGN}")

  set(__list1 ${${list1}})
  set(__list2 ${${list2}})
  list(LENGTH __list1 __len1)
  list(LENGTH __list2 __len2)

  if(NOT __len1 EQUAL __len2)
    message(FATAL_ERROR "Lists are not equal. ${__len1} != ${__len2}. ${desc}")
  endif()

  # Nothing to sort or index in empty lists
  if(__len1 EQUAL 0)
    return()
  endif()

  list(SORT __list1)
  list(SORT __list2)

  math(EXPR __last_index "${__len1} - 1")
  foreach(__index RANGE 0 ${__last_index})
    list(GET __list1 ${__index} __item1)
    list(GET __list2 ${__index} __item2)
    if(NOT "${__item1}" STREQUAL "${__item2}")
      message(FATAL_ERROR "Lists are not equal. Differ at element ${__index}. ${desc}")
    endif()
  endforeach()
endfunction()
|
||||
|
||||
################################################################################################
# Disables warnings for different toolchains (MSVC style /wdNNNN and GCC/Clang style -Wname).
# Usage:
#   xgboost_warnings_disable(<CMAKE_[C|CXX]_FLAGS[_CONFIGURATION]> -Wshadow /wd4996 ..,)
# Arguments are classified by prefix:
#   CMAKE_*  - flag variables to modify (default: CMAKE_C_FLAGS and CMAKE_CXX_FLAGS)
#   /wd*     - applied when MSVC is set
#   -W*      - applied when CMAKE_COMPILER_IS_GNUCXX or CMAKE_COMPILER_IS_CLANGXX is set
# Anything else is ignored. This is a macro, so the flag variables are changed in the
# caller's scope.
macro(xgboost_warnings_disable)
  set(_gxx_warnings "")
  set(_msvc_warnings "")
  set(_flag_vars "")

  foreach(arg ${ARGN})
    if(arg MATCHES "^CMAKE_")
      list(APPEND _flag_vars ${arg})
    elseif(arg MATCHES "^/wd")
      list(APPEND _msvc_warnings ${arg})
    elseif(arg MATCHES "^-W")
      list(APPEND _gxx_warnings ${arg})
    endif()
  endforeach()

  if(NOT _flag_vars)
    set(_flag_vars CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
  endif()

  if(MSVC AND _msvc_warnings)
    # MSVC switches already mean "disable": append them as they are
    foreach(var ${_flag_vars})
      foreach(warning ${_msvc_warnings})
        set(${var} "${${var}} ${warning}")
      endforeach()
    endforeach()
  elseif((CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX) AND _gxx_warnings)
    foreach(var ${_flag_vars})
      foreach(warning ${_gxx_warnings})
        if(warning MATCHES "^-Wno-")
          # already in negated form
        else()
          # drop the enabling flag, then turn -Wname into -Wno-name
          string(REPLACE "${warning}" "" ${var} "${${var}}")
          string(REPLACE "-W" "-Wno-" warning "${warning}")
        endif()
        set(${var} "${${var}} ${warning}")
      endforeach()
    endforeach()
  endif()

  # remove the helper variables from the caller's scope
  xgboost_clear_vars(_flag_vars _msvc_warnings _gxx_warnings)
endmacro()
|
||||
|
||||
################################################################################################
# Returns the COMPILE_DEFINITIONS of the current directory as de-duplicated -D flags.
# Usage:
#   xgboost_get_current_definitions(<definitions_variable>)
function(xgboost_get_current_definitions definitions_var)
  get_property(__dir_definitions DIRECTORY PROPERTY COMPILE_DEFINITIONS)

  set(__definition_flags "")
  foreach(__definition ${__dir_definitions})
    list(APPEND __definition_flags -D${__definition})
  endforeach()
  xgboost_list_unique(__definition_flags)

  set(${definitions_var} ${__definition_flags} PARENT_SCOPE)
endfunction()
|
||||
|
||||
################################################################################################
# Returns the current directory's compile definitions (-D...) followed by its include
# directories (-I<absolute path>), de-duplicated.
# Usage:
#   xgboost_get_current_cflags(<cflagslist_variable>)
function(xgboost_get_current_cflags cflags_var)
  # start with the -D flags ...
  xgboost_get_current_definitions(__collected_cflags)

  # ... then add one -I flag per include directory, made absolute first
  get_property(__include_dirs DIRECTORY PROPERTY INCLUDE_DIRECTORIES)
  xgboost_convert_absolute_paths(__include_dirs)
  foreach(__include_dir ${__include_dirs})
    list(APPEND __collected_cflags "-I${__include_dir}")
  endforeach()

  xgboost_list_unique(__collected_cflags)
  set(${cflags_var} ${__collected_cflags} PARENT_SCOPE)
endfunction()
|
||||
|
||||
################################################################################################
# Parses a linker-libs list into link directories, -l flags and OSX frameworks.
# Usage:
#   xgboost_parse_linker_libs(<xgboost_LINKER_LIBS_var> <directories_var> <libflags_var> <frameworks_var>)
# Arguments:
#   xgboost_LINKER_LIBS_variable - NAME of the input list; may contain the "debug" /
#                                  "optimized" keywords, "-framework X" entries, targets,
#                                  "-l..." flags and absolute library paths
#   folders_var / flags_var / frameworks_var - NAMES of the output variables
# Notes:
#   Any other kind of entry stops configuration with FATAL_ERROR.
function(xgboost_parse_linker_libs xgboost_LINKER_LIBS_variable folders_var flags_var frameworks_var)

  set(__unspec "")
  set(__debug "")
  set(__optimized "")
  set(__framework "")
  set(__varname "__unspec")

  # split libs into debug, optimized, unspecified and frameworks; a "debug"/"optimized"
  # keyword applies to the single entry that follows it
  foreach(list_elem ${${xgboost_LINKER_LIBS_variable}})
    if(list_elem STREQUAL "debug")
      set(__varname "__debug")
    elseif(list_elem STREQUAL "optimized")
      set(__varname "__optimized")
    elseif(list_elem MATCHES "^-framework[ \t]+([^ \t].*)")
      list(APPEND __framework -framework ${CMAKE_MATCH_1})
    else()
      list(APPEND ${__varname} ${list_elem})
      set(__varname "__unspec")
    endif()
  endforeach()

  # attach debug or optimized libs to unspecified according to current configuration
  if(CMAKE_BUILD_TYPE MATCHES "Debug")
    set(__libs ${__unspec} ${__debug})
  else()
    set(__libs ${__unspec} ${__optimized})
  endif()

  set(libflags "")
  set(folders "")

  # convert linker libraries list to link flags
  foreach(lib ${__libs})
    if(TARGET ${lib})
      list(APPEND folders $<TARGET_LINKER_FILE_DIR:${lib}>)
      list(APPEND libflags -l${lib})
    elseif(lib MATCHES "^-l.*")
      list(APPEND libflags ${lib})
    elseif(IS_ABSOLUTE ${lib})
      get_filename_component(name_we ${lib} NAME_WE)
      get_filename_component(folder ${lib} PATH)

      # Strip a leading "lib" when present; file names without that prefix are used as
      # they are, so the flag is never built from an empty or stale match result.
      if(name_we MATCHES "^lib(.+)$")
        set(__link_name "${CMAKE_MATCH_1}")
      else()
        set(__link_name "${name_we}")
      endif()
      list(APPEND libflags -l${__link_name})
      list(APPEND folders ${folder})
    else()
      message(FATAL_ERROR "Logic error. Need to update cmake script")
    endif()
  endforeach()

  # de-duplicate both outputs (list(REMOVE_DUPLICATES) needs a non-empty list)
  if(NOT "${libflags}" STREQUAL "")
    list(REMOVE_DUPLICATES libflags)
  endif()
  if(NOT "${folders}" STREQUAL "")
    list(REMOVE_DUPLICATES folders)
  endif()

  set(${folders_var} ${folders} PARENT_SCOPE)
  set(${flags_var} ${libflags} PARENT_SCOPE)
  set(${frameworks_var} ${__framework} PARENT_SCOPE)
endfunction()
|
||||
|
||||
################################################################################################
# Reports the OS product version on Apple hosts, i.e. 10.8, 10.9, 10.10, ....
# Usage:
#   xgboost_detect_darwin_version(<version_variable>)
# Result:
#   <version_variable> - output of `/usr/bin/sw_vers -productVersion` when APPLE is set,
#                        an empty string otherwise
function(xgboost_detect_darwin_version output_var)
  # Not an Apple host: nothing to query
  if(NOT APPLE)
    set(${output_var} "" PARENT_SCOPE)
    return()
  endif()

  execute_process(COMMAND /usr/bin/sw_vers -productVersion
                  RESULT_VARIABLE __sw_vers
                  OUTPUT_VARIABLE __sw_vers_out
                  ERROR_QUIET
                  OUTPUT_STRIP_TRAILING_WHITESPACE)
  set(${output_var} ${__sw_vers_out} PARENT_SCOPE)
endfunction()
|
||||
|
||||
################################################################################################
# Convenience command to set up a source group for IDEs that support this feature (VS, XCode)
# Usage:
#   xgboost_source_group(<group> GLOB[_RECURSE] <globbing_expression>)
# Arguments:
#   group        - name of the IDE source group
#   GLOB         - globbing expressions expanded with file(GLOB)
#   GLOB_RECURSE - globbing expressions expanded with file(GLOB_RECURSE)
function(xgboost_source_group group)
  cmake_parse_arguments(XGB_SRC_GROUP "" "" "GLOB;GLOB_RECURSE" ${ARGN})

  if(XGB_SRC_GROUP_GLOB)
    file(GLOB __flat_sources ${XGB_SRC_GROUP_GLOB})
    source_group(${group} FILES ${__flat_sources})
  endif()

  if(XGB_SRC_GROUP_GLOB_RECURSE)
    file(GLOB_RECURSE __recursive_sources ${XGB_SRC_GROUP_GLOB_RECURSE})
    source_group(${group} FILES ${__recursive_sources})
  endif()
endfunction()
|
||||
@ -24,8 +24,7 @@ param['eval_metric'] = 'auc'
|
||||
param['max_depth'] = 5
|
||||
param['eta'] = 0.3
|
||||
param['silent'] = 0
|
||||
param['updater'] = 'grow_gpu'
|
||||
#param['updater'] = 'grow_colmaker'
|
||||
param['tree_method'] = 'gpu_exact'
|
||||
|
||||
num_round = 20
|
||||
|
||||
|
||||
@ -1,16 +1,16 @@
|
||||
# CUDA Accelerated Tree Construction Algorithms
|
||||
This plugin adds GPU accelerated tree construction algorithms to XGBoost.
|
||||
## Usage
|
||||
Specify the 'updater' parameter as one of the following algorithms.
|
||||
Specify the 'tree_method' parameter as one of the following algorithms.
|
||||
|
||||
### Algorithms
|
||||
| updater | Description |
|
||||
| tree_method | Description |
|
||||
| --- | --- |
|
||||
grow_gpu | The standard XGBoost tree construction algorithm. Performs exact search for splits. Slower and uses considerably more memory than 'grow_gpu_hist' |
|
||||
grow_gpu_hist | Equivalent to the XGBoost fast histogram algorithm. Faster and uses considerably less memory. Splits may be less accurate. |
|
||||
gpu_exact | The standard XGBoost tree construction algorithm. Performs exact search for splits. Slower and uses considerably more memory than 'gpu_hist' |
|
||||
gpu_hist | Equivalent to the XGBoost fast histogram algorithm. Faster and uses considerably less memory. Splits may be less accurate. |
|
||||
|
||||
### Supported parameters
|
||||
| parameter | grow_gpu | grow_gpu_hist |
|
||||
| parameter | gpu_exact | gpu_hist |
|
||||
| --- | --- | --- |
|
||||
subsample | ✔ | ✔ |
|
||||
colsample_bytree | ✔ | ✔|
|
||||
@ -29,7 +29,7 @@ Python example:
|
||||
```python
|
||||
param['gpu_id'] = 1
|
||||
param['max_bin'] = 16
|
||||
param['updater'] = 'grow_gpu_hist'
|
||||
param['tree_method'] = 'gpu_hist'
|
||||
```
|
||||
## Benchmarks
|
||||
To run benchmarks on synthetic data for binary classification:
|
||||
@ -39,18 +39,18 @@ $ python benchmark/benchmark.py
|
||||
|
||||
Training time on 1000000 rows x 50 columns with 500 boosting iterations on i7-6700K CPU @ 4.00GHz and Pascal Titan X.
|
||||
|
||||
| Updater | Time (s) |
|
||||
| tree_method | Time (s) |
|
||||
| --- | --- |
|
||||
| grow_gpu_hist | 11.09 |
|
||||
| grow_fast_histmaker (histogram XGBoost - CPU) | 41.75 |
|
||||
| grow_gpu | 193.90 |
|
||||
| grow_colmaker (standard XGBoost - CPU) | 720.12 |
|
||||
| gpu_hist | 11.09 |
|
||||
| hist (histogram XGBoost - CPU) | 41.75 |
|
||||
| gpu_exact | 193.90 |
|
||||
| exact (standard XGBoost - CPU) | 720.12 |
|
||||
|
||||
|
||||
[See here](http://dmlc.ml/2016/12/14/GPU-accelerated-xgboost.html) for additional performance benchmarks of the 'grow_gpu' updater.
|
||||
[See here](http://dmlc.ml/2016/12/14/GPU-accelerated-xgboost.html) for additional performance benchmarks of the 'gpu_exact' tree_method.
|
||||
|
||||
## Test
|
||||
To run tests:
|
||||
To run tests:
|
||||
```bash
|
||||
$ python -m nose test/python/
|
||||
```
|
||||
@ -122,6 +122,13 @@ $ make PLUGIN_UPDATER_GPU=ON GTEST_PATH=${CACHE_PREFIX} test
|
||||
```
|
||||
|
||||
## Changelog
|
||||
##### 2017/6/26
|
||||
|
||||
* Change API to use tree_method parameter
|
||||
* Increase required cmake version to 3.5
|
||||
* Add compute arch 3.5 to default archs
|
||||
* Set default n_gpus to 1
|
||||
|
||||
##### 2017/6/5
|
||||
|
||||
* Multi-GPU support for histogram method using NVIDIA NCCL.
|
||||
|
||||
@ -14,19 +14,18 @@ def run_benchmark(args, gpu_algorithm, cpu_algorithm):
|
||||
dtrain = xgb.DMatrix(X, y)
|
||||
|
||||
param = {'objective': 'binary:logistic',
|
||||
'tree_method': 'exact',
|
||||
'max_depth': 6,
|
||||
'silent': 1,
|
||||
'eval_metric': 'auc'}
|
||||
|
||||
param['updater'] = gpu_algorithm
|
||||
print("Training with '%s'" % param['updater'])
|
||||
param['tree_method'] = gpu_algorithm
|
||||
print("Training with '%s'" % param['tree_method'])
|
||||
tmp = time.time()
|
||||
xgb.train(param, dtrain, args.iterations)
|
||||
print ("Time: %s seconds" % (str(time.time() - tmp)))
|
||||
|
||||
param['updater'] = cpu_algorithm
|
||||
print("Training with '%s'" % param['updater'])
|
||||
param['tree_method'] = cpu_algorithm
|
||||
print("Training with '%s'" % param['tree_method'])
|
||||
tmp = time.time()
|
||||
xgb.train(param, dtrain, args.iterations)
|
||||
print ("Time: %s seconds" % (str(time.time() - tmp)))
|
||||
@ -34,17 +33,17 @@ def run_benchmark(args, gpu_algorithm, cpu_algorithm):
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--algorithm', choices=['all', 'grow_gpu', 'grow_gpu_hist'], required=True)
|
||||
parser.add_argument('--algorithm', choices=['all', 'gpu_exact', 'gpu_hist'], default='all')
|
||||
parser.add_argument('--rows',type=int,default=1000000)
|
||||
parser.add_argument('--columns',type=int,default=50)
|
||||
parser.add_argument('--iterations',type=int,default=500)
|
||||
args = parser.parse_args()
|
||||
|
||||
if 'grow_gpu_hist' in args.algorithm:
|
||||
run_benchmark(args, args.algorithm, 'grow_fast_histmaker')
|
||||
if 'grow_gpu' in args.algorithm:
|
||||
run_benchmark(args, args.algorithm, 'grow_colmaker')
|
||||
if 'gpu_hist' in args.algorithm:
|
||||
run_benchmark(args, args.algorithm, 'hist')
|
||||
if 'gpu_exact' in args.algorithm:
|
||||
run_benchmark(args, args.algorithm, 'exact')
|
||||
if 'all' in args.algorithm:
|
||||
run_benchmark(args, 'grow_gpu', 'grow_colmaker')
|
||||
run_benchmark(args, 'grow_gpu_hist', 'grow_fast_histmaker')
|
||||
run_benchmark(args, 'gpu_exact', 'exact')
|
||||
run_benchmark(args, 'gpu_hist', 'hist')
|
||||
|
||||
|
||||
@ -35,7 +35,7 @@ class TestGPU(unittest.TestCase):
|
||||
'objective': 'binary:logistic',
|
||||
'eval_metric': 'auc'}
|
||||
ag_param2 = {'max_depth': 2,
|
||||
'updater': 'grow_gpu',
|
||||
'tree_method': 'gpu_exact',
|
||||
'eta': 1,
|
||||
'silent': 1,
|
||||
'objective': 'binary:logistic',
|
||||
@ -59,7 +59,7 @@ class TestGPU(unittest.TestCase):
|
||||
dtest = xgb.DMatrix(X_test, y_test)
|
||||
|
||||
param = {'objective': 'binary:logistic',
|
||||
'updater': 'grow_gpu',
|
||||
'tree_method': 'gpu_exact',
|
||||
'max_depth': 3,
|
||||
'eval_metric': 'auc'}
|
||||
res = {}
|
||||
@ -75,7 +75,7 @@ class TestGPU(unittest.TestCase):
|
||||
dtrain2 = xgb.DMatrix(X2, label=y2)
|
||||
|
||||
param = {'objective': 'binary:logistic',
|
||||
'updater': 'grow_gpu',
|
||||
'tree_method': 'gpu_exact',
|
||||
'max_depth': 2,
|
||||
'eval_metric': 'auc'}
|
||||
res = {}
|
||||
@ -134,7 +134,7 @@ class TestGPU(unittest.TestCase):
|
||||
'objective': 'binary:logistic',
|
||||
'eval_metric': 'auc'}
|
||||
ag_param2 = {'max_depth': max_depth,
|
||||
'updater': 'grow_gpu_hist',
|
||||
'tree_method': 'gpu_hist',
|
||||
'eta': 1,
|
||||
'silent': 1,
|
||||
'n_gpus': 1,
|
||||
@ -142,7 +142,7 @@ class TestGPU(unittest.TestCase):
|
||||
'max_bin': max_bin,
|
||||
'eval_metric': 'auc'}
|
||||
ag_param3 = {'max_depth': max_depth,
|
||||
'updater': 'grow_gpu_hist',
|
||||
'tree_method': 'gpu_hist',
|
||||
'eta': 1,
|
||||
'silent': 1,
|
||||
'n_gpus': n_gpus,
|
||||
@ -177,7 +177,7 @@ class TestGPU(unittest.TestCase):
|
||||
dtest = xgb.DMatrix(X_test, y_test)
|
||||
|
||||
param = {'objective': 'binary:logistic',
|
||||
'updater': 'grow_gpu_hist',
|
||||
'tree_method': 'gpu_hist',
|
||||
'max_depth': max_depth,
|
||||
'n_gpus': 1,
|
||||
'max_bin': max_bin,
|
||||
@ -189,7 +189,7 @@ class TestGPU(unittest.TestCase):
|
||||
assert self.non_decreasing(res['train']['auc'])
|
||||
#assert self.non_decreasing(res['test']['auc'])
|
||||
param2 = {'objective': 'binary:logistic',
|
||||
'updater': 'grow_gpu_hist',
|
||||
'tree_method': 'gpu_hist',
|
||||
'max_depth': max_depth,
|
||||
'n_gpus': n_gpus,
|
||||
'max_bin': max_bin,
|
||||
@ -211,7 +211,7 @@ class TestGPU(unittest.TestCase):
|
||||
dtrain2 = xgb.DMatrix(X2, label=y2)
|
||||
|
||||
param = {'objective': 'binary:logistic',
|
||||
'updater': 'grow_gpu_hist',
|
||||
'tree_method': 'gpu_hist',
|
||||
'max_depth': max_depth,
|
||||
'n_gpus': n_gpus,
|
||||
'max_bin': max_bin,
|
||||
@ -250,7 +250,7 @@ class TestGPU(unittest.TestCase):
|
||||
######################################################################
|
||||
# fail-safe test for max_bin
|
||||
param = {'objective': 'binary:logistic',
|
||||
'updater': 'grow_gpu_hist',
|
||||
'tree_method': 'gpu_hist',
|
||||
'max_depth': max_depth,
|
||||
'n_gpus': n_gpus,
|
||||
'eval_metric': 'auc',
|
||||
@ -263,7 +263,7 @@ class TestGPU(unittest.TestCase):
|
||||
######################################################################
|
||||
# subsampling
|
||||
param = {'objective': 'binary:logistic',
|
||||
'updater': 'grow_gpu_hist',
|
||||
'tree_method': 'gpu_hist',
|
||||
'max_depth': max_depth,
|
||||
'n_gpus': n_gpus,
|
||||
'eval_metric': 'auc',
|
||||
@ -279,7 +279,7 @@ class TestGPU(unittest.TestCase):
|
||||
######################################################################
|
||||
# fail-safe test for max_bin=2
|
||||
param = {'objective': 'binary:logistic',
|
||||
'updater': 'grow_gpu_hist',
|
||||
'tree_method': 'gpu_hist',
|
||||
'max_depth': 2,
|
||||
'n_gpus': n_gpus,
|
||||
'eval_metric': 'auc',
|
||||
|
||||
206
src/learner.cc
206
src/learner.cc
@ -4,19 +4,19 @@
|
||||
* \brief Implementation of learning algorithm.
|
||||
* \author Tianqi Chen
|
||||
*/
|
||||
#include <xgboost/logging.h>
|
||||
#include <xgboost/learner.h>
|
||||
#include <dmlc/timer.h>
|
||||
#include <dmlc/io.h>
|
||||
#include <dmlc/timer.h>
|
||||
#include <xgboost/learner.h>
|
||||
#include <xgboost/logging.h>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <limits>
|
||||
#include <iomanip>
|
||||
#include "./common/io.h"
|
||||
#include <limits>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "./common/common.h"
|
||||
#include "./common/io.h"
|
||||
#include "./common/random.h"
|
||||
|
||||
namespace xgboost {
|
||||
@ -25,17 +25,14 @@ bool Learner::AllowLazyCheckPoint() const {
|
||||
return gbm_->AllowLazyCheckPoint();
|
||||
}
|
||||
|
||||
std::vector<std::string>
|
||||
Learner::DumpModel(const FeatureMap& fmap,
|
||||
std::vector<std::string> Learner::DumpModel(const FeatureMap& fmap,
|
||||
bool with_stats,
|
||||
std::string format) const {
|
||||
return gbm_->DumpModel(fmap, with_stats, format);
|
||||
}
|
||||
|
||||
|
||||
/*! \brief training parameter for regression */
|
||||
struct LearnerModelParam
|
||||
: public dmlc::Parameter<LearnerModelParam> {
|
||||
struct LearnerModelParam : public dmlc::Parameter<LearnerModelParam> {
|
||||
/* \brief global bias */
|
||||
bst_float base_score;
|
||||
/* \brief number of features */
|
||||
@ -55,20 +52,21 @@ struct LearnerModelParam
|
||||
}
|
||||
// declare parameters
|
||||
DMLC_DECLARE_PARAMETER(LearnerModelParam) {
|
||||
DMLC_DECLARE_FIELD(base_score).set_default(0.5f)
|
||||
DMLC_DECLARE_FIELD(base_score)
|
||||
.set_default(0.5f)
|
||||
.describe("Global bias of the model.");
|
||||
DMLC_DECLARE_FIELD(num_feature).set_default(0)
|
||||
.describe("Number of features in training data,"\
|
||||
DMLC_DECLARE_FIELD(num_feature)
|
||||
.set_default(0)
|
||||
.describe(
|
||||
"Number of features in training data,"
|
||||
" this parameter will be automatically detected by learner.");
|
||||
DMLC_DECLARE_FIELD(num_class).set_default(0).set_lower_bound(0)
|
||||
.describe("Number of class option for multi-class classifier. "\
|
||||
DMLC_DECLARE_FIELD(num_class).set_default(0).set_lower_bound(0).describe(
|
||||
"Number of class option for multi-class classifier. "
|
||||
" By default equals 0 and corresponds to binary classifier.");
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct LearnerTrainParam
|
||||
: public dmlc::Parameter<LearnerTrainParam> {
|
||||
struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
|
||||
// stored random seed
|
||||
int seed;
|
||||
// whether seed the PRNG each iteration
|
||||
@ -90,30 +88,40 @@ struct LearnerTrainParam
|
||||
int debug_verbose;
|
||||
// declare parameters
|
||||
DMLC_DECLARE_PARAMETER(LearnerTrainParam) {
|
||||
DMLC_DECLARE_FIELD(seed).set_default(0)
|
||||
.describe("Random number seed during training.");
|
||||
DMLC_DECLARE_FIELD(seed_per_iteration).set_default(false)
|
||||
.describe("Seed PRNG determnisticly via iterator number, "\
|
||||
"this option will be switched on automatically on distributed mode.");
|
||||
DMLC_DECLARE_FIELD(dsplit).set_default(0)
|
||||
DMLC_DECLARE_FIELD(seed).set_default(0).describe(
|
||||
"Random number seed during training.");
|
||||
DMLC_DECLARE_FIELD(seed_per_iteration)
|
||||
.set_default(false)
|
||||
.describe(
|
||||
"Seed PRNG determnisticly via iterator number, "
|
||||
"this option will be switched on automatically on distributed "
|
||||
"mode.");
|
||||
DMLC_DECLARE_FIELD(dsplit)
|
||||
.set_default(0)
|
||||
.add_enum("auto", 0)
|
||||
.add_enum("col", 1)
|
||||
.add_enum("row", 2)
|
||||
.describe("Data split mode for distributed training.");
|
||||
DMLC_DECLARE_FIELD(tree_method).set_default(0)
|
||||
DMLC_DECLARE_FIELD(tree_method)
|
||||
.set_default(0)
|
||||
.add_enum("auto", 0)
|
||||
.add_enum("approx", 1)
|
||||
.add_enum("exact", 2)
|
||||
.add_enum("hist", 3)
|
||||
.add_enum("gpu_exact", 4)
|
||||
.add_enum("gpu_hist", 5)
|
||||
.describe("Choice of tree construction method.");
|
||||
DMLC_DECLARE_FIELD(test_flag).set_default("")
|
||||
.describe("Internal test flag");
|
||||
DMLC_DECLARE_FIELD(prob_buffer_row).set_default(1.0f).set_range(0.0f, 1.0f)
|
||||
DMLC_DECLARE_FIELD(test_flag).set_default("").describe(
|
||||
"Internal test flag");
|
||||
DMLC_DECLARE_FIELD(prob_buffer_row)
|
||||
.set_default(1.0f)
|
||||
.set_range(0.0f, 1.0f)
|
||||
.describe("Maximum buffered row portion");
|
||||
DMLC_DECLARE_FIELD(max_row_perbatch).set_default(std::numeric_limits<size_t>::max())
|
||||
DMLC_DECLARE_FIELD(max_row_perbatch)
|
||||
.set_default(std::numeric_limits<size_t>::max())
|
||||
.describe("maximum row per batch.");
|
||||
DMLC_DECLARE_FIELD(nthread).set_default(0)
|
||||
.describe("Number of threads to use.");
|
||||
DMLC_DECLARE_FIELD(nthread).set_default(0).describe(
|
||||
"Number of threads to use.");
|
||||
DMLC_DECLARE_FIELD(debug_verbose)
|
||||
.set_lower_bound(0)
|
||||
.set_default(0)
|
||||
@ -125,8 +133,8 @@ DMLC_REGISTER_PARAMETER(LearnerModelParam);
|
||||
DMLC_REGISTER_PARAMETER(LearnerTrainParam);
|
||||
|
||||
/*!
|
||||
* \brief learner that performs gradient boosting for a specific objective function.
|
||||
* It does training and prediction.
|
||||
* \brief learner that performs gradient boosting for a specific objective
|
||||
* function. It does training and prediction.
|
||||
*/
|
||||
class LearnerImpl : public Learner {
|
||||
public:
|
||||
@ -137,7 +145,34 @@ class LearnerImpl : public Learner {
|
||||
name_gbm_ = "gbtree";
|
||||
}
|
||||
|
||||
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
|
||||
void ConfigureUpdaters() {
|
||||
if (tparam.tree_method == 0 || tparam.tree_method == 1 ||
|
||||
tparam.tree_method == 2) {
|
||||
if (cfg_.count("updater") == 0) {
|
||||
if (tparam.dsplit == 1) {
|
||||
cfg_["updater"] = "distcol";
|
||||
} else if (tparam.dsplit == 2) {
|
||||
cfg_["updater"] = "grow_histmaker,prune";
|
||||
}
|
||||
if (tparam.prob_buffer_row != 1.0f) {
|
||||
cfg_["updater"] = "grow_histmaker,refresh,prune";
|
||||
}
|
||||
}
|
||||
} else if (tparam.tree_method == 3) {
|
||||
/* histogram-based algorithm */
|
||||
LOG(CONSOLE) << "Tree method is selected to be \'hist\', which uses a "
|
||||
"single updater "
|
||||
<< "grow_fast_histmaker.";
|
||||
cfg_["updater"] = "grow_fast_histmaker";
|
||||
} else if (tparam.tree_method == 4) {
|
||||
cfg_["updater"] = "grow_gpu,prune";
|
||||
} else if (tparam.tree_method == 5) {
|
||||
cfg_["updater"] = "grow_gpu_hist";
|
||||
}
|
||||
}
|
||||
|
||||
void Configure(
|
||||
const std::vector<std::pair<std::string, std::string> >& args) override {
|
||||
// add to configurations
|
||||
tparam.InitAllowUnknown(args);
|
||||
cfg_.clear();
|
||||
@ -172,27 +207,13 @@ class LearnerImpl : public Learner {
|
||||
}
|
||||
}
|
||||
|
||||
if (cfg_.count("max_delta_step") == 0 &&
|
||||
cfg_.count("objective") != 0 &&
|
||||
if (cfg_.count("max_delta_step") == 0 && cfg_.count("objective") != 0 &&
|
||||
cfg_["objective"] == "count:poisson") {
|
||||
cfg_["max_delta_step"] = "0.7";
|
||||
}
|
||||
|
||||
if (tparam.tree_method == 3) {
|
||||
/* histogram-based algorithm */
|
||||
LOG(CONSOLE) << "Tree method is selected to be \'hist\', which uses a single updater "
|
||||
<< "grow_fast_histmaker.";
|
||||
cfg_["updater"] = "grow_fast_histmaker";
|
||||
} else if (cfg_.count("updater") == 0) {
|
||||
if (tparam.dsplit == 1) {
|
||||
cfg_["updater"] = "distcol";
|
||||
} else if (tparam.dsplit == 2) {
|
||||
cfg_["updater"] = "grow_histmaker,prune";
|
||||
}
|
||||
if (tparam.prob_buffer_row != 1.0f) {
|
||||
cfg_["updater"] = "grow_histmaker,refresh,prune";
|
||||
}
|
||||
}
|
||||
ConfigureUpdaters();
|
||||
|
||||
if (cfg_.count("objective") == 0) {
|
||||
cfg_["objective"] = "reg:linear";
|
||||
}
|
||||
@ -220,9 +241,7 @@ class LearnerImpl : public Learner {
|
||||
}
|
||||
}
|
||||
|
||||
void InitModel() override {
|
||||
this->LazyInitModel();
|
||||
}
|
||||
void InitModel() override { this->LazyInitModel(); }
|
||||
|
||||
void Load(dmlc::Stream* fi) override {
|
||||
// TODO(tqchen) mark deprecation of old format.
|
||||
@ -259,8 +278,7 @@ class LearnerImpl : public Learner {
|
||||
<< "BoostLearner: wrong model format";
|
||||
}
|
||||
}
|
||||
CHECK(fi->Read(&name_gbm_))
|
||||
<< "BoostLearner: wrong model format";
|
||||
CHECK(fi->Read(&name_gbm_)) << "BoostLearner: wrong model format";
|
||||
// duplicated code with LazyInitModel
|
||||
obj_.reset(ObjFunction::Create(name_obj_));
|
||||
gbm_.reset(GradientBooster::Create(name_gbm_, cache_, mparam.base_score));
|
||||
@ -268,8 +286,8 @@ class LearnerImpl : public Learner {
|
||||
if (mparam.contain_extra_attrs != 0) {
|
||||
std::vector<std::pair<std::string, std::string> > attr;
|
||||
fi->Read(&attr);
|
||||
attributes_ = std::map<std::string, std::string>(
|
||||
attr.begin(), attr.end());
|
||||
attributes_ =
|
||||
std::map<std::string, std::string>(attr.begin(), attr.end());
|
||||
}
|
||||
if (name_obj_ == "count:poisson") {
|
||||
std::string max_delta_step;
|
||||
@ -300,9 +318,9 @@ class LearnerImpl : public Learner {
|
||||
fo->Write(attr);
|
||||
}
|
||||
if (name_obj_ == "count:poisson") {
|
||||
std::map<std::string, std::string>::const_iterator it = cfg_.find("max_delta_step");
|
||||
if (it != cfg_.end())
|
||||
fo->Write(it->second);
|
||||
std::map<std::string, std::string>::const_iterator it =
|
||||
cfg_.find("max_delta_step");
|
||||
if (it != cfg_.end()) fo->Write(it->second);
|
||||
}
|
||||
if (mparam.contain_eval_metrics != 0) {
|
||||
std::vector<std::string> metr;
|
||||
@ -325,8 +343,7 @@ class LearnerImpl : public Learner {
|
||||
gbm_->DoBoost(train, &gpair_, obj_.get());
|
||||
}
|
||||
|
||||
void BoostOneIter(int iter,
|
||||
DMatrix* train,
|
||||
void BoostOneIter(int iter, DMatrix* train,
|
||||
std::vector<bst_gpair>* in_gpair) override {
|
||||
if (tparam.seed_per_iteration || rabit::IsDistributed()) {
|
||||
common::GlobalRandom().seed(tparam.seed * kRandSeedMagic + iter);
|
||||
@ -335,13 +352,11 @@ class LearnerImpl : public Learner {
|
||||
gbm_->DoBoost(train, in_gpair);
|
||||
}
|
||||
|
||||
std::string EvalOneIter(int iter,
|
||||
const std::vector<DMatrix*>& data_sets,
|
||||
std::string EvalOneIter(int iter, const std::vector<DMatrix*>& data_sets,
|
||||
const std::vector<std::string>& data_names) override {
|
||||
double tstart = dmlc::GetTime();
|
||||
std::ostringstream os;
|
||||
os << '[' << iter << ']'
|
||||
<< std::setiosflags(std::ios::fixed);
|
||||
os << '[' << iter << ']' << std::setiosflags(std::ios::fixed);
|
||||
if (metrics_.size() == 0) {
|
||||
metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric()));
|
||||
}
|
||||
@ -388,20 +403,19 @@ class LearnerImpl : public Learner {
|
||||
return out;
|
||||
}
|
||||
|
||||
std::pair<std::string, bst_float> Evaluate(DMatrix* data, std::string metric) {
|
||||
std::pair<std::string, bst_float> Evaluate(DMatrix* data,
|
||||
std::string metric) {
|
||||
if (metric == "auto") metric = obj_->DefaultEvalMetric();
|
||||
std::unique_ptr<Metric> ev(Metric::Create(metric.c_str()));
|
||||
this->PredictRaw(data, &preds_);
|
||||
obj_->EvalTransform(&preds_);
|
||||
return std::make_pair(metric, ev->Eval(preds_, data->info(), tparam.dsplit == 2));
|
||||
return std::make_pair(metric,
|
||||
ev->Eval(preds_, data->info(), tparam.dsplit == 2));
|
||||
}
|
||||
|
||||
void Predict(DMatrix* data,
|
||||
bool output_margin,
|
||||
std::vector<bst_float> *out_preds,
|
||||
unsigned ntree_limit,
|
||||
bool pred_leaf,
|
||||
bool pred_contribs) const override {
|
||||
void Predict(DMatrix* data, bool output_margin,
|
||||
std::vector<bst_float>* out_preds, unsigned ntree_limit,
|
||||
bool pred_leaf, bool pred_contribs) const override {
|
||||
if (pred_contribs) {
|
||||
gbm_->PredictContribution(data, out_preds, ntree_limit);
|
||||
} else if (pred_leaf) {
|
||||
@ -418,7 +432,12 @@ class LearnerImpl : public Learner {
|
||||
// check if p_train is ready to be used by training.
|
||||
// if not, initialize the column access.
|
||||
inline void LazyInitDMatrix(DMatrix* p_train) {
|
||||
if (tparam.tree_method != 3 && !p_train->HaveColAccess()) {
|
||||
if (tparam.tree_method == 3 || tparam.tree_method == 4 ||
|
||||
tparam.tree_method == 5) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!p_train->HaveColAccess()) {
|
||||
int ncol = static_cast<int>(p_train->info().num_col);
|
||||
std::vector<bool> enabled(ncol, true);
|
||||
// set max row per batch to limited value
|
||||
@ -426,9 +445,9 @@ class LearnerImpl : public Learner {
|
||||
size_t max_row_perbatch = tparam.max_row_perbatch;
|
||||
const size_t safe_max_row = static_cast<size_t>(32UL << 10UL);
|
||||
|
||||
if (tparam.tree_method == 0 &&
|
||||
p_train->info().num_row >= (4UL << 20UL)) {
|
||||
LOG(CONSOLE) << "Tree method is automatically selected to be \'approx\'"
|
||||
if (tparam.tree_method == 0 && p_train->info().num_row >= (4UL << 20UL)) {
|
||||
LOG(CONSOLE)
|
||||
<< "Tree method is automatically selected to be \'approx\'"
|
||||
<< " for faster speed."
|
||||
<< " to use old behavior(exact greedy algorithm on single machine),"
|
||||
<< " set tree_method to \'exact\'";
|
||||
@ -444,15 +463,14 @@ class LearnerImpl : public Learner {
|
||||
max_row_perbatch = std::min(max_row_perbatch, safe_max_row);
|
||||
}
|
||||
// initialize column access
|
||||
p_train->InitColAccess(enabled,
|
||||
tparam.prob_buffer_row,
|
||||
max_row_perbatch);
|
||||
p_train->InitColAccess(enabled, tparam.prob_buffer_row, max_row_perbatch);
|
||||
}
|
||||
|
||||
if (!p_train->SingleColBlock() && cfg_.count("updater") == 0) {
|
||||
if (tparam.tree_method == 2) {
|
||||
LOG(CONSOLE) << "tree method is set to be 'exact',"
|
||||
<< " but currently we are only able to proceed with approximate algorithm";
|
||||
<< " but currently we are only able to proceed with "
|
||||
"approximate algorithm";
|
||||
}
|
||||
cfg_["updater"] = "grow_histmaker,prune";
|
||||
if (gbm_.get() != nullptr) {
|
||||
@ -462,9 +480,7 @@ class LearnerImpl : public Learner {
|
||||
}
|
||||
|
||||
// return whether model is already initialized.
|
||||
inline bool ModelInitialized() const {
|
||||
return gbm_.get() != nullptr;
|
||||
}
|
||||
inline bool ModelInitialized() const { return gbm_.get() != nullptr; }
|
||||
// lazily initialize the model if it hasn't yet been initialized.
|
||||
inline void LazyInitModel() {
|
||||
if (this->ModelInitialized()) return;
|
||||
@ -497,14 +513,11 @@ class LearnerImpl : public Learner {
|
||||
* \param ntree_limit limit number of trees used for boosted tree
|
||||
* predictor, when it equals 0, this means we are using all the trees
|
||||
*/
|
||||
inline void PredictRaw(DMatrix* data,
|
||||
std::vector<bst_float>* out_preds,
|
||||
inline void PredictRaw(DMatrix* data, std::vector<bst_float>* out_preds,
|
||||
unsigned ntree_limit = 0) const {
|
||||
CHECK(gbm_.get() != nullptr)
|
||||
<< "Predict must happen after Load or InitModel";
|
||||
gbm_->Predict(data,
|
||||
out_preds,
|
||||
ntree_limit);
|
||||
gbm_->Predict(data, out_preds, ntree_limit);
|
||||
}
|
||||
// model parameter
|
||||
LearnerModelParam mparam;
|
||||
@ -530,7 +543,8 @@ class LearnerImpl : public Learner {
|
||||
std::vector<std::shared_ptr<DMatrix> > cache_;
|
||||
};
|
||||
|
||||
Learner* Learner::Create(const std::vector<std::shared_ptr<DMatrix> >& cache_data) {
|
||||
Learner* Learner::Create(
|
||||
const std::vector<std::shared_ptr<DMatrix> >& cache_data) {
|
||||
return new LearnerImpl(cache_data);
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user