[GPU-Plugin] Change GPU plugin to use tree_method parameter, bump cmake version to 3.5 for GPU plugin, add compute architecture 3.5, remove unused cmake files (#2455)

Rory Mitchell 2017-06-29 16:19:45 +12:00 committed by GitHub
parent 88488fdbb9
commit 48f3003302
8 changed files with 168 additions and 835 deletions


@@ -3,9 +3,10 @@ project (xgboost)
find_package(OpenMP)
option(PLUGIN_UPDATER_GPU "Build GPU accelerated tree construction plugin")
-set(GPU_COMPUTE_VER 50;52;60;61 CACHE STRING
+set(GPU_COMPUTE_VER 35;50;52;60;61 CACHE STRING
"Space separated list of compute versions to be built against")
if(PLUGIN_UPDATER_GPU)
+cmake_minimum_required (VERSION 3.5)
find_package(CUDA REQUIRED)
endif()
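With the plugin enabled via `-DPLUGIN_UPDATER_GPU=ON`, a quick smoke test that the GPU methods are wired into the resulting library might look like the following (a hedged sketch, not part of the commit; an unbuilt plugin surfaces as an `XGBoostError` when the updater is missing):

```python
import numpy as np
import xgboost as xgb

# Tiny random problem; enough to force updater construction.
X = np.random.rand(100, 5)
y = np.random.randint(2, size=100)
dtrain = xgb.DMatrix(X, label=y)

try:
    xgb.train({'tree_method': 'gpu_hist', 'silent': 1}, dtrain, 2)
    print("GPU plugin available")
except xgb.core.XGBoostError as err:
    print("GPU plugin missing or no usable device:", err)
```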


@@ -1,289 +0,0 @@
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-std=c++11" SUPPORT_CXX11)
################################################################################################
# A function for automatic detection of GPUs installed (if autodetection is enabled)
# Usage:
# mshadow_detect_installed_gpus(out_variable)
function(xgboost_detect_installed_gpus out_variable)
set(CUDA_gpu_detect_output "")
if(NOT CUDA_gpu_detect_output)
set(__cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu)
file(WRITE ${__cufile} ""
"#include <cstdio>\n"
"int main()\n"
"{\n"
" int count = 0;\n"
" if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n"
" if (count == 0) return -1;\n"
" for (int device = 0; device < count; ++device)\n"
" {\n"
" cudaDeviceProp prop;\n"
" if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n"
" std::printf(\"%d.%d \", prop.major, prop.minor);\n"
" }\n"
" return 0;\n"
"}\n")
if(MSVC)
#find vcvarsall.bat and run it building msvc environment
get_filename_component(MY_COMPILER_DIR ${CMAKE_CXX_COMPILER} DIRECTORY)
find_file(MY_VCVARSALL_BAT vcvarsall.bat "${MY_COMPILER_DIR}/.." "${MY_COMPILER_DIR}/../..")
execute_process(COMMAND ${MY_VCVARSALL_BAT} && ${CUDA_NVCC_EXECUTABLE} -arch sm_30 --run ${__cufile}
WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
RESULT_VARIABLE __nvcc_res OUTPUT_VARIABLE __nvcc_out
ERROR_QUIET
OUTPUT_STRIP_TRAILING_WHITESPACE)
else()
if(CUDA_LIBRARY_PATH)
set(CUDA_LINK_LIBRARY_PATH "-L${CUDA_LIBRARY_PATH}")
endif()
execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} -arch sm_30 --run ${__cufile} ${CUDA_LINK_LIBRARY_PATH}
WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
RESULT_VARIABLE __nvcc_res OUTPUT_VARIABLE __nvcc_out
ERROR_QUIET
OUTPUT_STRIP_TRAILING_WHITESPACE)
endif()
if(__nvcc_res EQUAL 0)
# nvcc outputs text containing line breaks when building with MSVC.
# The line below prevents CMake from inserting a variable with line
# breaks in the cache
string(REGEX MATCH "([1-9].[0-9])" __nvcc_out "${__nvcc_out}")
string(REPLACE "2.1" "2.1(2.0)" __nvcc_out "${__nvcc_out}")
set(CUDA_gpu_detect_output ${__nvcc_out} CACHE INTERNAL "Returned GPU architetures from xgboost_detect_gpus tool" FORCE)
else()
message(WARNING "Running GPU detection script with nvcc failed: ${__nvcc_out}")
endif()
endif()
if(NOT CUDA_gpu_detect_output)
message(WARNING "Automatic GPU detection failed. Building for all known architectures (${xgboost_known_gpu_archs}).")
set(${out_variable} ${xgboost_known_gpu_archs} PARENT_SCOPE)
else()
set(${out_variable} ${CUDA_gpu_detect_output} PARENT_SCOPE)
endif()
endfunction()
################################################################################################
# Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME
# Usage:
# xgboost_select_nvcc_arch_flags(out_variable)
function(xgboost_select_nvcc_arch_flags out_variable)
# List of arch names
set(__archs_names "Fermi" "Kepler" "Maxwell" "Pascal" "All" "Manual")
set(__archs_name_default "All")
if(NOT CMAKE_CROSSCOMPILING)
list(APPEND __archs_names "Auto")
set(__archs_name_default "Auto")
endif()
# set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui)
set(CUDA_ARCH_NAME ${__archs_name_default} CACHE STRING "Select target NVIDIA GPU achitecture.")
set_property( CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${__archs_names} )
mark_as_advanced(CUDA_ARCH_NAME)
# verify CUDA_ARCH_NAME value
if(NOT ";${__archs_names};" MATCHES ";${CUDA_ARCH_NAME};")
string(REPLACE ";" ", " __archs_names "${__archs_names}")
message(FATAL_ERROR "Only ${__archs_names} architeture names are supported.")
endif()
if(${CUDA_ARCH_NAME} STREQUAL "Manual")
set(CUDA_ARCH_BIN ${xgboost_known_gpu_archs} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
set(CUDA_ARCH_PTX "50" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")
mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX)
else()
unset(CUDA_ARCH_BIN CACHE)
unset(CUDA_ARCH_PTX CACHE)
endif()
if(${CUDA_ARCH_NAME} STREQUAL "Fermi")
set(__cuda_arch_bin "20 21(20)")
elseif(${CUDA_ARCH_NAME} STREQUAL "Kepler")
set(__cuda_arch_bin "30 35")
elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell")
set(__cuda_arch_bin "50")
elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal")
set(__cuda_arch_bin "60 61")
elseif(${CUDA_ARCH_NAME} STREQUAL "All")
set(__cuda_arch_bin ${xgboost_known_gpu_archs})
elseif(${CUDA_ARCH_NAME} STREQUAL "Auto")
xgboost_detect_installed_gpus(__cuda_arch_bin)
else() # (${CUDA_ARCH_NAME} STREQUAL "Manual")
set(__cuda_arch_bin ${CUDA_ARCH_BIN})
endif()
# remove dots and convert to lists
string(REGEX REPLACE "\\." "" __cuda_arch_bin "${__cuda_arch_bin}")
string(REGEX REPLACE "\\." "" __cuda_arch_ptx "${CUDA_ARCH_PTX}")
string(REGEX MATCHALL "[0-9()]+" __cuda_arch_bin "${__cuda_arch_bin}")
string(REGEX MATCHALL "[0-9]+" __cuda_arch_ptx "${__cuda_arch_ptx}")
xgboost_list_unique(__cuda_arch_bin __cuda_arch_ptx)
set(__nvcc_flags "")
set(__nvcc_archs_readable "")
# Tell NVCC to add binaries for the specified GPUs
foreach(__arch ${__cuda_arch_bin})
if(__arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
# User explicitly specified PTX for the concrete BIN
list(APPEND __nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
list(APPEND __nvcc_archs_readable sm_${CMAKE_MATCH_1})
else()
# User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
list(APPEND __nvcc_flags -gencode arch=compute_${__arch},code=sm_${__arch})
list(APPEND __nvcc_archs_readable sm_${__arch})
endif()
endforeach()
# Tell NVCC to add PTX intermediate code for the specified architectures
foreach(__arch ${__cuda_arch_ptx})
list(APPEND __nvcc_flags -gencode arch=compute_${__arch},code=compute_${__arch})
list(APPEND __nvcc_archs_readable compute_${__arch})
endforeach()
string(REPLACE ";" " " __nvcc_archs_readable "${__nvcc_archs_readable}")
set(${out_variable} ${__nvcc_flags} PARENT_SCOPE)
set(${out_variable}_readable ${__nvcc_archs_readable} PARENT_SCOPE)
endfunction()
################################################################################################
# Short command for cuda comnpilation
# Usage:
# xgboost_cuda_compile(<objlist_variable> <cuda_files>)
macro(xgboost_cuda_compile objlist_variable)
foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
set(${var}_backup_in_cuda_compile_ "${${var}}")
# we remove /EHa as it generates warnings under windows
string(REPLACE "/EHa" "" ${var} "${${var}}")
endforeach()
if(UNIX OR APPLE)
list(APPEND CUDA_NVCC_FLAGS -Xcompiler -fPIC)
endif()
if(APPLE)
list(APPEND CUDA_NVCC_FLAGS -Xcompiler -Wno-unused-function)
endif()
set(CUDA_NVCC_FLAGS_DEBUG "${CUDA_NVCC_FLAGS_DEBUG} -G -lineinfo")
if(MSVC)
# disable noisy warnings:
# 4819: The file contains a character that cannot be represented in the current code page (number).
list(APPEND CUDA_NVCC_FLAGS -Xcompiler "/wd4819")
foreach(flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
if(${flag_var} MATCHES "/MD")
string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
endif(${flag_var} MATCHES "/MD")
endforeach(flag_var)
endif()
# If the build system is a container, make sure the nvcc intermediate files
# go into the build output area rather than in /tmp, which may run out of space
if(IS_CONTAINER_BUILD)
set(CUDA_NVCC_INTERMEDIATE_DIR "${CMAKE_CURRENT_BINARY_DIR}")
message(STATUS "Container build enabled, so nvcc intermediate files in: ${CUDA_NVCC_INTERMEDIATE_DIR}")
list(APPEND CUDA_NVCC_FLAGS "--keep --keep-dir ${CUDA_NVCC_INTERMEDIATE_DIR}")
endif()
cuda_compile(cuda_objcs ${ARGN})
foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
set(${var} "${${var}_backup_in_cuda_compile_}")
unset(${var}_backup_in_cuda_compile_)
endforeach()
set(${objlist_variable} ${cuda_objcs})
endmacro()
################################################################################################
### Non macro section
################################################################################################
# Try to prime CUDA_TOOLKIT_ROOT_DIR by looking for libcudart.so
if(NOT CUDA_TOOLKIT_ROOT_DIR)
find_library(CUDA_LIBRARY_PATH libcudart.so PATHS ENV LD_LIBRARY_PATH PATH_SUFFIXES lib lib64)
if(CUDA_LIBRARY_PATH)
get_filename_component(CUDA_LIBRARY_PATH ${CUDA_LIBRARY_PATH} DIRECTORY)
set(CUDA_TOOLKIT_ROOT_DIR "${CUDA_LIBRARY_PATH}/..")
endif()
endif()
find_package(CUDA 5.5 QUIET REQUIRED)
find_cuda_helper_libs(curand) # cmake 2.8.7 compartibility which doesn't search for curand
if(NOT CUDA_FOUND)
return()
endif()
set(HAVE_CUDA TRUE)
message(STATUS "CUDA detected: " ${CUDA_VERSION})
include_directories(SYSTEM ${CUDA_INCLUDE_DIRS})
list(APPEND xgboost_LINKER_LIBS ${CUDA_CUDART_LIBRARY}
${CUDA_curand_LIBRARY} ${CUDA_CUBLAS_LIBRARIES})
# Known NVIDIA GPU achitectures xgboost can be compiled for.
# This list will be used for CUDA_ARCH_NAME = All option
if(CUDA_ARCH_ALL)
set(xgboost_known_gpu_archs "${CUDA_ARCH_ALL}")
else()
if(${CUDA_VERSION} GREATER 7.5)
set(xgboost_known_gpu_archs "30 35 50 52 60 61")
else()
set(xgboost_known_gpu_archs "30 35 50 52")
endif()
endif()
# cudnn detection
if(USE_CUDNN)
detect_cuDNN()
if(HAVE_CUDNN)
add_definitions(-DUSE_CUDNN)
include_directories(SYSTEM ${CUDNN_INCLUDE})
list(APPEND xgboost_LINKER_LIBS ${CUDNN_LIBRARY})
endif()
endif()
# setting nvcc arch flags
xgboost_select_nvcc_arch_flags(NVCC_FLAGS_EXTRA)
list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA})
message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA_readable}")
# Boost 1.55 workaround, see https://svn.boost.org/trac/boost/ticket/9392 or
# https://github.com/ComputationalRadiationPhysics/picongpu/blob/master/src/picongpu/CMakeLists.txt
if(Boost_VERSION EQUAL 105500)
message(STATUS "Cuda + Boost 1.55: Applying noinline work around")
# avoid warning for CMake >= 2.8.12
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} \"-DBOOST_NOINLINE=__attribute__((noinline))\" ")
endif()
# disable some nvcc diagnostic that apears in boost, glog, glags, opencv, etc.
foreach(diag cc_clobber_ignored integer_sign_change useless_using_declaration set_but_not_used)
list(APPEND CUDA_NVCC_FLAGS -Xcudafe --diag_suppress=${diag})
endforeach()
# setting default testing device
if(NOT CUDA_TEST_DEVICE)
set(CUDA_TEST_DEVICE -1)
endif()
mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD)
mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION)
# Handle clang/libc++ issue
if(APPLE)
xgboost_detect_darwin_version(OSX_VERSION)
# OSX 10.9 and higher uses clang/libc++ by default which is incompartible with old CUDA toolkits
if(OSX_VERSION VERSION_GREATER 10.8)
# enabled by default if and only if CUDA version is less than 7.0
xgboost_option(USE_libstdcpp "Use libstdc++ instead of libc++" (CUDA_VERSION VERSION_LESS 7.0))
endif()
endif()
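The deleted helper above auto-detected installed GPUs by compiling and running a small CUDA probe with nvcc and parsing the printed compute capabilities. A rough Python sketch of the same detection idea (hypothetical helper, assuming `nvcc` is on PATH; nvcc pre-includes `cuda_runtime.h` for `.cu` files, so the probe needs only `<cstdio>`):

```python
import os
import re
import subprocess
import tempfile

# CUDA probe equivalent to the one the deleted cmake wrote:
# print "major.minor" for every visible device.
DETECT_SRC = r"""
#include <cstdio>
int main() {
  int count = 0;
  if (cudaGetDeviceCount(&count) != cudaSuccess || count == 0) return -1;
  for (int device = 0; device < count; ++device) {
    cudaDeviceProp prop;
    if (cudaGetDeviceProperties(&prop, device) == cudaSuccess)
      std::printf("%d.%d ", prop.major, prop.minor);
  }
  return 0;
}
"""

def detect_gpu_archs():
    """Return compute capabilities such as ['6.1', '3.5'], or [] on failure."""
    with tempfile.TemporaryDirectory() as tmp:
        cufile = os.path.join(tmp, "detect_cuda_archs.cu")
        with open(cufile, "w") as f:
            f.write(DETECT_SRC)
        try:
            out = subprocess.run(
                ["nvcc", "-arch", "sm_30", "--run", cufile],
                cwd=tmp, capture_output=True, text=True, timeout=120)
        except (OSError, subprocess.TimeoutExpired):
            return []
        if out.returncode != 0:
            return []
        return re.findall(r"[0-9]+\.[0-9]+", out.stdout)
```

The CMake version kept a fallback list (`xgboost_known_gpu_archs`) for when detection failed; after this commit the build simply takes `GPU_COMPUTE_VER` as given.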


@@ -1,398 +0,0 @@
################################################################################################
# Command alias for debugging messages
# Usage:
# dmsg(<message>)
function(dmsg)
message(STATUS ${ARGN})
endfunction()
################################################################################################
# Removes duplicates from list(s)
# Usage:
# xgboost_list_unique(<list_variable> [<list_variable>] [...])
macro(xgboost_list_unique)
foreach(__lst ${ARGN})
if(${__lst})
list(REMOVE_DUPLICATES ${__lst})
endif()
endforeach()
endmacro()
################################################################################################
# Clears variables from list
# Usage:
# xgboost_clear_vars(<variables_list>)
macro(xgboost_clear_vars)
foreach(_var ${ARGN})
unset(${_var})
endforeach()
endmacro()
################################################################################################
# Removes duplicates from string
# Usage:
# xgboost_string_unique(<string_variable>)
function(xgboost_string_unique __string)
if(${__string})
set(__list ${${__string}})
separate_arguments(__list)
list(REMOVE_DUPLICATES __list)
foreach(__e ${__list})
set(__str "${__str} ${__e}")
endforeach()
set(${__string} ${__str} PARENT_SCOPE)
endif()
endfunction()
################################################################################################
# Prints list element per line
# Usage:
# xgboost_print_list(<list>)
function(xgboost_print_list)
foreach(e ${ARGN})
message(STATUS ${e})
endforeach()
endfunction()
################################################################################################
# Function merging lists of compiler flags to single string.
# Usage:
# xgboost_merge_flag_lists(out_variable <list1> [<list2>] [<list3>] ...)
function(xgboost_merge_flag_lists out_var)
set(__result "")
foreach(__list ${ARGN})
foreach(__flag ${${__list}})
string(STRIP ${__flag} __flag)
set(__result "${__result} ${__flag}")
endforeach()
endforeach()
string(STRIP ${__result} __result)
set(${out_var} ${__result} PARENT_SCOPE)
endfunction()
################################################################################################
# Converts all paths in list to absolute
# Usage:
# xgboost_convert_absolute_paths(<list_variable>)
function(xgboost_convert_absolute_paths variable)
set(__dlist "")
foreach(__s ${${variable}})
get_filename_component(__abspath ${__s} ABSOLUTE)
list(APPEND __list ${__abspath})
endforeach()
set(${variable} ${__list} PARENT_SCOPE)
endfunction()
################################################################################################
# Reads set of version defines from the header file
# Usage:
# xgboost_parse_header(<file> <define1> <define2> <define3> ..)
macro(xgboost_parse_header FILENAME FILE_VAR)
set(vars_regex "")
set(__parnet_scope OFF)
set(__add_cache OFF)
foreach(name ${ARGN})
if("${name}" STREQUAL "PARENT_SCOPE")
set(__parnet_scope ON)
elseif("${name}" STREQUAL "CACHE")
set(__add_cache ON)
elseif(vars_regex)
set(vars_regex "${vars_regex}|${name}")
else()
set(vars_regex "${name}")
endif()
endforeach()
if(EXISTS "${FILENAME}")
file(STRINGS "${FILENAME}" ${FILE_VAR} REGEX "#define[ \t]+(${vars_regex})[ \t]+[0-9]+" )
else()
unset(${FILE_VAR})
endif()
foreach(name ${ARGN})
if(NOT "${name}" STREQUAL "PARENT_SCOPE" AND NOT "${name}" STREQUAL "CACHE")
if(${FILE_VAR})
if(${FILE_VAR} MATCHES ".+[ \t]${name}[ \t]+([0-9]+).*")
string(REGEX REPLACE ".+[ \t]${name}[ \t]+([0-9]+).*" "\\1" ${name} "${${FILE_VAR}}")
else()
set(${name} "")
endif()
if(__add_cache)
set(${name} ${${name}} CACHE INTERNAL "${name} parsed from ${FILENAME}" FORCE)
elseif(__parnet_scope)
set(${name} "${${name}}" PARENT_SCOPE)
endif()
else()
unset(${name} CACHE)
endif()
endif()
endforeach()
endmacro()
################################################################################################
# Reads single version define from the header file and parses it
# Usage:
# xgboost_parse_header_single_define(<library_name> <file> <define_name>)
function(xgboost_parse_header_single_define LIBNAME HDR_PATH VARNAME)
set(${LIBNAME}_H "")
if(EXISTS "${HDR_PATH}")
file(STRINGS "${HDR_PATH}" ${LIBNAME}_H REGEX "^#define[ \t]+${VARNAME}[ \t]+\"[^\"]*\".*$" LIMIT_COUNT 1)
endif()
if(${LIBNAME}_H)
string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MAJOR "${${LIBNAME}_H}")
string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MINOR "${${LIBNAME}_H}")
string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_PATCH "${${LIBNAME}_H}")
set(${LIBNAME}_VERSION_MAJOR ${${LIBNAME}_VERSION_MAJOR} ${ARGN} PARENT_SCOPE)
set(${LIBNAME}_VERSION_MINOR ${${LIBNAME}_VERSION_MINOR} ${ARGN} PARENT_SCOPE)
set(${LIBNAME}_VERSION_PATCH ${${LIBNAME}_VERSION_PATCH} ${ARGN} PARENT_SCOPE)
set(${LIBNAME}_VERSION_STRING "${${LIBNAME}_VERSION_MAJOR}.${${LIBNAME}_VERSION_MINOR}.${${LIBNAME}_VERSION_PATCH}" PARENT_SCOPE)
# append a TWEAK version if it exists:
set(${LIBNAME}_VERSION_TWEAK "")
if("${${LIBNAME}_H}" MATCHES "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.[0-9]+\\.[0-9]+\\.([0-9]+).*$")
set(${LIBNAME}_VERSION_TWEAK "${CMAKE_MATCH_1}" ${ARGN} PARENT_SCOPE)
endif()
if(${LIBNAME}_VERSION_TWEAK)
set(${LIBNAME}_VERSION_STRING "${${LIBNAME}_VERSION_STRING}.${${LIBNAME}_VERSION_TWEAK}" ${ARGN} PARENT_SCOPE)
else()
set(${LIBNAME}_VERSION_STRING "${${LIBNAME}_VERSION_STRING}" ${ARGN} PARENT_SCOPE)
endif()
endif()
endfunction()
########################################################################################################
# An option that the user can select. Can accept condition to control when option is available for user.
# Usage:
# xgboost_option(<option_variable> "doc string" <initial value or boolean expression> [IF <condition>])
function(xgboost_option variable description value)
set(__value ${value})
set(__condition "")
set(__varname "__value")
foreach(arg ${ARGN})
if(arg STREQUAL "IF" OR arg STREQUAL "if")
set(__varname "__condition")
else()
list(APPEND ${__varname} ${arg})
endif()
endforeach()
unset(__varname)
if("${__condition}" STREQUAL "")
set(__condition 2 GREATER 1)
endif()
if(${__condition})
if("${__value}" MATCHES ";")
if(${__value})
option(${variable} "${description}" ON)
else()
option(${variable} "${description}" OFF)
endif()
elseif(DEFINED ${__value})
if(${__value})
option(${variable} "${description}" ON)
else()
option(${variable} "${description}" OFF)
endif()
else()
option(${variable} "${description}" ${__value})
endif()
else()
unset(${variable} CACHE)
endif()
endfunction()
################################################################################################
# Utility macro for comparing two lists. Used for CMake debugging purposes
# Usage:
# xgboost_compare_lists(<list_variable> <list2_variable> [description])
function(xgboost_compare_lists list1 list2 desc)
set(__list1 ${${list1}})
set(__list2 ${${list2}})
list(SORT __list1)
list(SORT __list2)
list(LENGTH __list1 __len1)
list(LENGTH __list2 __len2)
if(NOT ${__len1} EQUAL ${__len2})
message(FATAL_ERROR "Lists are not equal. ${__len1} != ${__len2}. ${desc}")
endif()
foreach(__i RANGE 1 ${__len1})
math(EXPR __index "${__i}- 1")
list(GET __list1 ${__index} __item1)
list(GET __list2 ${__index} __item2)
if(NOT ${__item1} STREQUAL ${__item2})
message(FATAL_ERROR "Lists are not equal. Differ at element ${__index}. ${desc}")
endif()
endforeach()
endfunction()
################################################################################################
# Command for disabling warnings for different platforms (see below for gcc and VisualStudio)
# Usage:
# xgboost_warnings_disable(<CMAKE_[C|CXX]_FLAGS[_CONFIGURATION]> -Wshadow /wd4996 ..,)
macro(xgboost_warnings_disable)
set(_flag_vars "")
set(_msvc_warnings "")
set(_gxx_warnings "")
foreach(arg ${ARGN})
if(arg MATCHES "^CMAKE_")
list(APPEND _flag_vars ${arg})
elseif(arg MATCHES "^/wd")
list(APPEND _msvc_warnings ${arg})
elseif(arg MATCHES "^-W")
list(APPEND _gxx_warnings ${arg})
endif()
endforeach()
if(NOT _flag_vars)
set(_flag_vars CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
endif()
if(MSVC AND _msvc_warnings)
foreach(var ${_flag_vars})
foreach(warning ${_msvc_warnings})
set(${var} "${${var}} ${warning}")
endforeach()
endforeach()
elseif((CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX) AND _gxx_warnings)
foreach(var ${_flag_vars})
foreach(warning ${_gxx_warnings})
if(NOT warning MATCHES "^-Wno-")
string(REPLACE "${warning}" "" ${var} "${${var}}")
string(REPLACE "-W" "-Wno-" warning "${warning}")
endif()
set(${var} "${${var}} ${warning}")
endforeach()
endforeach()
endif()
xgboost_clear_vars(_flag_vars _msvc_warnings _gxx_warnings)
endmacro()
################################################################################################
# Helper function get current definitions
# Usage:
# xgboost_get_current_definitions(<definitions_variable>)
function(xgboost_get_current_definitions definitions_var)
get_property(current_definitions DIRECTORY PROPERTY COMPILE_DEFINITIONS)
set(result "")
foreach(d ${current_definitions})
list(APPEND result -D${d})
endforeach()
xgboost_list_unique(result)
set(${definitions_var} ${result} PARENT_SCOPE)
endfunction()
################################################################################################
# Helper function get current includes/definitions
# Usage:
# xgboost_get_current_cflags(<cflagslist_variable>)
function(xgboost_get_current_cflags cflags_var)
get_property(current_includes DIRECTORY PROPERTY INCLUDE_DIRECTORIES)
xgboost_convert_absolute_paths(current_includes)
xgboost_get_current_definitions(cflags)
foreach(i ${current_includes})
list(APPEND cflags "-I${i}")
endforeach()
xgboost_list_unique(cflags)
set(${cflags_var} ${cflags} PARENT_SCOPE)
endfunction()
################################################################################################
# Helper function to parse current linker libs into link directories, libflags and osx frameworks
# Usage:
# xgboost_parse_linker_libs(<xgboost_LINKER_LIBS_var> <directories_var> <libflags_var> <frameworks_var>)
function(xgboost_parse_linker_libs xgboost_LINKER_LIBS_variable folders_var flags_var frameworks_var)
set(__unspec "")
set(__debug "")
set(__optimized "")
set(__framework "")
set(__varname "__unspec")
# split libs into debug, optimized, unspecified and frameworks
foreach(list_elem ${${xgboost_LINKER_LIBS_variable}})
if(list_elem STREQUAL "debug")
set(__varname "__debug")
elseif(list_elem STREQUAL "optimized")
set(__varname "__optimized")
elseif(list_elem MATCHES "^-framework[ \t]+([^ \t].*)")
list(APPEND __framework -framework ${CMAKE_MATCH_1})
else()
list(APPEND ${__varname} ${list_elem})
set(__varname "__unspec")
endif()
endforeach()
# attach debug or optimized libs to unspecified according to current configuration
if(CMAKE_BUILD_TYPE MATCHES "Debug")
set(__libs ${__unspec} ${__debug})
else()
set(__libs ${__unspec} ${__optimized})
endif()
set(libflags "")
set(folders "")
# convert linker libraries list to link flags
foreach(lib ${__libs})
if(TARGET ${lib})
list(APPEND folders $<TARGET_LINKER_FILE_DIR:${lib}>)
list(APPEND libflags -l${lib})
elseif(lib MATCHES "^-l.*")
list(APPEND libflags ${lib})
elseif(IS_ABSOLUTE ${lib})
get_filename_component(name_we ${lib} NAME_WE)
get_filename_component(folder ${lib} PATH)
string(REGEX MATCH "^lib(.*)" __match ${name_we})
list(APPEND libflags -l${CMAKE_MATCH_1})
list(APPEND folders ${folder})
else()
message(FATAL_ERROR "Logic error. Need to update cmake script")
endif()
endforeach()
xgboost_list_unique(libflags folders)
set(${folders_var} ${folders} PARENT_SCOPE)
set(${flags_var} ${libflags} PARENT_SCOPE)
set(${frameworks_var} ${__framework} PARENT_SCOPE)
endfunction()
################################################################################################
# Helper function to detect Darwin version, i.e. 10.8, 10.9, 10.10, ....
# Usage:
# xgboost_detect_darwin_version(<version_variable>)
function(xgboost_detect_darwin_version output_var)
if(APPLE)
execute_process(COMMAND /usr/bin/sw_vers -productVersion
RESULT_VARIABLE __sw_vers OUTPUT_VARIABLE __sw_vers_out
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
set(${output_var} ${__sw_vers_out} PARENT_SCOPE)
else()
set(${output_var} "" PARENT_SCOPE)
endif()
endfunction()
################################################################################################
# Convenient command to setup source group for IDEs that support this feature (VS, XCode)
# Usage:
# caffe_source_group(<group> GLOB[_RECURSE] <globbing_expression>)
function(xgboost_source_group group)
cmake_parse_arguments(CAFFE_SOURCE_GROUP "" "" "GLOB;GLOB_RECURSE" ${ARGN})
if(CAFFE_SOURCE_GROUP_GLOB)
file(GLOB srcs1 ${CAFFE_SOURCE_GROUP_GLOB})
source_group(${group} FILES ${srcs1})
endif()
if(CAFFE_SOURCE_GROUP_GLOB_RECURSE)
file(GLOB_RECURSE srcs2 ${CAFFE_SOURCE_GROUP_GLOB_RECURSE})
source_group(${group} FILES ${srcs2})
endif()
endfunction()


@@ -24,8 +24,7 @@ param['eval_metric'] = 'auc'
param['max_depth'] = 5
param['eta'] = 0.3
param['silent'] = 0
-param['updater'] = 'grow_gpu'
-#param['updater'] = 'grow_colmaker'
+param['tree_method'] = 'gpu_exact'
num_round = 20
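For context, an end-to-end run of the demo's new parameter style might look like this (a hedged sketch on synthetic data; `gpu_exact` needs an XGBoost build with `PLUGIN_UPDATER_GPU`):

```python
import numpy as np
import xgboost as xgb

# Synthetic binary classification problem, purely illustrative.
X = np.random.randn(10000, 20)
y = (X[:, 0] + 0.5 * X[:, 1] > 0).astype(int)
dtrain = xgb.DMatrix(X, label=y)

param = {'objective': 'binary:logistic',
         'eval_metric': 'auc',
         'max_depth': 5,
         'eta': 0.3,
         'silent': 0,
         # formerly: param['updater'] = 'grow_gpu'
         'tree_method': 'gpu_exact'}

bst = xgb.train(param, dtrain, num_boost_round=20)
```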


@@ -1,16 +1,16 @@
# CUDA Accelerated Tree Construction Algorithms
This plugin adds GPU accelerated tree construction algorithms to XGBoost.
## Usage
-Specify the 'updater' parameter as one of the following algorithms.
+Specify the 'tree_method' parameter as one of the following algorithms.
### Algorithms
-| updater | Description |
+| tree_method | Description |
| --- | --- |
-grow_gpu | The standard XGBoost tree construction algorithm. Performs exact search for splits. Slower and uses considerably more memory than 'grow_gpu_hist' |
-grow_gpu_hist | Equivalent to the XGBoost fast histogram algorithm. Faster and uses considerably less memory. Splits may be less accurate. |
+gpu_exact | The standard XGBoost tree construction algorithm. Performs exact search for splits. Slower and uses considerably more memory than 'gpu_hist' |
+gpu_hist | Equivalent to the XGBoost fast histogram algorithm. Faster and uses considerably less memory. Splits may be less accurate. |
### Supported parameters
-| parameter | grow_gpu | grow_gpu_hist |
+| parameter | gpu_exact | gpu_hist |
| --- | --- | --- |
subsample | &#10004; | &#10004; |
colsample_bytree | &#10004; | &#10004;|
@@ -29,7 +29,7 @@ Python example:
```python
param['gpu_id'] = 1
param['max_bin'] = 16
-param['updater'] = 'grow_gpu_hist'
+param['tree_method'] = 'gpu_hist'
```
## Benchmarks
To run benchmarks on synthetic data for binary classification:
@@ -39,18 +39,18 @@ $ python benchmark/benchmark.py
Training time on 1000000 rows x 50 columns with 500 boosting iterations on i7-6700K CPU @ 4.00GHz and Pascal Titan X.
-| Updater | Time (s) |
+| tree_method | Time (s) |
| --- | --- |
-| grow_gpu_hist | 11.09 |
-| grow_fast_histmaker (histogram XGBoost - CPU) | 41.75 |
-| grow_gpu | 193.90 |
-| grow_colmaker (standard XGBoost - CPU) | 720.12 |
+| gpu_hist | 11.09 |
+| hist (histogram XGBoost - CPU) | 41.75 |
+| gpu_exact | 193.90 |
+| exact (standard XGBoost - CPU) | 720.12 |
-[See here](http://dmlc.ml/2016/12/14/GPU-accelerated-xgboost.html) for additional performance benchmarks of the 'grow_gpu' updater.
+[See here](http://dmlc.ml/2016/12/14/GPU-accelerated-xgboost.html) for additional performance benchmarks of the 'gpu_exact' tree_method.
## Test
To run tests:
```bash
$ python -m nose test/python/
```
@@ -122,6 +122,13 @@ $ make PLUGIN_UPDATER_GPU=ON GTEST_PATH=${CACHE_PREFIX} test
```
## Changelog
+##### 2017/6/26
+* Change API to use tree_method parameter
+* Increase required cmake version to 3.5
+* Add compute arch 3.5 to default archs
+* Set default n_gpus to 1
##### 2017/6/5
* Multi-GPU support for histogram method using NVIDIA NCCL.
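As a usage note for the changelog entries, the renamed histogram method and the new `n_gpus` default combine roughly as follows (a hedged sketch; multi-GPU requires the NCCL-enabled build from the 2017/6/5 entry):

```python
# Single GPU (the new default, n_gpus=1):
param = {'objective': 'binary:logistic',
         'tree_method': 'gpu_hist',  # formerly updater='grow_gpu_hist'
         'max_bin': 16}

# Multi-GPU histogram training (NCCL-enabled build assumed):
param_multi = dict(param, n_gpus=2)
```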


@@ -14,19 +14,18 @@ def run_benchmark(args, gpu_algorithm, cpu_algorithm):
dtrain = xgb.DMatrix(X, y)
param = {'objective': 'binary:logistic',
-'tree_method': 'exact',
'max_depth': 6,
'silent': 1,
'eval_metric': 'auc'}
-param['updater'] = gpu_algorithm
-print("Training with '%s'" % param['updater'])
+param['tree_method'] = gpu_algorithm
+print("Training with '%s'" % param['tree_method'])
tmp = time.time()
xgb.train(param, dtrain, args.iterations)
print ("Time: %s seconds" % (str(time.time() - tmp)))
-param['updater'] = cpu_algorithm
-print("Training with '%s'" % param['updater'])
+param['tree_method'] = cpu_algorithm
+print("Training with '%s'" % param['tree_method'])
tmp = time.time()
xgb.train(param, dtrain, args.iterations)
print ("Time: %s seconds" % (str(time.time() - tmp)))
@@ -34,17 +33,17 @@ def run_benchmark(args, gpu_algorithm, cpu_algorithm):
parser = argparse.ArgumentParser()
-parser.add_argument('--algorithm', choices=['all', 'grow_gpu', 'grow_gpu_hist'], required=True)
+parser.add_argument('--algorithm', choices=['all', 'gpu_exact', 'gpu_hist'], default='all')
parser.add_argument('--rows',type=int,default=1000000)
parser.add_argument('--columns',type=int,default=50)
parser.add_argument('--iterations',type=int,default=500)
args = parser.parse_args()
-if 'grow_gpu_hist' in args.algorithm:
-run_benchmark(args, args.algorithm, 'grow_fast_histmaker')
-if 'grow_gpu' in args.algorithm:
-run_benchmark(args, args.algorithm, 'grow_colmaker')
+if 'gpu_hist' in args.algorithm:
+run_benchmark(args, args.algorithm, 'hist')
+if 'gpu_exact' in args.algorithm:
+run_benchmark(args, args.algorithm, 'exact')
if 'all' in args.algorithm:
-run_benchmark(args, 'grow_gpu', 'grow_colmaker')
-run_benchmark(args, 'grow_gpu_hist', 'grow_fast_histmaker')
+run_benchmark(args, 'gpu_exact', 'exact')
+run_benchmark(args, 'gpu_hist', 'hist')
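With `--algorithm` now defaulting to `all`, the script runs without arguments; a hedged sketch of driving it programmatically from the plugin directory (flag names taken from the argparse definition above):

```python
import subprocess
import sys

# Run only the GPU histogram comparison on a smaller problem size.
subprocess.check_call([
    sys.executable, "benchmark/benchmark.py",
    "--algorithm", "gpu_hist",
    "--rows", "100000",
    "--columns", "50",
    "--iterations", "10",
])
```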


@@ -35,7 +35,7 @@ class TestGPU(unittest.TestCase):
'objective': 'binary:logistic',
'eval_metric': 'auc'}
ag_param2 = {'max_depth': 2,
-'updater': 'grow_gpu',
+'tree_method': 'gpu_exact',
'eta': 1,
'silent': 1,
'objective': 'binary:logistic',
@@ -59,7 +59,7 @@ class TestGPU(unittest.TestCase):
dtest = xgb.DMatrix(X_test, y_test)
param = {'objective': 'binary:logistic',
-'updater': 'grow_gpu',
+'tree_method': 'gpu_exact',
'max_depth': 3,
'eval_metric': 'auc'}
res = {}
@@ -75,7 +75,7 @@ class TestGPU(unittest.TestCase):
dtrain2 = xgb.DMatrix(X2, label=y2)
param = {'objective': 'binary:logistic',
-'updater': 'grow_gpu',
+'tree_method': 'gpu_exact',
'max_depth': 2,
'eval_metric': 'auc'}
res = {}
@@ -134,7 +134,7 @@ class TestGPU(unittest.TestCase):
'objective': 'binary:logistic',
'eval_metric': 'auc'}
ag_param2 = {'max_depth': max_depth,
-'updater': 'grow_gpu_hist',
+'tree_method': 'gpu_hist',
'eta': 1,
'silent': 1,
'n_gpus': 1,
@@ -142,7 +142,7 @@ class TestGPU(unittest.TestCase):
'max_bin': max_bin,
'eval_metric': 'auc'}
ag_param3 = {'max_depth': max_depth,
-'updater': 'grow_gpu_hist',
+'tree_method': 'gpu_hist',
'eta': 1,
'silent': 1,
'n_gpus': n_gpus,
@@ -177,7 +177,7 @@ class TestGPU(unittest.TestCase):
dtest = xgb.DMatrix(X_test, y_test)
param = {'objective': 'binary:logistic',
-'updater': 'grow_gpu_hist',
+'tree_method': 'gpu_hist',
'max_depth': max_depth,
'n_gpus': 1,
'max_bin': max_bin,
@@ -189,7 +189,7 @@ class TestGPU(unittest.TestCase):
assert self.non_decreasing(res['train']['auc'])
#assert self.non_decreasing(res['test']['auc'])
param2 = {'objective': 'binary:logistic',
-'updater': 'grow_gpu_hist',
+'tree_method': 'gpu_hist',
'max_depth': max_depth,
'n_gpus': n_gpus,
'max_bin': max_bin,
@@ -211,7 +211,7 @@ class TestGPU(unittest.TestCase):
dtrain2 = xgb.DMatrix(X2, label=y2)
param = {'objective': 'binary:logistic',
-'updater': 'grow_gpu_hist',
+'tree_method': 'gpu_hist',
'max_depth': max_depth,
'n_gpus': n_gpus,
'max_bin': max_bin,
@@ -250,7 +250,7 @@ class TestGPU(unittest.TestCase):
######################################################################
# fail-safe test for max_bin
param = {'objective': 'binary:logistic',
-'updater': 'grow_gpu_hist',
+'tree_method': 'gpu_hist',
'max_depth': max_depth,
'n_gpus': n_gpus,
'eval_metric': 'auc',
@@ -263,7 +263,7 @@ class TestGPU(unittest.TestCase):
######################################################################
# subsampling
param = {'objective': 'binary:logistic',
-'updater': 'grow_gpu_hist',
+'tree_method': 'gpu_hist',
'max_depth': max_depth,
'n_gpus': n_gpus,
'eval_metric': 'auc',
@@ -279,7 +279,7 @@ class TestGPU(unittest.TestCase):
######################################################################
# fail-safe test for max_bin=2
param = {'objective': 'binary:logistic',
-'updater': 'grow_gpu_hist',
+'tree_method': 'gpu_hist',
'max_depth': 2,
'n_gpus': n_gpus,
'eval_metric': 'auc',


@@ -4,19 +4,19 @@
* \brief Implementation of learning algorithm.
* \author Tianqi Chen
*/
-#include <xgboost/logging.h>
-#include <xgboost/learner.h>
-#include <dmlc/timer.h>
#include <dmlc/io.h>
+#include <dmlc/timer.h>
+#include <xgboost/learner.h>
+#include <xgboost/logging.h>
#include <algorithm>
-#include <vector>
-#include <utility>
-#include <string>
-#include <sstream>
-#include <limits>
#include <iomanip>
-#include "./common/io.h"
+#include <limits>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
#include "./common/common.h"
+#include "./common/io.h"
#include "./common/random.h"

namespace xgboost {
@@ -25,17 +25,14 @@ bool Learner::AllowLazyCheckPoint() const {
return gbm_->AllowLazyCheckPoint();
}

-std::vector<std::string>
-Learner::DumpModel(const FeatureMap& fmap,
-bool with_stats,
-std::string format) const {
+std::vector<std::string> Learner::DumpModel(const FeatureMap& fmap,
+bool with_stats,
+std::string format) const {
return gbm_->DumpModel(fmap, with_stats, format);
}

/*! \brief training parameter for regression */
-struct LearnerModelParam
-: public dmlc::Parameter<LearnerModelParam> {
+struct LearnerModelParam : public dmlc::Parameter<LearnerModelParam> {
/* \brief global bias */
bst_float base_score;
/* \brief number of features */
@@ -55,20 +52,21 @@ struct LearnerModelParam
}
// declare parameters
DMLC_DECLARE_PARAMETER(LearnerModelParam) {
-DMLC_DECLARE_FIELD(base_score).set_default(0.5f)
+DMLC_DECLARE_FIELD(base_score)
+.set_default(0.5f)
.describe("Global bias of the model.");
-DMLC_DECLARE_FIELD(num_feature).set_default(0)
-.describe("Number of features in training data,"\
-" this parameter will be automatically detected by learner.");
-DMLC_DECLARE_FIELD(num_class).set_default(0).set_lower_bound(0)
-.describe("Number of class option for multi-class classifier. "\
-" By default equals 0 and corresponds to binary classifier.");
+DMLC_DECLARE_FIELD(num_feature)
+.set_default(0)
+.describe(
+"Number of features in training data,"
+" this parameter will be automatically detected by learner.");
+DMLC_DECLARE_FIELD(num_class).set_default(0).set_lower_bound(0).describe(
+"Number of class option for multi-class classifier. "
+" By default equals 0 and corresponds to binary classifier.");
}
};

-struct LearnerTrainParam
-: public dmlc::Parameter<LearnerTrainParam> {
+struct LearnerTrainParam : public dmlc::Parameter<LearnerTrainParam> {
// stored random seed
int seed;
// whether seed the PRNG each iteration
@@ -90,30 +88,40 @@ struct LearnerTrainParam
int debug_verbose;
// declare parameters
DMLC_DECLARE_PARAMETER(LearnerTrainParam) {
-DMLC_DECLARE_FIELD(seed).set_default(0)
-.describe("Random number seed during training.");
-DMLC_DECLARE_FIELD(seed_per_iteration).set_default(false)
-.describe("Seed PRNG determnisticly via iterator number, "\
-"this option will be switched on automatically on distributed mode.");
-DMLC_DECLARE_FIELD(dsplit).set_default(0)
+DMLC_DECLARE_FIELD(seed).set_default(0).describe(
+"Random number seed during training.");
+DMLC_DECLARE_FIELD(seed_per_iteration)
+.set_default(false)
+.describe(
+"Seed PRNG determnisticly via iterator number, "
+"this option will be switched on automatically on distributed "
+"mode.");
+DMLC_DECLARE_FIELD(dsplit)
+.set_default(0)
.add_enum("auto", 0)
.add_enum("col", 1)
.add_enum("row", 2)
.describe("Data split mode for distributed training.");
-DMLC_DECLARE_FIELD(tree_method).set_default(0)
+DMLC_DECLARE_FIELD(tree_method)
+.set_default(0)
.add_enum("auto", 0)
.add_enum("approx", 1)
.add_enum("exact", 2)
.add_enum("hist", 3)
+.add_enum("gpu_exact", 4)
+.add_enum("gpu_hist", 5)
.describe("Choice of tree construction method.");
-DMLC_DECLARE_FIELD(test_flag).set_default("")
-.describe("Internal test flag");
-DMLC_DECLARE_FIELD(prob_buffer_row).set_default(1.0f).set_range(0.0f, 1.0f)
+DMLC_DECLARE_FIELD(test_flag).set_default("").describe(
+"Internal test flag");
+DMLC_DECLARE_FIELD(prob_buffer_row)
+.set_default(1.0f)
+.set_range(0.0f, 1.0f)
.describe("Maximum buffered row portion");
-DMLC_DECLARE_FIELD(max_row_perbatch).set_default(std::numeric_limits<size_t>::max())
+DMLC_DECLARE_FIELD(max_row_perbatch)
+.set_default(std::numeric_limits<size_t>::max())
.describe("maximum row per batch.");
-DMLC_DECLARE_FIELD(nthread).set_default(0)
-.describe("Number of threads to use.");
+DMLC_DECLARE_FIELD(nthread).set_default(0).describe(
+"Number of threads to use.");
DMLC_DECLARE_FIELD(debug_verbose)
.set_lower_bound(0)
.set_default(0)
@@ -125,8 +133,8 @@ DMLC_REGISTER_PARAMETER(LearnerModelParam);
DMLC_REGISTER_PARAMETER(LearnerTrainParam);

/*!
-* \brief learner that performs gradient boosting for a specific objective function.
-* It does training and prediction.
+* \brief learner that performs gradient boosting for a specific objective
+* function. It does training and prediction.
*/
class LearnerImpl : public Learner {
public:
@@ -137,14 +145,41 @@ class LearnerImpl : public Learner {
name_gbm_ = "gbtree";
}

-void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
+void ConfigureUpdaters() {
+if (tparam.tree_method == 0 || tparam.tree_method == 1 ||
+tparam.tree_method == 2) {
+if (cfg_.count("updater") == 0) {
+if (tparam.dsplit == 1) {
+cfg_["updater"] = "distcol";
+} else if (tparam.dsplit == 2) {
+cfg_["updater"] = "grow_histmaker,prune";
+}
+if (tparam.prob_buffer_row != 1.0f) {
+cfg_["updater"] = "grow_histmaker,refresh,prune";
+}
+}
+} else if (tparam.tree_method == 3) {
+/* histogram-based algorithm */
+LOG(CONSOLE) << "Tree method is selected to be \'hist\', which uses a "
+"single updater "
+<< "grow_fast_histmaker.";
+cfg_["updater"] = "grow_fast_histmaker";
+} else if (tparam.tree_method == 4) {
+cfg_["updater"] = "grow_gpu,prune";
+} else if (tparam.tree_method == 5) {
+cfg_["updater"] = "grow_gpu_hist";
+}
+}
+
+void Configure(
+const std::vector<std::pair<std::string, std::string> >& args) override {
// add to configurations
tparam.InitAllowUnknown(args);
cfg_.clear();
for (const auto& kv : args) {
if (kv.first == "eval_metric") {
// check duplication
-auto dup_check = [&kv](const std::unique_ptr<Metric>&m) {
+auto dup_check = [&kv](const std::unique_ptr<Metric>& m) {
return m->Name() != kv.second;
};
if (std::all_of(metrics_.begin(), metrics_.end(), dup_check)) {
@@ -172,27 +207,13 @@ class LearnerImpl : public Learner {
}
}

-if (cfg_.count("max_delta_step") == 0 &&
-cfg_.count("objective") != 0 &&
+if (cfg_.count("max_delta_step") == 0 && cfg_.count("objective") != 0 &&
cfg_["objective"] == "count:poisson") {
cfg_["max_delta_step"] = "0.7";
}

-if (tparam.tree_method == 3) {
-/* histogram-based algorithm */
-LOG(CONSOLE) << "Tree method is selected to be \'hist\', which uses a single updater "
-<< "grow_fast_histmaker.";
-cfg_["updater"] = "grow_fast_histmaker";
-} else if (cfg_.count("updater") == 0) {
-if (tparam.dsplit == 1) {
-cfg_["updater"] = "distcol";
-} else if (tparam.dsplit == 2) {
-cfg_["updater"] = "grow_histmaker,prune";
-}
-if (tparam.prob_buffer_row != 1.0f) {
-cfg_["updater"] = "grow_histmaker,refresh,prune";
-}
-}
+ConfigureUpdaters();

if (cfg_.count("objective") == 0) {
cfg_["objective"] = "reg:linear";
}
@@ -220,9 +241,7 @@ class LearnerImpl : public Learner {
}
}

-void InitModel() override {
-this->LazyInitModel();
-}
+void InitModel() override { this->LazyInitModel(); }

void Load(dmlc::Stream* fi) override {
// TODO(tqchen) mark deprecation of old format.
@@ -256,11 +275,10 @@ class LearnerImpl : public Learner {
if (len != 0) {
name_obj_.resize(len);
CHECK_EQ(fi->Read(&name_obj_[0], len), len)
-<<"BoostLearner: wrong model format";
+<< "BoostLearner: wrong model format";
}
}
-CHECK(fi->Read(&name_gbm_))
-<< "BoostLearner: wrong model format";
+CHECK(fi->Read(&name_gbm_)) << "BoostLearner: wrong model format";
// duplicated code with LazyInitModel
obj_.reset(ObjFunction::Create(name_obj_));
gbm_.reset(GradientBooster::Create(name_gbm_, cache_, mparam.base_score));
@@ -268,13 +286,13 @@ class LearnerImpl : public Learner {
if (mparam.contain_extra_attrs != 0) {
std::vector<std::pair<std::string, std::string> > attr;
fi->Read(&attr);
-attributes_ = std::map<std::string, std::string>(
-attr.begin(), attr.end());
+attributes_ =
+std::map<std::string, std::string>(attr.begin(), attr.end());
}
if (name_obj_ == "count:poisson") {
std::string max_delta_step;
fi->Read(&max_delta_step);
cfg_["max_delta_step"] = max_delta_step;
}
if (mparam.contain_eval_metrics != 0) {
std::vector<std::string> metr;
@@ -289,7 +307,7 @@ class LearnerImpl : public Learner {
}

// rabit save model to rabit checkpoint
-void Save(dmlc::Stream *fo) const override {
+void Save(dmlc::Stream* fo) const override {
fo->Write(&mparam, sizeof(LearnerModelParam));
fo->Write(name_obj_);
fo->Write(name_gbm_);
@@ -300,9 +318,9 @@ class LearnerImpl : public Learner {
fo->Write(attr);
}
if (name_obj_ == "count:poisson") {
-std::map<std::string, std::string>::const_iterator it = cfg_.find("max_delta_step");
-if (it != cfg_.end())
-fo->Write(it->second);
+std::map<std::string, std::string>::const_iterator it =
+cfg_.find("max_delta_step");
+if (it != cfg_.end()) fo->Write(it->second);
}
if (mparam.contain_eval_metrics != 0) {
std::vector<std::string> metr;
@@ -325,8 +343,7 @@ class LearnerImpl : public Learner {
gbm_->DoBoost(train, &gpair_, obj_.get());
}

-void BoostOneIter(int iter,
-DMatrix* train,
+void BoostOneIter(int iter, DMatrix* train,
std::vector<bst_gpair>* in_gpair) override {
if (tparam.seed_per_iteration || rabit::IsDistributed()) {
common::GlobalRandom().seed(tparam.seed * kRandSeedMagic + iter);
@@ -335,13 +352,11 @@ class LearnerImpl : public Learner {
gbm_->DoBoost(train, in_gpair);
}

-std::string EvalOneIter(int iter,
-const std::vector<DMatrix*>& data_sets,
+std::string EvalOneIter(int iter, const std::vector<DMatrix*>& data_sets,
const std::vector<std::string>& data_names) override {
double tstart = dmlc::GetTime();
std::ostringstream os;
-os << '[' << iter << ']'
-<< std::setiosflags(std::ios::fixed);
+os << '[' << iter << ']' << std::setiosflags(std::ios::fixed);
if (metrics_.size() == 0) {
metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric()));
}
@@ -388,20 +403,19 @@ class LearnerImpl : public Learner {
return out;
}

-std::pair<std::string, bst_float> Evaluate(DMatrix* data, std::string metric) {
+std::pair<std::string, bst_float> Evaluate(DMatrix* data,
+std::string metric) {
if (metric == "auto") metric = obj_->DefaultEvalMetric();
std::unique_ptr<Metric> ev(Metric::Create(metric.c_str()));
this->PredictRaw(data, &preds_);
obj_->EvalTransform(&preds_);
-return std::make_pair(metric, ev->Eval(preds_, data->info(), tparam.dsplit == 2));
+return std::make_pair(metric,
+ev->Eval(preds_, data->info(), tparam.dsplit == 2));
}

-void Predict(DMatrix* data,
-bool output_margin,
-std::vector<bst_float> *out_preds,
-unsigned ntree_limit,
-bool pred_leaf,
-bool pred_contribs) const override {
+void Predict(DMatrix* data, bool output_margin,
+std::vector<bst_float>* out_preds, unsigned ntree_limit,
+bool pred_leaf, bool pred_contribs) const override {
if (pred_contribs) {
gbm_->PredictContribution(data, out_preds, ntree_limit);
} else if (pred_leaf) {
@@ -418,7 +432,12 @@ class LearnerImpl : public Learner {
// check if p_train is ready to used by training.
// if not, initialize the column access.
inline void LazyInitDMatrix(DMatrix* p_train) {
-if (tparam.tree_method != 3 && !p_train->HaveColAccess()) {
+if (tparam.tree_method == 3 || tparam.tree_method == 4 ||
+tparam.tree_method == 5) {
+return;
+}
+
+if (!p_train->HaveColAccess()) {
int ncol = static_cast<int>(p_train->info().num_col);
std::vector<bool> enabled(ncol, true);
// set max row per batch to limited value
@@ -426,12 +445,12 @@ class LearnerImpl : public Learner {
size_t max_row_perbatch = tparam.max_row_perbatch;
const size_t safe_max_row = static_cast<size_t>(32UL << 10UL);

-if (tparam.tree_method == 0 &&
-p_train->info().num_row >= (4UL << 20UL)) {
-LOG(CONSOLE) << "Tree method is automatically selected to be \'approx\'"
+if (tparam.tree_method == 0 && p_train->info().num_row >= (4UL << 20UL)) {
+LOG(CONSOLE)
+<< "Tree method is automatically selected to be \'approx\'"
<< " for faster speed."
<< " to use old behavior(exact greedy algorithm on single machine),"
<< " set tree_method to \'exact\'";
max_row_perbatch = std::min(max_row_perbatch, safe_max_row);
}
@@ -444,15 +463,14 @@ class LearnerImpl : public Learner {
max_row_perbatch = std::min(max_row_perbatch, safe_max_row);
}
// initialize column access
-p_train->InitColAccess(enabled,
-tparam.prob_buffer_row,
-max_row_perbatch);
+p_train->InitColAccess(enabled, tparam.prob_buffer_row, max_row_perbatch);
}

if (!p_train->SingleColBlock() && cfg_.count("updater") == 0) {
if (tparam.tree_method == 2) {
LOG(CONSOLE) << "tree method is set to be 'exact',"
-<< " but currently we are only able to proceed with approximate algorithm";
+<< " but currently we are only able to proceed with "
+"approximate algorithm";
}
cfg_["updater"] = "grow_histmaker,prune";
if (gbm_.get() != nullptr) {
@@ -462,9 +480,7 @@ class LearnerImpl : public Learner {
}

// return whether model is already initialized.
-inline bool ModelInitialized() const {
-return gbm_.get() != nullptr;
-}
+inline bool ModelInitialized() const { return gbm_.get() != nullptr; }
// lazily initialize the model if it haven't yet been initialized.
inline void LazyInitModel() {
if (this->ModelInitialized()) return;
@@ -497,14 +513,11 @@ class LearnerImpl : public Learner {
* \param ntree_limit limit number of trees used for boosted tree
* predictor, when it equals 0, this means we are using all the trees
*/
-inline void PredictRaw(DMatrix* data,
-std::vector<bst_float>* out_preds,
+inline void PredictRaw(DMatrix* data, std::vector<bst_float>* out_preds,
unsigned ntree_limit = 0) const {
CHECK(gbm_.get() != nullptr)
<< "Predict must happen after Load or InitModel";
-gbm_->Predict(data,
-out_preds,
-ntree_limit);
+gbm_->Predict(data, out_preds, ntree_limit);
}
// model parameter
LearnerModelParam mparam;
@@ -530,7 +543,8 @@ class LearnerImpl : public Learner {
std::vector<std::shared_ptr<DMatrix> > cache_;
};

-Learner* Learner::Create(const std::vector<std::shared_ptr<DMatrix> >& cache_data) {
+Learner* Learner::Create(
+const std::vector<std::shared_ptr<DMatrix> >& cache_data) {
return new LearnerImpl(cache_data);
}
} // namespace xgboost
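The heart of the C++ change is the new `ConfigureUpdaters` routine, which maps the `tree_method` enum onto updater sequences. A hedged Python sketch of that mapping (enum values and updater strings taken from the diff above; the function shape is illustrative only):

```python
# tree_method enum values as declared in LearnerTrainParam above.
TREE_METHODS = {'auto': 0, 'approx': 1, 'exact': 2,
                'hist': 3, 'gpu_exact': 4, 'gpu_hist': 5}

def configure_updaters(tree_method, cfg, dsplit=0, prob_buffer_row=1.0):
    """Mirror of LearnerImpl::ConfigureUpdaters (illustrative)."""
    if tree_method in (0, 1, 2):          # auto / approx / exact
        if 'updater' not in cfg:
            if dsplit == 1:               # column-split distributed mode
                cfg['updater'] = 'distcol'
            elif dsplit == 2:             # row-split distributed mode
                cfg['updater'] = 'grow_histmaker,prune'
            if prob_buffer_row != 1.0:
                cfg['updater'] = 'grow_histmaker,refresh,prune'
    elif tree_method == 3:                # hist
        cfg['updater'] = 'grow_fast_histmaker'
    elif tree_method == 4:                # gpu_exact
        cfg['updater'] = 'grow_gpu,prune'
    elif tree_method == 5:                # gpu_hist
        cfg['updater'] = 'grow_gpu_hist'
    return cfg

# e.g. configure_updaters(TREE_METHODS['gpu_exact'], {})
#      -> {'updater': 'grow_gpu,prune'}
```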