diff --git a/CMakeLists.txt b/CMakeLists.txt index b02b8f026..e19aedb31 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,9 +3,10 @@ project (xgboost) find_package(OpenMP) option(PLUGIN_UPDATER_GPU "Build GPU accelerated tree construction plugin") -set(GPU_COMPUTE_VER 50;52;60;61 CACHE STRING +set(GPU_COMPUTE_VER 35;50;52;60;61 CACHE STRING "Space separated list of compute versions to be built against") if(PLUGIN_UPDATER_GPU) + cmake_minimum_required (VERSION 3.5) find_package(CUDA REQUIRED) endif() diff --git a/cmake/Cuda.cmake b/cmake/Cuda.cmake deleted file mode 100644 index 30c5139d5..000000000 --- a/cmake/Cuda.cmake +++ /dev/null @@ -1,289 +0,0 @@ - -include(CheckCXXCompilerFlag) -check_cxx_compiler_flag("-std=c++11" SUPPORT_CXX11) - -################################################################################################ -# A function for automatic detection of GPUs installed (if autodetection is enabled) -# Usage: -# mshadow_detect_installed_gpus(out_variable) -function(xgboost_detect_installed_gpus out_variable) -set(CUDA_gpu_detect_output "") - if(NOT CUDA_gpu_detect_output) - set(__cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu) - - file(WRITE ${__cufile} "" - "#include \n" - "int main()\n" - "{\n" - " int count = 0;\n" - " if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n" - " if (count == 0) return -1;\n" - " for (int device = 0; device < count; ++device)\n" - " {\n" - " cudaDeviceProp prop;\n" - " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n" - " std::printf(\"%d.%d \", prop.major, prop.minor);\n" - " }\n" - " return 0;\n" - "}\n") - if(MSVC) - #find vcvarsall.bat and run it building msvc environment - get_filename_component(MY_COMPILER_DIR ${CMAKE_CXX_COMPILER} DIRECTORY) - find_file(MY_VCVARSALL_BAT vcvarsall.bat "${MY_COMPILER_DIR}/.." 
"${MY_COMPILER_DIR}/../..") - execute_process(COMMAND ${MY_VCVARSALL_BAT} && ${CUDA_NVCC_EXECUTABLE} -arch sm_30 --run ${__cufile} - WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/" - RESULT_VARIABLE __nvcc_res OUTPUT_VARIABLE __nvcc_out - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) - else() - if(CUDA_LIBRARY_PATH) - set(CUDA_LINK_LIBRARY_PATH "-L${CUDA_LIBRARY_PATH}") - endif() - execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} -arch sm_30 --run ${__cufile} ${CUDA_LINK_LIBRARY_PATH} - WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/" - RESULT_VARIABLE __nvcc_res OUTPUT_VARIABLE __nvcc_out - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) - endif() - if(__nvcc_res EQUAL 0) - # nvcc outputs text containing line breaks when building with MSVC. - # The line below prevents CMake from inserting a variable with line - # breaks in the cache - string(REGEX MATCH "([1-9].[0-9])" __nvcc_out "${__nvcc_out}") - string(REPLACE "2.1" "2.1(2.0)" __nvcc_out "${__nvcc_out}") - set(CUDA_gpu_detect_output ${__nvcc_out} CACHE INTERNAL "Returned GPU architetures from xgboost_detect_gpus tool" FORCE) - else() - message(WARNING "Running GPU detection script with nvcc failed: ${__nvcc_out}") - endif() - endif() - - if(NOT CUDA_gpu_detect_output) - message(WARNING "Automatic GPU detection failed. 
Building for all known architectures (${xgboost_known_gpu_archs}).") - set(${out_variable} ${xgboost_known_gpu_archs} PARENT_SCOPE) - else() - set(${out_variable} ${CUDA_gpu_detect_output} PARENT_SCOPE) - endif() -endfunction() - - -################################################################################################ -# Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME -# Usage: -# xgboost_select_nvcc_arch_flags(out_variable) -function(xgboost_select_nvcc_arch_flags out_variable) - # List of arch names - set(__archs_names "Fermi" "Kepler" "Maxwell" "Pascal" "All" "Manual") - set(__archs_name_default "All") - if(NOT CMAKE_CROSSCOMPILING) - list(APPEND __archs_names "Auto") - set(__archs_name_default "Auto") - endif() - - # set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui) - set(CUDA_ARCH_NAME ${__archs_name_default} CACHE STRING "Select target NVIDIA GPU achitecture.") - set_property( CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${__archs_names} ) - mark_as_advanced(CUDA_ARCH_NAME) - - # verify CUDA_ARCH_NAME value - if(NOT ";${__archs_names};" MATCHES ";${CUDA_ARCH_NAME};") - string(REPLACE ";" ", " __archs_names "${__archs_names}") - message(FATAL_ERROR "Only ${__archs_names} architeture names are supported.") - endif() - - if(${CUDA_ARCH_NAME} STREQUAL "Manual") - set(CUDA_ARCH_BIN ${xgboost_known_gpu_archs} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported") - set(CUDA_ARCH_PTX "50" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for") - mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX) - else() - unset(CUDA_ARCH_BIN CACHE) - unset(CUDA_ARCH_PTX CACHE) - endif() - - if(${CUDA_ARCH_NAME} STREQUAL "Fermi") - set(__cuda_arch_bin "20 21(20)") - elseif(${CUDA_ARCH_NAME} STREQUAL "Kepler") - set(__cuda_arch_bin "30 35") - elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell") - set(__cuda_arch_bin "50") - elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal") - 
set(__cuda_arch_bin "60 61") - elseif(${CUDA_ARCH_NAME} STREQUAL "All") - set(__cuda_arch_bin ${xgboost_known_gpu_archs}) - elseif(${CUDA_ARCH_NAME} STREQUAL "Auto") - xgboost_detect_installed_gpus(__cuda_arch_bin) - else() # (${CUDA_ARCH_NAME} STREQUAL "Manual") - set(__cuda_arch_bin ${CUDA_ARCH_BIN}) - endif() - - # remove dots and convert to lists - string(REGEX REPLACE "\\." "" __cuda_arch_bin "${__cuda_arch_bin}") - string(REGEX REPLACE "\\." "" __cuda_arch_ptx "${CUDA_ARCH_PTX}") - string(REGEX MATCHALL "[0-9()]+" __cuda_arch_bin "${__cuda_arch_bin}") - string(REGEX MATCHALL "[0-9]+" __cuda_arch_ptx "${__cuda_arch_ptx}") - xgboost_list_unique(__cuda_arch_bin __cuda_arch_ptx) - - set(__nvcc_flags "") - set(__nvcc_archs_readable "") - - # Tell NVCC to add binaries for the specified GPUs - foreach(__arch ${__cuda_arch_bin}) - if(__arch MATCHES "([0-9]+)\\(([0-9]+)\\)") - # User explicitly specified PTX for the concrete BIN - list(APPEND __nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}) - list(APPEND __nvcc_archs_readable sm_${CMAKE_MATCH_1}) - else() - # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN - list(APPEND __nvcc_flags -gencode arch=compute_${__arch},code=sm_${__arch}) - list(APPEND __nvcc_archs_readable sm_${__arch}) - endif() - endforeach() - - # Tell NVCC to add PTX intermediate code for the specified architectures - foreach(__arch ${__cuda_arch_ptx}) - list(APPEND __nvcc_flags -gencode arch=compute_${__arch},code=compute_${__arch}) - list(APPEND __nvcc_archs_readable compute_${__arch}) - endforeach() - - string(REPLACE ";" " " __nvcc_archs_readable "${__nvcc_archs_readable}") - set(${out_variable} ${__nvcc_flags} PARENT_SCOPE) - set(${out_variable}_readable ${__nvcc_archs_readable} PARENT_SCOPE) -endfunction() - -################################################################################################ -# Short command for cuda comnpilation -# Usage: -# xgboost_cuda_compile( ) 
-macro(xgboost_cuda_compile objlist_variable) - foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG) - set(${var}_backup_in_cuda_compile_ "${${var}}") - - # we remove /EHa as it generates warnings under windows - string(REPLACE "/EHa" "" ${var} "${${var}}") - - endforeach() - if(UNIX OR APPLE) - list(APPEND CUDA_NVCC_FLAGS -Xcompiler -fPIC) - endif() - - if(APPLE) - list(APPEND CUDA_NVCC_FLAGS -Xcompiler -Wno-unused-function) - endif() - - set(CUDA_NVCC_FLAGS_DEBUG "${CUDA_NVCC_FLAGS_DEBUG} -G -lineinfo") - - if(MSVC) - # disable noisy warnings: - # 4819: The file contains a character that cannot be represented in the current code page (number). - list(APPEND CUDA_NVCC_FLAGS -Xcompiler "/wd4819") - foreach(flag_var - CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE - CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) - if(${flag_var} MATCHES "/MD") - string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") - endif(${flag_var} MATCHES "/MD") - endforeach(flag_var) - endif() - - # If the build system is a container, make sure the nvcc intermediate files - # go into the build output area rather than in /tmp, which may run out of space - if(IS_CONTAINER_BUILD) - set(CUDA_NVCC_INTERMEDIATE_DIR "${CMAKE_CURRENT_BINARY_DIR}") - message(STATUS "Container build enabled, so nvcc intermediate files in: ${CUDA_NVCC_INTERMEDIATE_DIR}") - list(APPEND CUDA_NVCC_FLAGS "--keep --keep-dir ${CUDA_NVCC_INTERMEDIATE_DIR}") - endif() - - cuda_compile(cuda_objcs ${ARGN}) - - foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG) - set(${var} "${${var}_backup_in_cuda_compile_}") - unset(${var}_backup_in_cuda_compile_) - endforeach() - - set(${objlist_variable} ${cuda_objcs}) -endmacro() - - -################################################################################################ -### Non macro section -################################################################################################ - -# Try to 
prime CUDA_TOOLKIT_ROOT_DIR by looking for libcudart.so -if(NOT CUDA_TOOLKIT_ROOT_DIR) - find_library(CUDA_LIBRARY_PATH libcudart.so PATHS ENV LD_LIBRARY_PATH PATH_SUFFIXES lib lib64) - if(CUDA_LIBRARY_PATH) - get_filename_component(CUDA_LIBRARY_PATH ${CUDA_LIBRARY_PATH} DIRECTORY) - set(CUDA_TOOLKIT_ROOT_DIR "${CUDA_LIBRARY_PATH}/..") - endif() -endif() - -find_package(CUDA 5.5 QUIET REQUIRED) -find_cuda_helper_libs(curand) # cmake 2.8.7 compartibility which doesn't search for curand - -if(NOT CUDA_FOUND) - return() -endif() - -set(HAVE_CUDA TRUE) -message(STATUS "CUDA detected: " ${CUDA_VERSION}) -include_directories(SYSTEM ${CUDA_INCLUDE_DIRS}) -list(APPEND xgboost_LINKER_LIBS ${CUDA_CUDART_LIBRARY} - ${CUDA_curand_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}) - -# Known NVIDIA GPU achitectures xgboost can be compiled for. -# This list will be used for CUDA_ARCH_NAME = All option -if(CUDA_ARCH_ALL) - set(xgboost_known_gpu_archs "${CUDA_ARCH_ALL}") -else() - if(${CUDA_VERSION} GREATER 7.5) - set(xgboost_known_gpu_archs "30 35 50 52 60 61") - else() - set(xgboost_known_gpu_archs "30 35 50 52") - endif() -endif() - -# cudnn detection -if(USE_CUDNN) - detect_cuDNN() - if(HAVE_CUDNN) - add_definitions(-DUSE_CUDNN) - include_directories(SYSTEM ${CUDNN_INCLUDE}) - list(APPEND xgboost_LINKER_LIBS ${CUDNN_LIBRARY}) - endif() -endif() - -# setting nvcc arch flags -xgboost_select_nvcc_arch_flags(NVCC_FLAGS_EXTRA) -list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA}) -message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA_readable}") - -# Boost 1.55 workaround, see https://svn.boost.org/trac/boost/ticket/9392 or -# https://github.com/ComputationalRadiationPhysics/picongpu/blob/master/src/picongpu/CMakeLists.txt -if(Boost_VERSION EQUAL 105500) - message(STATUS "Cuda + Boost 1.55: Applying noinline work around") - # avoid warning for CMake >= 2.8.12 - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} \"-DBOOST_NOINLINE=__attribute__((noinline))\" ") -endif() - -# disable some nvcc diagnostic 
that apears in boost, glog, glags, opencv, etc. -foreach(diag cc_clobber_ignored integer_sign_change useless_using_declaration set_but_not_used) - list(APPEND CUDA_NVCC_FLAGS -Xcudafe --diag_suppress=${diag}) -endforeach() - -# setting default testing device -if(NOT CUDA_TEST_DEVICE) - set(CUDA_TEST_DEVICE -1) -endif() - -mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD) -mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION) - -# Handle clang/libc++ issue -if(APPLE) - xgboost_detect_darwin_version(OSX_VERSION) - - # OSX 10.9 and higher uses clang/libc++ by default which is incompartible with old CUDA toolkits - if(OSX_VERSION VERSION_GREATER 10.8) - # enabled by default if and only if CUDA version is less than 7.0 - xgboost_option(USE_libstdcpp "Use libstdc++ instead of libc++" (CUDA_VERSION VERSION_LESS 7.0)) - endif() -endif() diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake deleted file mode 100644 index 8b737f8b7..000000000 --- a/cmake/Utils.cmake +++ /dev/null @@ -1,398 +0,0 @@ -################################################################################################ -# Command alias for debugging messages -# Usage: -# dmsg() -function(dmsg) - message(STATUS ${ARGN}) -endfunction() - -################################################################################################ -# Removes duplicates from list(s) -# Usage: -# xgboost_list_unique( [] [...]) -macro(xgboost_list_unique) - foreach(__lst ${ARGN}) - if(${__lst}) - list(REMOVE_DUPLICATES ${__lst}) - endif() - endforeach() -endmacro() - -################################################################################################ -# Clears variables from list -# Usage: -# xgboost_clear_vars() -macro(xgboost_clear_vars) - foreach(_var ${ARGN}) - unset(${_var}) - endforeach() -endmacro() - -################################################################################################ -# Removes duplicates from string -# Usage: -# xgboost_string_unique() 
-function(xgboost_string_unique __string) - if(${__string}) - set(__list ${${__string}}) - separate_arguments(__list) - list(REMOVE_DUPLICATES __list) - foreach(__e ${__list}) - set(__str "${__str} ${__e}") - endforeach() - set(${__string} ${__str} PARENT_SCOPE) - endif() -endfunction() - -################################################################################################ -# Prints list element per line -# Usage: -# xgboost_print_list() -function(xgboost_print_list) - foreach(e ${ARGN}) - message(STATUS ${e}) - endforeach() -endfunction() - -################################################################################################ -# Function merging lists of compiler flags to single string. -# Usage: -# xgboost_merge_flag_lists(out_variable [] [] ...) -function(xgboost_merge_flag_lists out_var) - set(__result "") - foreach(__list ${ARGN}) - foreach(__flag ${${__list}}) - string(STRIP ${__flag} __flag) - set(__result "${__result} ${__flag}") - endforeach() - endforeach() - string(STRIP ${__result} __result) - set(${out_var} ${__result} PARENT_SCOPE) -endfunction() - -################################################################################################ -# Converts all paths in list to absolute -# Usage: -# xgboost_convert_absolute_paths() -function(xgboost_convert_absolute_paths variable) - set(__dlist "") - foreach(__s ${${variable}}) - get_filename_component(__abspath ${__s} ABSOLUTE) - list(APPEND __list ${__abspath}) - endforeach() - set(${variable} ${__list} PARENT_SCOPE) -endfunction() - -################################################################################################ -# Reads set of version defines from the header file -# Usage: -# xgboost_parse_header( ..) 
-macro(xgboost_parse_header FILENAME FILE_VAR) - set(vars_regex "") - set(__parnet_scope OFF) - set(__add_cache OFF) - foreach(name ${ARGN}) - if("${name}" STREQUAL "PARENT_SCOPE") - set(__parnet_scope ON) - elseif("${name}" STREQUAL "CACHE") - set(__add_cache ON) - elseif(vars_regex) - set(vars_regex "${vars_regex}|${name}") - else() - set(vars_regex "${name}") - endif() - endforeach() - if(EXISTS "${FILENAME}") - file(STRINGS "${FILENAME}" ${FILE_VAR} REGEX "#define[ \t]+(${vars_regex})[ \t]+[0-9]+" ) - else() - unset(${FILE_VAR}) - endif() - foreach(name ${ARGN}) - if(NOT "${name}" STREQUAL "PARENT_SCOPE" AND NOT "${name}" STREQUAL "CACHE") - if(${FILE_VAR}) - if(${FILE_VAR} MATCHES ".+[ \t]${name}[ \t]+([0-9]+).*") - string(REGEX REPLACE ".+[ \t]${name}[ \t]+([0-9]+).*" "\\1" ${name} "${${FILE_VAR}}") - else() - set(${name} "") - endif() - if(__add_cache) - set(${name} ${${name}} CACHE INTERNAL "${name} parsed from ${FILENAME}" FORCE) - elseif(__parnet_scope) - set(${name} "${${name}}" PARENT_SCOPE) - endif() - else() - unset(${name} CACHE) - endif() - endif() - endforeach() -endmacro() - -################################################################################################ -# Reads single version define from the header file and parses it -# Usage: -# xgboost_parse_header_single_define( ) -function(xgboost_parse_header_single_define LIBNAME HDR_PATH VARNAME) - set(${LIBNAME}_H "") - if(EXISTS "${HDR_PATH}") - file(STRINGS "${HDR_PATH}" ${LIBNAME}_H REGEX "^#define[ \t]+${VARNAME}[ \t]+\"[^\"]*\".*$" LIMIT_COUNT 1) - endif() - - if(${LIBNAME}_H) - string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MAJOR "${${LIBNAME}_H}") - string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MINOR "${${LIBNAME}_H}") - string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_PATCH "${${LIBNAME}_H}") - set(${LIBNAME}_VERSION_MAJOR 
${${LIBNAME}_VERSION_MAJOR} ${ARGN} PARENT_SCOPE) - set(${LIBNAME}_VERSION_MINOR ${${LIBNAME}_VERSION_MINOR} ${ARGN} PARENT_SCOPE) - set(${LIBNAME}_VERSION_PATCH ${${LIBNAME}_VERSION_PATCH} ${ARGN} PARENT_SCOPE) - set(${LIBNAME}_VERSION_STRING "${${LIBNAME}_VERSION_MAJOR}.${${LIBNAME}_VERSION_MINOR}.${${LIBNAME}_VERSION_PATCH}" PARENT_SCOPE) - - # append a TWEAK version if it exists: - set(${LIBNAME}_VERSION_TWEAK "") - if("${${LIBNAME}_H}" MATCHES "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.[0-9]+\\.[0-9]+\\.([0-9]+).*$") - set(${LIBNAME}_VERSION_TWEAK "${CMAKE_MATCH_1}" ${ARGN} PARENT_SCOPE) - endif() - if(${LIBNAME}_VERSION_TWEAK) - set(${LIBNAME}_VERSION_STRING "${${LIBNAME}_VERSION_STRING}.${${LIBNAME}_VERSION_TWEAK}" ${ARGN} PARENT_SCOPE) - else() - set(${LIBNAME}_VERSION_STRING "${${LIBNAME}_VERSION_STRING}" ${ARGN} PARENT_SCOPE) - endif() - endif() -endfunction() - -######################################################################################################## -# An option that the user can select. Can accept condition to control when option is available for user. 
-# Usage: -# xgboost_option( "doc string" [IF ]) -function(xgboost_option variable description value) - set(__value ${value}) - set(__condition "") - set(__varname "__value") - foreach(arg ${ARGN}) - if(arg STREQUAL "IF" OR arg STREQUAL "if") - set(__varname "__condition") - else() - list(APPEND ${__varname} ${arg}) - endif() - endforeach() - unset(__varname) - if("${__condition}" STREQUAL "") - set(__condition 2 GREATER 1) - endif() - - if(${__condition}) - if("${__value}" MATCHES ";") - if(${__value}) - option(${variable} "${description}" ON) - else() - option(${variable} "${description}" OFF) - endif() - elseif(DEFINED ${__value}) - if(${__value}) - option(${variable} "${description}" ON) - else() - option(${variable} "${description}" OFF) - endif() - else() - option(${variable} "${description}" ${__value}) - endif() - else() - unset(${variable} CACHE) - endif() -endfunction() - -################################################################################################ -# Utility macro for comparing two lists. Used for CMake debugging purposes -# Usage: -# xgboost_compare_lists( [description]) -function(xgboost_compare_lists list1 list2 desc) - set(__list1 ${${list1}}) - set(__list2 ${${list2}}) - list(SORT __list1) - list(SORT __list2) - list(LENGTH __list1 __len1) - list(LENGTH __list2 __len2) - - if(NOT ${__len1} EQUAL ${__len2}) - message(FATAL_ERROR "Lists are not equal. ${__len1} != ${__len2}. ${desc}") - endif() - - foreach(__i RANGE 1 ${__len1}) - math(EXPR __index "${__i}- 1") - list(GET __list1 ${__index} __item1) - list(GET __list2 ${__index} __item2) - if(NOT ${__item1} STREQUAL ${__item2}) - message(FATAL_ERROR "Lists are not equal. Differ at element ${__index}. 
${desc}") - endif() - endforeach() -endfunction() - -################################################################################################ -# Command for disabling warnings for different platforms (see below for gcc and VisualStudio) -# Usage: -# xgboost_warnings_disable( -Wshadow /wd4996 ..,) -macro(xgboost_warnings_disable) - set(_flag_vars "") - set(_msvc_warnings "") - set(_gxx_warnings "") - - foreach(arg ${ARGN}) - if(arg MATCHES "^CMAKE_") - list(APPEND _flag_vars ${arg}) - elseif(arg MATCHES "^/wd") - list(APPEND _msvc_warnings ${arg}) - elseif(arg MATCHES "^-W") - list(APPEND _gxx_warnings ${arg}) - endif() - endforeach() - - if(NOT _flag_vars) - set(_flag_vars CMAKE_C_FLAGS CMAKE_CXX_FLAGS) - endif() - - if(MSVC AND _msvc_warnings) - foreach(var ${_flag_vars}) - foreach(warning ${_msvc_warnings}) - set(${var} "${${var}} ${warning}") - endforeach() - endforeach() - elseif((CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX) AND _gxx_warnings) - foreach(var ${_flag_vars}) - foreach(warning ${_gxx_warnings}) - if(NOT warning MATCHES "^-Wno-") - string(REPLACE "${warning}" "" ${var} "${${var}}") - string(REPLACE "-W" "-Wno-" warning "${warning}") - endif() - set(${var} "${${var}} ${warning}") - endforeach() - endforeach() - endif() - xgboost_clear_vars(_flag_vars _msvc_warnings _gxx_warnings) -endmacro() - -################################################################################################ -# Helper function get current definitions -# Usage: -# xgboost_get_current_definitions() -function(xgboost_get_current_definitions definitions_var) - get_property(current_definitions DIRECTORY PROPERTY COMPILE_DEFINITIONS) - set(result "") - - foreach(d ${current_definitions}) - list(APPEND result -D${d}) - endforeach() - - xgboost_list_unique(result) - set(${definitions_var} ${result} PARENT_SCOPE) -endfunction() - -################################################################################################ -# Helper function get current 
includes/definitions -# Usage: -# xgboost_get_current_cflags() -function(xgboost_get_current_cflags cflags_var) - get_property(current_includes DIRECTORY PROPERTY INCLUDE_DIRECTORIES) - xgboost_convert_absolute_paths(current_includes) - xgboost_get_current_definitions(cflags) - - foreach(i ${current_includes}) - list(APPEND cflags "-I${i}") - endforeach() - - xgboost_list_unique(cflags) - set(${cflags_var} ${cflags} PARENT_SCOPE) -endfunction() - -################################################################################################ -# Helper function to parse current linker libs into link directories, libflags and osx frameworks -# Usage: -# xgboost_parse_linker_libs( ) -function(xgboost_parse_linker_libs xgboost_LINKER_LIBS_variable folders_var flags_var frameworks_var) - - set(__unspec "") - set(__debug "") - set(__optimized "") - set(__framework "") - set(__varname "__unspec") - - # split libs into debug, optimized, unspecified and frameworks - foreach(list_elem ${${xgboost_LINKER_LIBS_variable}}) - if(list_elem STREQUAL "debug") - set(__varname "__debug") - elseif(list_elem STREQUAL "optimized") - set(__varname "__optimized") - elseif(list_elem MATCHES "^-framework[ \t]+([^ \t].*)") - list(APPEND __framework -framework ${CMAKE_MATCH_1}) - else() - list(APPEND ${__varname} ${list_elem}) - set(__varname "__unspec") - endif() - endforeach() - - # attach debug or optimized libs to unspecified according to current configuration - if(CMAKE_BUILD_TYPE MATCHES "Debug") - set(__libs ${__unspec} ${__debug}) - else() - set(__libs ${__unspec} ${__optimized}) - endif() - - set(libflags "") - set(folders "") - - # convert linker libraries list to link flags - foreach(lib ${__libs}) - if(TARGET ${lib}) - list(APPEND folders $) - list(APPEND libflags -l${lib}) - elseif(lib MATCHES "^-l.*") - list(APPEND libflags ${lib}) - elseif(IS_ABSOLUTE ${lib}) - get_filename_component(name_we ${lib} NAME_WE) - get_filename_component(folder ${lib} PATH) - - string(REGEX MATCH 
"^lib(.*)" __match ${name_we}) - list(APPEND libflags -l${CMAKE_MATCH_1}) - list(APPEND folders ${folder}) - else() - message(FATAL_ERROR "Logic error. Need to update cmake script") - endif() - endforeach() - - xgboost_list_unique(libflags folders) - - set(${folders_var} ${folders} PARENT_SCOPE) - set(${flags_var} ${libflags} PARENT_SCOPE) - set(${frameworks_var} ${__framework} PARENT_SCOPE) -endfunction() - -################################################################################################ -# Helper function to detect Darwin version, i.e. 10.8, 10.9, 10.10, .... -# Usage: -# xgboost_detect_darwin_version() -function(xgboost_detect_darwin_version output_var) - if(APPLE) - execute_process(COMMAND /usr/bin/sw_vers -productVersion - RESULT_VARIABLE __sw_vers OUTPUT_VARIABLE __sw_vers_out - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - - set(${output_var} ${__sw_vers_out} PARENT_SCOPE) - else() - set(${output_var} "" PARENT_SCOPE) - endif() -endfunction() - -################################################################################################ -# Convenient command to setup source group for IDEs that support this feature (VS, XCode) -# Usage: -# caffe_source_group( GLOB[_RECURSE] ) -function(xgboost_source_group group) - cmake_parse_arguments(CAFFE_SOURCE_GROUP "" "" "GLOB;GLOB_RECURSE" ${ARGN}) - if(CAFFE_SOURCE_GROUP_GLOB) - file(GLOB srcs1 ${CAFFE_SOURCE_GROUP_GLOB}) - source_group(${group} FILES ${srcs1}) - endif() - - if(CAFFE_SOURCE_GROUP_GLOB_RECURSE) - file(GLOB_RECURSE srcs2 ${CAFFE_SOURCE_GROUP_GLOB_RECURSE}) - source_group(${group} FILES ${srcs2}) - endif() -endfunction() diff --git a/demo/gpu_acceleration/bosch.py b/demo/gpu_acceleration/bosch.py index 2294b24e7..894e26835 100644 --- a/demo/gpu_acceleration/bosch.py +++ b/demo/gpu_acceleration/bosch.py @@ -24,8 +24,7 @@ param['eval_metric'] = 'auc' param['max_depth'] = 5 param['eta'] = 0.3 param['silent'] = 0 -param['updater'] = 'grow_gpu' -#param['updater'] = 'grow_colmaker' 
+param['tree_method'] = 'gpu_exact' num_round = 20 diff --git a/plugin/updater_gpu/README.md b/plugin/updater_gpu/README.md index 142b61459..1cf02f29c 100644 --- a/plugin/updater_gpu/README.md +++ b/plugin/updater_gpu/README.md @@ -1,16 +1,16 @@ # CUDA Accelerated Tree Construction Algorithms This plugin adds GPU accelerated tree construction algorithms to XGBoost. ## Usage -Specify the 'updater' parameter as one of the following algorithms. +Specify the 'tree_method' parameter as one of the following algorithms. ### Algorithms -| updater | Description | +| tree_method | Description | | --- | --- | -grow_gpu | The standard XGBoost tree construction algorithm. Performs exact search for splits. Slower and uses considerably more memory than 'grow_gpu_hist' | -grow_gpu_hist | Equivalent to the XGBoost fast histogram algorithm. Faster and uses considerably less memory. Splits may be less accurate. | +gpu_exact | The standard XGBoost tree construction algorithm. Performs exact search for splits. Slower and uses considerably more memory than 'gpu_hist' | +gpu_hist | Equivalent to the XGBoost fast histogram algorithm. Faster and uses considerably less memory. Splits may be less accurate. | ### Supported parameters -| parameter | grow_gpu | grow_gpu_hist | +| parameter | gpu_exact | gpu_hist | | --- | --- | --- | subsample | ✔ | ✔ | colsample_bytree | ✔ | ✔| @@ -29,7 +29,7 @@ Python example: ```python param['gpu_id'] = 1 param['max_bin'] = 16 -param['updater'] = 'grow_gpu_hist' +param['tree_method'] = 'gpu_hist' ``` ## Benchmarks To run benchmarks on synthetic data for binary classification: @@ -39,18 +39,18 @@ $ python benchmark/benchmark.py Training time time on 1000000 rows x 50 columns with 500 boosting iterations on i7-6700K CPU @ 4.00GHz and Pascal Titan X. 
-| Updater | Time (s) | +| tree_method | Time (s) | | --- | --- | -| grow_gpu_hist | 11.09 | -| grow_fast_histmaker (histogram XGBoost - CPU) | 41.75 | -| grow_gpu | 193.90 | -| grow_colmaker (standard XGBoost - CPU) | 720.12 | +| gpu_hist | 11.09 | +| hist (histogram XGBoost - CPU) | 41.75 | +| gpu_exact | 193.90 | +| exact (standard XGBoost - CPU) | 720.12 | -[See here](http://dmlc.ml/2016/12/14/GPU-accelerated-xgboost.html) for additional performance benchmarks of the 'grow_gpu' updater. +[See here](http://dmlc.ml/2016/12/14/GPU-accelerated-xgboost.html) for additional performance benchmarks of the 'gpu_exact' tree_method. ## Test -To run tests: +To run tests: ```bash $ python -m nose test/python/ ``` @@ -122,6 +122,13 @@ $ make PLUGIN_UPDATER_GPU=ON GTEST_PATH=${CACHE_PREFIX} test ``` ## Changelog +##### 2017/6/26 + +* Change API to use tree_method parameter +* Increase required cmake version to 3.5 +* Add compute arch 3.5 to default archs +* Set default n_gpus to 1 + ##### 2017/6/5 * Multi-GPU support for histogram method using NVIDIA NCCL. 
diff --git a/plugin/updater_gpu/benchmark/benchmark.py b/plugin/updater_gpu/benchmark/benchmark.py index 525200e0f..e34dbe454 100644 --- a/plugin/updater_gpu/benchmark/benchmark.py +++ b/plugin/updater_gpu/benchmark/benchmark.py @@ -14,19 +14,18 @@ def run_benchmark(args, gpu_algorithm, cpu_algorithm): dtrain = xgb.DMatrix(X, y) param = {'objective': 'binary:logistic', - 'tree_method': 'exact', 'max_depth': 6, 'silent': 1, 'eval_metric': 'auc'} - param['updater'] = gpu_algorithm - print("Training with '%s'" % param['updater']) + param['tree_method'] = gpu_algorithm + print("Training with '%s'" % param['tree_method']) tmp = time.time() xgb.train(param, dtrain, args.iterations) print ("Time: %s seconds" % (str(time.time() - tmp))) - param['updater'] = cpu_algorithm - print("Training with '%s'" % param['updater']) + param['tree_method'] = cpu_algorithm + print("Training with '%s'" % param['tree_method']) tmp = time.time() xgb.train(param, dtrain, args.iterations) print ("Time: %s seconds" % (str(time.time() - tmp))) @@ -34,17 +33,17 @@ def run_benchmark(args, gpu_algorithm, cpu_algorithm): parser = argparse.ArgumentParser() -parser.add_argument('--algorithm', choices=['all', 'grow_gpu', 'grow_gpu_hist'], required=True) +parser.add_argument('--algorithm', choices=['all', 'gpu_exact', 'gpu_hist'], default='all') parser.add_argument('--rows',type=int,default=1000000) parser.add_argument('--columns',type=int,default=50) parser.add_argument('--iterations',type=int,default=500) args = parser.parse_args() -if 'grow_gpu_hist' in args.algorithm: - run_benchmark(args, args.algorithm, 'grow_fast_histmaker') -if 'grow_gpu' in args.algorithm: - run_benchmark(args, args.algorithm, 'grow_colmaker') +if 'gpu_hist' in args.algorithm: + run_benchmark(args, args.algorithm, 'hist') +if 'gpu_exact' in args.algorithm: + run_benchmark(args, args.algorithm, 'exact') if 'all' in args.algorithm: - run_benchmark(args, 'grow_gpu', 'grow_colmaker') - run_benchmark(args, 'grow_gpu_hist', 
'grow_fast_histmaker') + run_benchmark(args, 'gpu_exact', 'exact') + run_benchmark(args, 'gpu_hist', 'hist') diff --git a/plugin/updater_gpu/test/python/test.py b/plugin/updater_gpu/test/python/test.py index 10a6cf6cf..cca9cd739 100644 --- a/plugin/updater_gpu/test/python/test.py +++ b/plugin/updater_gpu/test/python/test.py @@ -35,7 +35,7 @@ class TestGPU(unittest.TestCase): 'objective': 'binary:logistic', 'eval_metric': 'auc'} ag_param2 = {'max_depth': 2, - 'updater': 'grow_gpu', + 'tree_method': 'gpu_exact', 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', @@ -59,7 +59,7 @@ class TestGPU(unittest.TestCase): dtest = xgb.DMatrix(X_test, y_test) param = {'objective': 'binary:logistic', - 'updater': 'grow_gpu', + 'tree_method': 'gpu_exact', 'max_depth': 3, 'eval_metric': 'auc'} res = {} @@ -75,7 +75,7 @@ class TestGPU(unittest.TestCase): dtrain2 = xgb.DMatrix(X2, label=y2) param = {'objective': 'binary:logistic', - 'updater': 'grow_gpu', + 'tree_method': 'gpu_exact', 'max_depth': 2, 'eval_metric': 'auc'} res = {} @@ -134,7 +134,7 @@ class TestGPU(unittest.TestCase): 'objective': 'binary:logistic', 'eval_metric': 'auc'} ag_param2 = {'max_depth': max_depth, - 'updater': 'grow_gpu_hist', + 'tree_method': 'gpu_hist', 'eta': 1, 'silent': 1, 'n_gpus': 1, @@ -142,7 +142,7 @@ class TestGPU(unittest.TestCase): 'max_bin': max_bin, 'eval_metric': 'auc'} ag_param3 = {'max_depth': max_depth, - 'updater': 'grow_gpu_hist', + 'tree_method': 'gpu_hist', 'eta': 1, 'silent': 1, 'n_gpus': n_gpus, @@ -177,7 +177,7 @@ class TestGPU(unittest.TestCase): dtest = xgb.DMatrix(X_test, y_test) param = {'objective': 'binary:logistic', - 'updater': 'grow_gpu_hist', + 'tree_method': 'gpu_hist', 'max_depth': max_depth, 'n_gpus': 1, 'max_bin': max_bin, @@ -189,7 +189,7 @@ class TestGPU(unittest.TestCase): assert self.non_decreasing(res['train']['auc']) #assert self.non_decreasing(res['test']['auc']) param2 = {'objective': 'binary:logistic', - 'updater': 'grow_gpu_hist', + 'tree_method': 
'gpu_hist', 'max_depth': max_depth, 'n_gpus': n_gpus, 'max_bin': max_bin, @@ -211,7 +211,7 @@ class TestGPU(unittest.TestCase): dtrain2 = xgb.DMatrix(X2, label=y2) param = {'objective': 'binary:logistic', - 'updater': 'grow_gpu_hist', + 'tree_method': 'gpu_hist', 'max_depth': max_depth, 'n_gpus': n_gpus, 'max_bin': max_bin, @@ -250,7 +250,7 @@ class TestGPU(unittest.TestCase): ###################################################################### # fail-safe test for max_bin param = {'objective': 'binary:logistic', - 'updater': 'grow_gpu_hist', + 'tree_method': 'gpu_hist', 'max_depth': max_depth, 'n_gpus': n_gpus, 'eval_metric': 'auc', @@ -263,7 +263,7 @@ class TestGPU(unittest.TestCase): ###################################################################### # subsampling param = {'objective': 'binary:logistic', - 'updater': 'grow_gpu_hist', + 'tree_method': 'gpu_hist', 'max_depth': max_depth, 'n_gpus': n_gpus, 'eval_metric': 'auc', @@ -279,7 +279,7 @@ class TestGPU(unittest.TestCase): ###################################################################### # fail-safe test for max_bin=2 param = {'objective': 'binary:logistic', - 'updater': 'grow_gpu_hist', + 'tree_method': 'gpu_hist', 'max_depth': 2, 'n_gpus': n_gpus, 'eval_metric': 'auc', diff --git a/src/learner.cc b/src/learner.cc index 2622ff4fb..d26e0d682 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -4,19 +4,19 @@ * \brief Implementation of learning algorithm. 
* \author Tianqi Chen */ -#include -#include -#include #include +#include +#include +#include #include -#include -#include -#include -#include -#include #include -#include "./common/io.h" +#include +#include +#include +#include +#include #include "./common/common.h" +#include "./common/io.h" #include "./common/random.h" namespace xgboost { @@ -25,17 +25,14 @@ bool Learner::AllowLazyCheckPoint() const { return gbm_->AllowLazyCheckPoint(); } -std::vector -Learner::DumpModel(const FeatureMap& fmap, - bool with_stats, - std::string format) const { +std::vector Learner::DumpModel(const FeatureMap& fmap, + bool with_stats, + std::string format) const { return gbm_->DumpModel(fmap, with_stats, format); } - /*! \brief training parameter for regression */ -struct LearnerModelParam - : public dmlc::Parameter { +struct LearnerModelParam : public dmlc::Parameter { /* \brief global bias */ bst_float base_score; /* \brief number of features */ @@ -55,20 +52,21 @@ struct LearnerModelParam } // declare parameters DMLC_DECLARE_PARAMETER(LearnerModelParam) { - DMLC_DECLARE_FIELD(base_score).set_default(0.5f) + DMLC_DECLARE_FIELD(base_score) + .set_default(0.5f) .describe("Global bias of the model."); - DMLC_DECLARE_FIELD(num_feature).set_default(0) - .describe("Number of features in training data,"\ - " this parameter will be automatically detected by learner."); - DMLC_DECLARE_FIELD(num_class).set_default(0).set_lower_bound(0) - .describe("Number of class option for multi-class classifier. "\ - " By default equals 0 and corresponds to binary classifier."); + DMLC_DECLARE_FIELD(num_feature) + .set_default(0) + .describe( + "Number of features in training data," + " this parameter will be automatically detected by learner."); + DMLC_DECLARE_FIELD(num_class).set_default(0).set_lower_bound(0).describe( + "Number of class option for multi-class classifier. 
" + " By default equals 0 and corresponds to binary classifier."); } }; - -struct LearnerTrainParam - : public dmlc::Parameter { +struct LearnerTrainParam : public dmlc::Parameter { // stored random seed int seed; // whether seed the PRNG each iteration @@ -90,30 +88,40 @@ struct LearnerTrainParam int debug_verbose; // declare parameters DMLC_DECLARE_PARAMETER(LearnerTrainParam) { - DMLC_DECLARE_FIELD(seed).set_default(0) - .describe("Random number seed during training."); - DMLC_DECLARE_FIELD(seed_per_iteration).set_default(false) - .describe("Seed PRNG determnisticly via iterator number, "\ - "this option will be switched on automatically on distributed mode."); - DMLC_DECLARE_FIELD(dsplit).set_default(0) + DMLC_DECLARE_FIELD(seed).set_default(0).describe( + "Random number seed during training."); + DMLC_DECLARE_FIELD(seed_per_iteration) + .set_default(false) + .describe( + "Seed PRNG determnisticly via iterator number, " + "this option will be switched on automatically on distributed " + "mode."); + DMLC_DECLARE_FIELD(dsplit) + .set_default(0) .add_enum("auto", 0) .add_enum("col", 1) .add_enum("row", 2) .describe("Data split mode for distributed training."); - DMLC_DECLARE_FIELD(tree_method).set_default(0) + DMLC_DECLARE_FIELD(tree_method) + .set_default(0) .add_enum("auto", 0) .add_enum("approx", 1) .add_enum("exact", 2) .add_enum("hist", 3) + .add_enum("gpu_exact", 4) + .add_enum("gpu_hist", 5) .describe("Choice of tree construction method."); - DMLC_DECLARE_FIELD(test_flag).set_default("") - .describe("Internal test flag"); - DMLC_DECLARE_FIELD(prob_buffer_row).set_default(1.0f).set_range(0.0f, 1.0f) + DMLC_DECLARE_FIELD(test_flag).set_default("").describe( + "Internal test flag"); + DMLC_DECLARE_FIELD(prob_buffer_row) + .set_default(1.0f) + .set_range(0.0f, 1.0f) .describe("Maximum buffered row portion"); - DMLC_DECLARE_FIELD(max_row_perbatch).set_default(std::numeric_limits::max()) + DMLC_DECLARE_FIELD(max_row_perbatch) + 
.set_default(std::numeric_limits::max()) .describe("maximum row per batch."); - DMLC_DECLARE_FIELD(nthread).set_default(0) - .describe("Number of threads to use."); + DMLC_DECLARE_FIELD(nthread).set_default(0).describe( + "Number of threads to use."); DMLC_DECLARE_FIELD(debug_verbose) .set_lower_bound(0) .set_default(0) @@ -125,8 +133,8 @@ DMLC_REGISTER_PARAMETER(LearnerModelParam); DMLC_REGISTER_PARAMETER(LearnerTrainParam); /*! - * \brief learner that performs gradient boosting for a specific objective function. - * It does training and prediction. + * \brief learner that performs gradient boosting for a specific objective + * function. It does training and prediction. */ class LearnerImpl : public Learner { public: @@ -137,14 +145,41 @@ class LearnerImpl : public Learner { name_gbm_ = "gbtree"; } - void Configure(const std::vector >& args) override { + void ConfigureUpdaters() { + if (tparam.tree_method == 0 || tparam.tree_method == 1 || + tparam.tree_method == 2) { + if (cfg_.count("updater") == 0) { + if (tparam.dsplit == 1) { + cfg_["updater"] = "distcol"; + } else if (tparam.dsplit == 2) { + cfg_["updater"] = "grow_histmaker,prune"; + } + if (tparam.prob_buffer_row != 1.0f) { + cfg_["updater"] = "grow_histmaker,refresh,prune"; + } + } + } else if (tparam.tree_method == 3) { + /* histogram-based algorithm */ + LOG(CONSOLE) << "Tree method is selected to be \'hist\', which uses a " + "single updater " + << "grow_fast_histmaker."; + cfg_["updater"] = "grow_fast_histmaker"; + } else if (tparam.tree_method == 4) { + cfg_["updater"] = "grow_gpu,prune"; + } else if (tparam.tree_method == 5) { + cfg_["updater"] = "grow_gpu_hist"; + } + } + + void Configure( + const std::vector >& args) override { // add to configurations tparam.InitAllowUnknown(args); cfg_.clear(); for (const auto& kv : args) { if (kv.first == "eval_metric") { // check duplication - auto dup_check = [&kv](const std::unique_ptr&m) { + auto dup_check = [&kv](const std::unique_ptr& m) { return 
m->Name() != kv.second; }; if (std::all_of(metrics_.begin(), metrics_.end(), dup_check)) { @@ -172,27 +207,13 @@ class LearnerImpl : public Learner { } } - if (cfg_.count("max_delta_step") == 0 && - cfg_.count("objective") != 0 && + if (cfg_.count("max_delta_step") == 0 && cfg_.count("objective") != 0 && cfg_["objective"] == "count:poisson") { cfg_["max_delta_step"] = "0.7"; } - if (tparam.tree_method == 3) { - /* histogram-based algorithm */ - LOG(CONSOLE) << "Tree method is selected to be \'hist\', which uses a single updater " - << "grow_fast_histmaker."; - cfg_["updater"] = "grow_fast_histmaker"; - } else if (cfg_.count("updater") == 0) { - if (tparam.dsplit == 1) { - cfg_["updater"] = "distcol"; - } else if (tparam.dsplit == 2) { - cfg_["updater"] = "grow_histmaker,prune"; - } - if (tparam.prob_buffer_row != 1.0f) { - cfg_["updater"] = "grow_histmaker,refresh,prune"; - } - } + ConfigureUpdaters(); + if (cfg_.count("objective") == 0) { cfg_["objective"] = "reg:linear"; } @@ -220,9 +241,7 @@ class LearnerImpl : public Learner { } } - void InitModel() override { - this->LazyInitModel(); - } + void InitModel() override { this->LazyInitModel(); } void Load(dmlc::Stream* fi) override { // TODO(tqchen) mark deprecation of old format. 
@@ -256,11 +275,10 @@ class LearnerImpl : public Learner { if (len != 0) { name_obj_.resize(len); CHECK_EQ(fi->Read(&name_obj_[0], len), len) - <<"BoostLearner: wrong model format"; + << "BoostLearner: wrong model format"; } } - CHECK(fi->Read(&name_gbm_)) - << "BoostLearner: wrong model format"; + CHECK(fi->Read(&name_gbm_)) << "BoostLearner: wrong model format"; // duplicated code with LazyInitModel obj_.reset(ObjFunction::Create(name_obj_)); gbm_.reset(GradientBooster::Create(name_gbm_, cache_, mparam.base_score)); @@ -268,13 +286,13 @@ class LearnerImpl : public Learner { if (mparam.contain_extra_attrs != 0) { std::vector > attr; fi->Read(&attr); - attributes_ = std::map( - attr.begin(), attr.end()); + attributes_ = + std::map(attr.begin(), attr.end()); } if (name_obj_ == "count:poisson") { - std::string max_delta_step; - fi->Read(&max_delta_step); - cfg_["max_delta_step"] = max_delta_step; + std::string max_delta_step; + fi->Read(&max_delta_step); + cfg_["max_delta_step"] = max_delta_step; } if (mparam.contain_eval_metrics != 0) { std::vector metr; @@ -289,7 +307,7 @@ class LearnerImpl : public Learner { } // rabit save model to rabit checkpoint - void Save(dmlc::Stream *fo) const override { + void Save(dmlc::Stream* fo) const override { fo->Write(&mparam, sizeof(LearnerModelParam)); fo->Write(name_obj_); fo->Write(name_gbm_); @@ -300,9 +318,9 @@ class LearnerImpl : public Learner { fo->Write(attr); } if (name_obj_ == "count:poisson") { - std::map::const_iterator it = cfg_.find("max_delta_step"); - if (it != cfg_.end()) - fo->Write(it->second); + std::map::const_iterator it = + cfg_.find("max_delta_step"); + if (it != cfg_.end()) fo->Write(it->second); } if (mparam.contain_eval_metrics != 0) { std::vector metr; @@ -325,8 +343,7 @@ class LearnerImpl : public Learner { gbm_->DoBoost(train, &gpair_, obj_.get()); } - void BoostOneIter(int iter, - DMatrix* train, + void BoostOneIter(int iter, DMatrix* train, std::vector* in_gpair) override { if 
(tparam.seed_per_iteration || rabit::IsDistributed()) { common::GlobalRandom().seed(tparam.seed * kRandSeedMagic + iter); @@ -335,13 +352,11 @@ class LearnerImpl : public Learner { gbm_->DoBoost(train, in_gpair); } - std::string EvalOneIter(int iter, - const std::vector& data_sets, + std::string EvalOneIter(int iter, const std::vector& data_sets, const std::vector& data_names) override { double tstart = dmlc::GetTime(); std::ostringstream os; - os << '[' << iter << ']' - << std::setiosflags(std::ios::fixed); + os << '[' << iter << ']' << std::setiosflags(std::ios::fixed); if (metrics_.size() == 0) { metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric())); } @@ -388,20 +403,19 @@ class LearnerImpl : public Learner { return out; } - std::pair Evaluate(DMatrix* data, std::string metric) { + std::pair Evaluate(DMatrix* data, + std::string metric) { if (metric == "auto") metric = obj_->DefaultEvalMetric(); std::unique_ptr ev(Metric::Create(metric.c_str())); this->PredictRaw(data, &preds_); obj_->EvalTransform(&preds_); - return std::make_pair(metric, ev->Eval(preds_, data->info(), tparam.dsplit == 2)); + return std::make_pair(metric, + ev->Eval(preds_, data->info(), tparam.dsplit == 2)); } - void Predict(DMatrix* data, - bool output_margin, - std::vector *out_preds, - unsigned ntree_limit, - bool pred_leaf, - bool pred_contribs) const override { + void Predict(DMatrix* data, bool output_margin, + std::vector* out_preds, unsigned ntree_limit, + bool pred_leaf, bool pred_contribs) const override { if (pred_contribs) { gbm_->PredictContribution(data, out_preds, ntree_limit); } else if (pred_leaf) { @@ -418,7 +432,12 @@ class LearnerImpl : public Learner { // check if p_train is ready to used by training. // if not, initialize the column access. 
inline void LazyInitDMatrix(DMatrix* p_train) { - if (tparam.tree_method != 3 && !p_train->HaveColAccess()) { + if (tparam.tree_method == 3 || tparam.tree_method == 4 || + tparam.tree_method == 5) { + return; + } + + if (!p_train->HaveColAccess()) { int ncol = static_cast(p_train->info().num_col); std::vector enabled(ncol, true); // set max row per batch to limited value @@ -426,12 +445,12 @@ class LearnerImpl : public Learner { size_t max_row_perbatch = tparam.max_row_perbatch; const size_t safe_max_row = static_cast(32UL << 10UL); - if (tparam.tree_method == 0 && - p_train->info().num_row >= (4UL << 20UL)) { - LOG(CONSOLE) << "Tree method is automatically selected to be \'approx\'" - << " for faster speed." - << " to use old behavior(exact greedy algorithm on single machine)," - << " set tree_method to \'exact\'"; + if (tparam.tree_method == 0 && p_train->info().num_row >= (4UL << 20UL)) { + LOG(CONSOLE) + << "Tree method is automatically selected to be \'approx\'" + << " for faster speed." + << " to use old behavior(exact greedy algorithm on single machine)," + << " set tree_method to \'exact\'"; max_row_perbatch = std::min(max_row_perbatch, safe_max_row); } @@ -444,15 +463,14 @@ class LearnerImpl : public Learner { max_row_perbatch = std::min(max_row_perbatch, safe_max_row); } // initialize column access - p_train->InitColAccess(enabled, - tparam.prob_buffer_row, - max_row_perbatch); + p_train->InitColAccess(enabled, tparam.prob_buffer_row, max_row_perbatch); } if (!p_train->SingleColBlock() && cfg_.count("updater") == 0) { if (tparam.tree_method == 2) { LOG(CONSOLE) << "tree method is set to be 'exact'," - << " but currently we are only able to proceed with approximate algorithm"; + << " but currently we are only able to proceed with " + "approximate algorithm"; } cfg_["updater"] = "grow_histmaker,prune"; if (gbm_.get() != nullptr) { @@ -462,9 +480,7 @@ class LearnerImpl : public Learner { } // return whether model is already initialized. 
- inline bool ModelInitialized() const { - return gbm_.get() != nullptr; - } + inline bool ModelInitialized() const { return gbm_.get() != nullptr; } // lazily initialize the model if it haven't yet been initialized. inline void LazyInitModel() { if (this->ModelInitialized()) return; @@ -497,14 +513,11 @@ class LearnerImpl : public Learner { * \param ntree_limit limit number of trees used for boosted tree * predictor, when it equals 0, this means we are using all the trees */ - inline void PredictRaw(DMatrix* data, - std::vector* out_preds, + inline void PredictRaw(DMatrix* data, std::vector* out_preds, unsigned ntree_limit = 0) const { CHECK(gbm_.get() != nullptr) << "Predict must happen after Load or InitModel"; - gbm_->Predict(data, - out_preds, - ntree_limit); + gbm_->Predict(data, out_preds, ntree_limit); } // model parameter LearnerModelParam mparam; @@ -530,7 +543,8 @@ class LearnerImpl : public Learner { std::vector > cache_; }; -Learner* Learner::Create(const std::vector >& cache_data) { +Learner* Learner::Create( + const std::vector >& cache_data) { return new LearnerImpl(cache_data); } } // namespace xgboost