diff --git a/CMakeLists.txt b/CMakeLists.txt index b02b8f026..e19aedb31 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,9 +3,10 @@ project (xgboost) find_package(OpenMP) option(PLUGIN_UPDATER_GPU "Build GPU accelerated tree construction plugin") -set(GPU_COMPUTE_VER 50;52;60;61 CACHE STRING +set(GPU_COMPUTE_VER 35;50;52;60;61 CACHE STRING "Space separated list of compute versions to be built against") if(PLUGIN_UPDATER_GPU) + cmake_minimum_required (VERSION 3.5) find_package(CUDA REQUIRED) endif() diff --git a/cmake/Cuda.cmake b/cmake/Cuda.cmake deleted file mode 100644 index 30c5139d5..000000000 --- a/cmake/Cuda.cmake +++ /dev/null @@ -1,289 +0,0 @@ - -include(CheckCXXCompilerFlag) -check_cxx_compiler_flag("-std=c++11" SUPPORT_CXX11) - -################################################################################################ -# A function for automatic detection of GPUs installed (if autodetection is enabled) -# Usage: -# mshadow_detect_installed_gpus(out_variable) -function(xgboost_detect_installed_gpus out_variable) -set(CUDA_gpu_detect_output "") - if(NOT CUDA_gpu_detect_output) - set(__cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu) - - file(WRITE ${__cufile} "" - "#include \n" - "int main()\n" - "{\n" - " int count = 0;\n" - " if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n" - " if (count == 0) return -1;\n" - " for (int device = 0; device < count; ++device)\n" - " {\n" - " cudaDeviceProp prop;\n" - " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n" - " std::printf(\"%d.%d \", prop.major, prop.minor);\n" - " }\n" - " return 0;\n" - "}\n") - if(MSVC) - #find vcvarsall.bat and run it building msvc environment - get_filename_component(MY_COMPILER_DIR ${CMAKE_CXX_COMPILER} DIRECTORY) - find_file(MY_VCVARSALL_BAT vcvarsall.bat "${MY_COMPILER_DIR}/.." 
"${MY_COMPILER_DIR}/../..") - execute_process(COMMAND ${MY_VCVARSALL_BAT} && ${CUDA_NVCC_EXECUTABLE} -arch sm_30 --run ${__cufile} - WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/" - RESULT_VARIABLE __nvcc_res OUTPUT_VARIABLE __nvcc_out - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) - else() - if(CUDA_LIBRARY_PATH) - set(CUDA_LINK_LIBRARY_PATH "-L${CUDA_LIBRARY_PATH}") - endif() - execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} -arch sm_30 --run ${__cufile} ${CUDA_LINK_LIBRARY_PATH} - WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/" - RESULT_VARIABLE __nvcc_res OUTPUT_VARIABLE __nvcc_out - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) - endif() - if(__nvcc_res EQUAL 0) - # nvcc outputs text containing line breaks when building with MSVC. - # The line below prevents CMake from inserting a variable with line - # breaks in the cache - string(REGEX MATCH "([1-9].[0-9])" __nvcc_out "${__nvcc_out}") - string(REPLACE "2.1" "2.1(2.0)" __nvcc_out "${__nvcc_out}") - set(CUDA_gpu_detect_output ${__nvcc_out} CACHE INTERNAL "Returned GPU architetures from xgboost_detect_gpus tool" FORCE) - else() - message(WARNING "Running GPU detection script with nvcc failed: ${__nvcc_out}") - endif() - endif() - - if(NOT CUDA_gpu_detect_output) - message(WARNING "Automatic GPU detection failed. 
Building for all known architectures (${xgboost_known_gpu_archs}).") - set(${out_variable} ${xgboost_known_gpu_archs} PARENT_SCOPE) - else() - set(${out_variable} ${CUDA_gpu_detect_output} PARENT_SCOPE) - endif() -endfunction() - - -################################################################################################ -# Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME -# Usage: -# xgboost_select_nvcc_arch_flags(out_variable) -function(xgboost_select_nvcc_arch_flags out_variable) - # List of arch names - set(__archs_names "Fermi" "Kepler" "Maxwell" "Pascal" "All" "Manual") - set(__archs_name_default "All") - if(NOT CMAKE_CROSSCOMPILING) - list(APPEND __archs_names "Auto") - set(__archs_name_default "Auto") - endif() - - # set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui) - set(CUDA_ARCH_NAME ${__archs_name_default} CACHE STRING "Select target NVIDIA GPU achitecture.") - set_property( CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${__archs_names} ) - mark_as_advanced(CUDA_ARCH_NAME) - - # verify CUDA_ARCH_NAME value - if(NOT ";${__archs_names};" MATCHES ";${CUDA_ARCH_NAME};") - string(REPLACE ";" ", " __archs_names "${__archs_names}") - message(FATAL_ERROR "Only ${__archs_names} architeture names are supported.") - endif() - - if(${CUDA_ARCH_NAME} STREQUAL "Manual") - set(CUDA_ARCH_BIN ${xgboost_known_gpu_archs} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported") - set(CUDA_ARCH_PTX "50" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for") - mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX) - else() - unset(CUDA_ARCH_BIN CACHE) - unset(CUDA_ARCH_PTX CACHE) - endif() - - if(${CUDA_ARCH_NAME} STREQUAL "Fermi") - set(__cuda_arch_bin "20 21(20)") - elseif(${CUDA_ARCH_NAME} STREQUAL "Kepler") - set(__cuda_arch_bin "30 35") - elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell") - set(__cuda_arch_bin "50") - elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal") - 
set(__cuda_arch_bin "60 61") - elseif(${CUDA_ARCH_NAME} STREQUAL "All") - set(__cuda_arch_bin ${xgboost_known_gpu_archs}) - elseif(${CUDA_ARCH_NAME} STREQUAL "Auto") - xgboost_detect_installed_gpus(__cuda_arch_bin) - else() # (${CUDA_ARCH_NAME} STREQUAL "Manual") - set(__cuda_arch_bin ${CUDA_ARCH_BIN}) - endif() - - # remove dots and convert to lists - string(REGEX REPLACE "\\." "" __cuda_arch_bin "${__cuda_arch_bin}") - string(REGEX REPLACE "\\." "" __cuda_arch_ptx "${CUDA_ARCH_PTX}") - string(REGEX MATCHALL "[0-9()]+" __cuda_arch_bin "${__cuda_arch_bin}") - string(REGEX MATCHALL "[0-9]+" __cuda_arch_ptx "${__cuda_arch_ptx}") - xgboost_list_unique(__cuda_arch_bin __cuda_arch_ptx) - - set(__nvcc_flags "") - set(__nvcc_archs_readable "") - - # Tell NVCC to add binaries for the specified GPUs - foreach(__arch ${__cuda_arch_bin}) - if(__arch MATCHES "([0-9]+)\\(([0-9]+)\\)") - # User explicitly specified PTX for the concrete BIN - list(APPEND __nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}) - list(APPEND __nvcc_archs_readable sm_${CMAKE_MATCH_1}) - else() - # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN - list(APPEND __nvcc_flags -gencode arch=compute_${__arch},code=sm_${__arch}) - list(APPEND __nvcc_archs_readable sm_${__arch}) - endif() - endforeach() - - # Tell NVCC to add PTX intermediate code for the specified architectures - foreach(__arch ${__cuda_arch_ptx}) - list(APPEND __nvcc_flags -gencode arch=compute_${__arch},code=compute_${__arch}) - list(APPEND __nvcc_archs_readable compute_${__arch}) - endforeach() - - string(REPLACE ";" " " __nvcc_archs_readable "${__nvcc_archs_readable}") - set(${out_variable} ${__nvcc_flags} PARENT_SCOPE) - set(${out_variable}_readable ${__nvcc_archs_readable} PARENT_SCOPE) -endfunction() - -################################################################################################ -# Short command for cuda comnpilation -# Usage: -# xgboost_cuda_compile( ) 
-macro(xgboost_cuda_compile objlist_variable) - foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG) - set(${var}_backup_in_cuda_compile_ "${${var}}") - - # we remove /EHa as it generates warnings under windows - string(REPLACE "/EHa" "" ${var} "${${var}}") - - endforeach() - if(UNIX OR APPLE) - list(APPEND CUDA_NVCC_FLAGS -Xcompiler -fPIC) - endif() - - if(APPLE) - list(APPEND CUDA_NVCC_FLAGS -Xcompiler -Wno-unused-function) - endif() - - set(CUDA_NVCC_FLAGS_DEBUG "${CUDA_NVCC_FLAGS_DEBUG} -G -lineinfo") - - if(MSVC) - # disable noisy warnings: - # 4819: The file contains a character that cannot be represented in the current code page (number). - list(APPEND CUDA_NVCC_FLAGS -Xcompiler "/wd4819") - foreach(flag_var - CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE - CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) - if(${flag_var} MATCHES "/MD") - string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") - endif(${flag_var} MATCHES "/MD") - endforeach(flag_var) - endif() - - # If the build system is a container, make sure the nvcc intermediate files - # go into the build output area rather than in /tmp, which may run out of space - if(IS_CONTAINER_BUILD) - set(CUDA_NVCC_INTERMEDIATE_DIR "${CMAKE_CURRENT_BINARY_DIR}") - message(STATUS "Container build enabled, so nvcc intermediate files in: ${CUDA_NVCC_INTERMEDIATE_DIR}") - list(APPEND CUDA_NVCC_FLAGS "--keep --keep-dir ${CUDA_NVCC_INTERMEDIATE_DIR}") - endif() - - cuda_compile(cuda_objcs ${ARGN}) - - foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG) - set(${var} "${${var}_backup_in_cuda_compile_}") - unset(${var}_backup_in_cuda_compile_) - endforeach() - - set(${objlist_variable} ${cuda_objcs}) -endmacro() - - -################################################################################################ -### Non macro section -################################################################################################ - -# Try to 
prime CUDA_TOOLKIT_ROOT_DIR by looking for libcudart.so -if(NOT CUDA_TOOLKIT_ROOT_DIR) - find_library(CUDA_LIBRARY_PATH libcudart.so PATHS ENV LD_LIBRARY_PATH PATH_SUFFIXES lib lib64) - if(CUDA_LIBRARY_PATH) - get_filename_component(CUDA_LIBRARY_PATH ${CUDA_LIBRARY_PATH} DIRECTORY) - set(CUDA_TOOLKIT_ROOT_DIR "${CUDA_LIBRARY_PATH}/..") - endif() -endif() - -find_package(CUDA 5.5 QUIET REQUIRED) -find_cuda_helper_libs(curand) # cmake 2.8.7 compartibility which doesn't search for curand - -if(NOT CUDA_FOUND) - return() -endif() - -set(HAVE_CUDA TRUE) -message(STATUS "CUDA detected: " ${CUDA_VERSION}) -include_directories(SYSTEM ${CUDA_INCLUDE_DIRS}) -list(APPEND xgboost_LINKER_LIBS ${CUDA_CUDART_LIBRARY} - ${CUDA_curand_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}) - -# Known NVIDIA GPU achitectures xgboost can be compiled for. -# This list will be used for CUDA_ARCH_NAME = All option -if(CUDA_ARCH_ALL) - set(xgboost_known_gpu_archs "${CUDA_ARCH_ALL}") -else() - if(${CUDA_VERSION} GREATER 7.5) - set(xgboost_known_gpu_archs "30 35 50 52 60 61") - else() - set(xgboost_known_gpu_archs "30 35 50 52") - endif() -endif() - -# cudnn detection -if(USE_CUDNN) - detect_cuDNN() - if(HAVE_CUDNN) - add_definitions(-DUSE_CUDNN) - include_directories(SYSTEM ${CUDNN_INCLUDE}) - list(APPEND xgboost_LINKER_LIBS ${CUDNN_LIBRARY}) - endif() -endif() - -# setting nvcc arch flags -xgboost_select_nvcc_arch_flags(NVCC_FLAGS_EXTRA) -list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA}) -message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA_readable}") - -# Boost 1.55 workaround, see https://svn.boost.org/trac/boost/ticket/9392 or -# https://github.com/ComputationalRadiationPhysics/picongpu/blob/master/src/picongpu/CMakeLists.txt -if(Boost_VERSION EQUAL 105500) - message(STATUS "Cuda + Boost 1.55: Applying noinline work around") - # avoid warning for CMake >= 2.8.12 - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} \"-DBOOST_NOINLINE=__attribute__((noinline))\" ") -endif() - -# disable some nvcc diagnostic 
that apears in boost, glog, glags, opencv, etc. -foreach(diag cc_clobber_ignored integer_sign_change useless_using_declaration set_but_not_used) - list(APPEND CUDA_NVCC_FLAGS -Xcudafe --diag_suppress=${diag}) -endforeach() - -# setting default testing device -if(NOT CUDA_TEST_DEVICE) - set(CUDA_TEST_DEVICE -1) -endif() - -mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD) -mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION) - -# Handle clang/libc++ issue -if(APPLE) - xgboost_detect_darwin_version(OSX_VERSION) - - # OSX 10.9 and higher uses clang/libc++ by default which is incompartible with old CUDA toolkits - if(OSX_VERSION VERSION_GREATER 10.8) - # enabled by default if and only if CUDA version is less than 7.0 - xgboost_option(USE_libstdcpp "Use libstdc++ instead of libc++" (CUDA_VERSION VERSION_LESS 7.0)) - endif() -endif() diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake deleted file mode 100644 index 8b737f8b7..000000000 --- a/cmake/Utils.cmake +++ /dev/null @@ -1,398 +0,0 @@ -################################################################################################ -# Command alias for debugging messages -# Usage: -# dmsg() -function(dmsg) - message(STATUS ${ARGN}) -endfunction() - -################################################################################################ -# Removes duplicates from list(s) -# Usage: -# xgboost_list_unique( [] [...]) -macro(xgboost_list_unique) - foreach(__lst ${ARGN}) - if(${__lst}) - list(REMOVE_DUPLICATES ${__lst}) - endif() - endforeach() -endmacro() - -################################################################################################ -# Clears variables from list -# Usage: -# xgboost_clear_vars() -macro(xgboost_clear_vars) - foreach(_var ${ARGN}) - unset(${_var}) - endforeach() -endmacro() - -################################################################################################ -# Removes duplicates from string -# Usage: -# xgboost_string_unique() 
-function(xgboost_string_unique __string) - if(${__string}) - set(__list ${${__string}}) - separate_arguments(__list) - list(REMOVE_DUPLICATES __list) - foreach(__e ${__list}) - set(__str "${__str} ${__e}") - endforeach() - set(${__string} ${__str} PARENT_SCOPE) - endif() -endfunction() - -################################################################################################ -# Prints list element per line -# Usage: -# xgboost_print_list() -function(xgboost_print_list) - foreach(e ${ARGN}) - message(STATUS ${e}) - endforeach() -endfunction() - -################################################################################################ -# Function merging lists of compiler flags to single string. -# Usage: -# xgboost_merge_flag_lists(out_variable [] [] ...) -function(xgboost_merge_flag_lists out_var) - set(__result "") - foreach(__list ${ARGN}) - foreach(__flag ${${__list}}) - string(STRIP ${__flag} __flag) - set(__result "${__result} ${__flag}") - endforeach() - endforeach() - string(STRIP ${__result} __result) - set(${out_var} ${__result} PARENT_SCOPE) -endfunction() - -################################################################################################ -# Converts all paths in list to absolute -# Usage: -# xgboost_convert_absolute_paths() -function(xgboost_convert_absolute_paths variable) - set(__dlist "") - foreach(__s ${${variable}}) - get_filename_component(__abspath ${__s} ABSOLUTE) - list(APPEND __list ${__abspath}) - endforeach() - set(${variable} ${__list} PARENT_SCOPE) -endfunction() - -################################################################################################ -# Reads set of version defines from the header file -# Usage: -# xgboost_parse_header( ..) 
-macro(xgboost_parse_header FILENAME FILE_VAR) - set(vars_regex "") - set(__parnet_scope OFF) - set(__add_cache OFF) - foreach(name ${ARGN}) - if("${name}" STREQUAL "PARENT_SCOPE") - set(__parnet_scope ON) - elseif("${name}" STREQUAL "CACHE") - set(__add_cache ON) - elseif(vars_regex) - set(vars_regex "${vars_regex}|${name}") - else() - set(vars_regex "${name}") - endif() - endforeach() - if(EXISTS "${FILENAME}") - file(STRINGS "${FILENAME}" ${FILE_VAR} REGEX "#define[ \t]+(${vars_regex})[ \t]+[0-9]+" ) - else() - unset(${FILE_VAR}) - endif() - foreach(name ${ARGN}) - if(NOT "${name}" STREQUAL "PARENT_SCOPE" AND NOT "${name}" STREQUAL "CACHE") - if(${FILE_VAR}) - if(${FILE_VAR} MATCHES ".+[ \t]${name}[ \t]+([0-9]+).*") - string(REGEX REPLACE ".+[ \t]${name}[ \t]+([0-9]+).*" "\\1" ${name} "${${FILE_VAR}}") - else() - set(${name} "") - endif() - if(__add_cache) - set(${name} ${${name}} CACHE INTERNAL "${name} parsed from ${FILENAME}" FORCE) - elseif(__parnet_scope) - set(${name} "${${name}}" PARENT_SCOPE) - endif() - else() - unset(${name} CACHE) - endif() - endif() - endforeach() -endmacro() - -################################################################################################ -# Reads single version define from the header file and parses it -# Usage: -# xgboost_parse_header_single_define( ) -function(xgboost_parse_header_single_define LIBNAME HDR_PATH VARNAME) - set(${LIBNAME}_H "") - if(EXISTS "${HDR_PATH}") - file(STRINGS "${HDR_PATH}" ${LIBNAME}_H REGEX "^#define[ \t]+${VARNAME}[ \t]+\"[^\"]*\".*$" LIMIT_COUNT 1) - endif() - - if(${LIBNAME}_H) - string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MAJOR "${${LIBNAME}_H}") - string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MINOR "${${LIBNAME}_H}") - string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_PATCH "${${LIBNAME}_H}") - set(${LIBNAME}_VERSION_MAJOR 
${${LIBNAME}_VERSION_MAJOR} ${ARGN} PARENT_SCOPE) - set(${LIBNAME}_VERSION_MINOR ${${LIBNAME}_VERSION_MINOR} ${ARGN} PARENT_SCOPE) - set(${LIBNAME}_VERSION_PATCH ${${LIBNAME}_VERSION_PATCH} ${ARGN} PARENT_SCOPE) - set(${LIBNAME}_VERSION_STRING "${${LIBNAME}_VERSION_MAJOR}.${${LIBNAME}_VERSION_MINOR}.${${LIBNAME}_VERSION_PATCH}" PARENT_SCOPE) - - # append a TWEAK version if it exists: - set(${LIBNAME}_VERSION_TWEAK "") - if("${${LIBNAME}_H}" MATCHES "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.[0-9]+\\.[0-9]+\\.([0-9]+).*$") - set(${LIBNAME}_VERSION_TWEAK "${CMAKE_MATCH_1}" ${ARGN} PARENT_SCOPE) - endif() - if(${LIBNAME}_VERSION_TWEAK) - set(${LIBNAME}_VERSION_STRING "${${LIBNAME}_VERSION_STRING}.${${LIBNAME}_VERSION_TWEAK}" ${ARGN} PARENT_SCOPE) - else() - set(${LIBNAME}_VERSION_STRING "${${LIBNAME}_VERSION_STRING}" ${ARGN} PARENT_SCOPE) - endif() - endif() -endfunction() - -######################################################################################################## -# An option that the user can select. Can accept condition to control when option is available for user. 
-# Usage: -# xgboost_option( "doc string" [IF ]) -function(xgboost_option variable description value) - set(__value ${value}) - set(__condition "") - set(__varname "__value") - foreach(arg ${ARGN}) - if(arg STREQUAL "IF" OR arg STREQUAL "if") - set(__varname "__condition") - else() - list(APPEND ${__varname} ${arg}) - endif() - endforeach() - unset(__varname) - if("${__condition}" STREQUAL "") - set(__condition 2 GREATER 1) - endif() - - if(${__condition}) - if("${__value}" MATCHES ";") - if(${__value}) - option(${variable} "${description}" ON) - else() - option(${variable} "${description}" OFF) - endif() - elseif(DEFINED ${__value}) - if(${__value}) - option(${variable} "${description}" ON) - else() - option(${variable} "${description}" OFF) - endif() - else() - option(${variable} "${description}" ${__value}) - endif() - else() - unset(${variable} CACHE) - endif() -endfunction() - -################################################################################################ -# Utility macro for comparing two lists. Used for CMake debugging purposes -# Usage: -# xgboost_compare_lists( [description]) -function(xgboost_compare_lists list1 list2 desc) - set(__list1 ${${list1}}) - set(__list2 ${${list2}}) - list(SORT __list1) - list(SORT __list2) - list(LENGTH __list1 __len1) - list(LENGTH __list2 __len2) - - if(NOT ${__len1} EQUAL ${__len2}) - message(FATAL_ERROR "Lists are not equal. ${__len1} != ${__len2}. ${desc}") - endif() - - foreach(__i RANGE 1 ${__len1}) - math(EXPR __index "${__i}- 1") - list(GET __list1 ${__index} __item1) - list(GET __list2 ${__index} __item2) - if(NOT ${__item1} STREQUAL ${__item2}) - message(FATAL_ERROR "Lists are not equal. Differ at element ${__index}. 
${desc}") - endif() - endforeach() -endfunction() - -################################################################################################ -# Command for disabling warnings for different platforms (see below for gcc and VisualStudio) -# Usage: -# xgboost_warnings_disable( -Wshadow /wd4996 ..,) -macro(xgboost_warnings_disable) - set(_flag_vars "") - set(_msvc_warnings "") - set(_gxx_warnings "") - - foreach(arg ${ARGN}) - if(arg MATCHES "^CMAKE_") - list(APPEND _flag_vars ${arg}) - elseif(arg MATCHES "^/wd") - list(APPEND _msvc_warnings ${arg}) - elseif(arg MATCHES "^-W") - list(APPEND _gxx_warnings ${arg}) - endif() - endforeach() - - if(NOT _flag_vars) - set(_flag_vars CMAKE_C_FLAGS CMAKE_CXX_FLAGS) - endif() - - if(MSVC AND _msvc_warnings) - foreach(var ${_flag_vars}) - foreach(warning ${_msvc_warnings}) - set(${var} "${${var}} ${warning}") - endforeach() - endforeach() - elseif((CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX) AND _gxx_warnings) - foreach(var ${_flag_vars}) - foreach(warning ${_gxx_warnings}) - if(NOT warning MATCHES "^-Wno-") - string(REPLACE "${warning}" "" ${var} "${${var}}") - string(REPLACE "-W" "-Wno-" warning "${warning}") - endif() - set(${var} "${${var}} ${warning}") - endforeach() - endforeach() - endif() - xgboost_clear_vars(_flag_vars _msvc_warnings _gxx_warnings) -endmacro() - -################################################################################################ -# Helper function get current definitions -# Usage: -# xgboost_get_current_definitions() -function(xgboost_get_current_definitions definitions_var) - get_property(current_definitions DIRECTORY PROPERTY COMPILE_DEFINITIONS) - set(result "") - - foreach(d ${current_definitions}) - list(APPEND result -D${d}) - endforeach() - - xgboost_list_unique(result) - set(${definitions_var} ${result} PARENT_SCOPE) -endfunction() - -################################################################################################ -# Helper function get current 
includes/definitions -# Usage: -# xgboost_get_current_cflags() -function(xgboost_get_current_cflags cflags_var) - get_property(current_includes DIRECTORY PROPERTY INCLUDE_DIRECTORIES) - xgboost_convert_absolute_paths(current_includes) - xgboost_get_current_definitions(cflags) - - foreach(i ${current_includes}) - list(APPEND cflags "-I${i}") - endforeach() - - xgboost_list_unique(cflags) - set(${cflags_var} ${cflags} PARENT_SCOPE) -endfunction() - -################################################################################################ -# Helper function to parse current linker libs into link directories, libflags and osx frameworks -# Usage: -# xgboost_parse_linker_libs( ) -function(xgboost_parse_linker_libs xgboost_LINKER_LIBS_variable folders_var flags_var frameworks_var) - - set(__unspec "") - set(__debug "") - set(__optimized "") - set(__framework "") - set(__varname "__unspec") - - # split libs into debug, optimized, unspecified and frameworks - foreach(list_elem ${${xgboost_LINKER_LIBS_variable}}) - if(list_elem STREQUAL "debug") - set(__varname "__debug") - elseif(list_elem STREQUAL "optimized") - set(__varname "__optimized") - elseif(list_elem MATCHES "^-framework[ \t]+([^ \t].*)") - list(APPEND __framework -framework ${CMAKE_MATCH_1}) - else() - list(APPEND ${__varname} ${list_elem}) - set(__varname "__unspec") - endif() - endforeach() - - # attach debug or optimized libs to unspecified according to current configuration - if(CMAKE_BUILD_TYPE MATCHES "Debug") - set(__libs ${__unspec} ${__debug}) - else() - set(__libs ${__unspec} ${__optimized}) - endif() - - set(libflags "") - set(folders "") - - # convert linker libraries list to link flags - foreach(lib ${__libs}) - if(TARGET ${lib}) - list(APPEND folders $) - list(APPEND libflags -l${lib}) - elseif(lib MATCHES "^-l.*") - list(APPEND libflags ${lib}) - elseif(IS_ABSOLUTE ${lib}) - get_filename_component(name_we ${lib} NAME_WE) - get_filename_component(folder ${lib} PATH) - - string(REGEX MATCH 
"^lib(.*)" __match ${name_we}) - list(APPEND libflags -l${CMAKE_MATCH_1}) - list(APPEND folders ${folder}) - else() - message(FATAL_ERROR "Logic error. Need to update cmake script") - endif() - endforeach() - - xgboost_list_unique(libflags folders) - - set(${folders_var} ${folders} PARENT_SCOPE) - set(${flags_var} ${libflags} PARENT_SCOPE) - set(${frameworks_var} ${__framework} PARENT_SCOPE) -endfunction() - -################################################################################################ -# Helper function to detect Darwin version, i.e. 10.8, 10.9, 10.10, .... -# Usage: -# xgboost_detect_darwin_version() -function(xgboost_detect_darwin_version output_var) - if(APPLE) - execute_process(COMMAND /usr/bin/sw_vers -productVersion - RESULT_VARIABLE __sw_vers OUTPUT_VARIABLE __sw_vers_out - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - - set(${output_var} ${__sw_vers_out} PARENT_SCOPE) - else() - set(${output_var} "" PARENT_SCOPE) - endif() -endfunction() - -################################################################################################ -# Convenient command to setup source group for IDEs that support this feature (VS, XCode) -# Usage: -# caffe_source_group( GLOB[_RECURSE] ) -function(xgboost_source_group group) - cmake_parse_arguments(CAFFE_SOURCE_GROUP "" "" "GLOB;GLOB_RECURSE" ${ARGN}) - if(CAFFE_SOURCE_GROUP_GLOB) - file(GLOB srcs1 ${CAFFE_SOURCE_GROUP_GLOB}) - source_group(${group} FILES ${srcs1}) - endif() - - if(CAFFE_SOURCE_GROUP_GLOB_RECURSE) - file(GLOB_RECURSE srcs2 ${CAFFE_SOURCE_GROUP_GLOB_RECURSE}) - source_group(${group} FILES ${srcs2}) - endif() -endfunction() diff --git a/demo/gpu_acceleration/bosch.py b/demo/gpu_acceleration/bosch.py index 2294b24e7..894e26835 100644 --- a/demo/gpu_acceleration/bosch.py +++ b/demo/gpu_acceleration/bosch.py @@ -24,8 +24,7 @@ param['eval_metric'] = 'auc' param['max_depth'] = 5 param['eta'] = 0.3 param['silent'] = 0 -param['updater'] = 'grow_gpu' -#param['updater'] = 'grow_colmaker' 
+param['tree_method'] = 'gpu_exact' num_round = 20 diff --git a/plugin/updater_gpu/README.md b/plugin/updater_gpu/README.md index 142b61459..1cf02f29c 100644 --- a/plugin/updater_gpu/README.md +++ b/plugin/updater_gpu/README.md @@ -1,16 +1,16 @@ # CUDA Accelerated Tree Construction Algorithms This plugin adds GPU accelerated tree construction algorithms to XGBoost. ## Usage -Specify the 'updater' parameter as one of the following algorithms. +Specify the 'tree_method' parameter as one of the following algorithms. ### Algorithms -| updater | Description | +| tree_method | Description | | --- | --- | -grow_gpu | The standard XGBoost tree construction algorithm. Performs exact search for splits. Slower and uses considerably more memory than 'grow_gpu_hist' | -grow_gpu_hist | Equivalent to the XGBoost fast histogram algorithm. Faster and uses considerably less memory. Splits may be less accurate. | +gpu_exact | The standard XGBoost tree construction algorithm. Performs exact search for splits. Slower and uses considerably more memory than 'gpu_hist' | +gpu_hist | Equivalent to the XGBoost fast histogram algorithm. Faster and uses considerably less memory. Splits may be less accurate. | ### Supported parameters -| parameter | grow_gpu | grow_gpu_hist | +| parameter | gpu_exact | gpu_hist | | --- | --- | --- | subsample | ✔ | ✔ | colsample_bytree | ✔ | ✔| @@ -29,7 +29,7 @@ Python example: ```python param['gpu_id'] = 1 param['max_bin'] = 16 -param['updater'] = 'grow_gpu_hist' +param['tree_method'] = 'gpu_hist' ``` ## Benchmarks To run benchmarks on synthetic data for binary classification: @@ -39,18 +39,18 @@ $ python benchmark/benchmark.py Training time time on 1000000 rows x 50 columns with 500 boosting iterations on i7-6700K CPU @ 4.00GHz and Pascal Titan X. 
-| Updater | Time (s) | +| tree_method | Time (s) | | --- | --- | -| grow_gpu_hist | 11.09 | -| grow_fast_histmaker (histogram XGBoost - CPU) | 41.75 | -| grow_gpu | 193.90 | -| grow_colmaker (standard XGBoost - CPU) | 720.12 | +| gpu_hist | 11.09 | +| hist (histogram XGBoost - CPU) | 41.75 | +| gpu_exact | 193.90 | +| exact (standard XGBoost - CPU) | 720.12 | -[See here](http://dmlc.ml/2016/12/14/GPU-accelerated-xgboost.html) for additional performance benchmarks of the 'grow_gpu' updater. +[See here](http://dmlc.ml/2016/12/14/GPU-accelerated-xgboost.html) for additional performance benchmarks of the 'gpu_exact' tree_method. ## Test -To run tests: +To run tests: ```bash $ python -m nose test/python/ ``` @@ -122,6 +122,13 @@ $ make PLUGIN_UPDATER_GPU=ON GTEST_PATH=${CACHE_PREFIX} test ``` ## Changelog +##### 2017/6/26 + +* Change API to use tree_method parameter +* Increase required cmake version to 3.5 +* Add compute arch 3.5 to default archs +* Set default n_gpus to 1 + ##### 2017/6/5 * Multi-GPU support for histogram method using NVIDIA NCCL. 
diff --git a/plugin/updater_gpu/benchmark/benchmark.py b/plugin/updater_gpu/benchmark/benchmark.py index 525200e0f..e34dbe454 100644 --- a/plugin/updater_gpu/benchmark/benchmark.py +++ b/plugin/updater_gpu/benchmark/benchmark.py @@ -14,19 +14,18 @@ def run_benchmark(args, gpu_algorithm, cpu_algorithm): dtrain = xgb.DMatrix(X, y) param = {'objective': 'binary:logistic', - 'tree_method': 'exact', 'max_depth': 6, 'silent': 1, 'eval_metric': 'auc'} - param['updater'] = gpu_algorithm - print("Training with '%s'" % param['updater']) + param['tree_method'] = gpu_algorithm + print("Training with '%s'" % param['tree_method']) tmp = time.time() xgb.train(param, dtrain, args.iterations) print ("Time: %s seconds" % (str(time.time() - tmp))) - param['updater'] = cpu_algorithm - print("Training with '%s'" % param['updater']) + param['tree_method'] = cpu_algorithm + print("Training with '%s'" % param['tree_method']) tmp = time.time() xgb.train(param, dtrain, args.iterations) print ("Time: %s seconds" % (str(time.time() - tmp))) @@ -34,17 +33,17 @@ def run_benchmark(args, gpu_algorithm, cpu_algorithm): parser = argparse.ArgumentParser() -parser.add_argument('--algorithm', choices=['all', 'grow_gpu', 'grow_gpu_hist'], required=True) +parser.add_argument('--algorithm', choices=['all', 'gpu_exact', 'gpu_hist'], default='all') parser.add_argument('--rows',type=int,default=1000000) parser.add_argument('--columns',type=int,default=50) parser.add_argument('--iterations',type=int,default=500) args = parser.parse_args() -if 'grow_gpu_hist' in args.algorithm: - run_benchmark(args, args.algorithm, 'grow_fast_histmaker') -if 'grow_gpu' in args.algorithm: - run_benchmark(args, args.algorithm, 'grow_colmaker') +if 'gpu_hist' in args.algorithm: + run_benchmark(args, args.algorithm, 'hist') +if 'gpu_exact' in args.algorithm: + run_benchmark(args, args.algorithm, 'exact') if 'all' in args.algorithm: - run_benchmark(args, 'grow_gpu', 'grow_colmaker') - run_benchmark(args, 'grow_gpu_hist', 
'grow_fast_histmaker') + run_benchmark(args, 'gpu_exact', 'exact') + run_benchmark(args, 'gpu_hist', 'hist') diff --git a/plugin/updater_gpu/test/python/test.py b/plugin/updater_gpu/test/python/test.py index 10a6cf6cf..cca9cd739 100644 --- a/plugin/updater_gpu/test/python/test.py +++ b/plugin/updater_gpu/test/python/test.py @@ -35,7 +35,7 @@ class TestGPU(unittest.TestCase): 'objective': 'binary:logistic', 'eval_metric': 'auc'} ag_param2 = {'max_depth': 2, - 'updater': 'grow_gpu', + 'tree_method': 'gpu_exact', 'eta': 1, 'silent': 1, 'objective': 'binary:logistic', @@ -59,7 +59,7 @@ class TestGPU(unittest.TestCase): dtest = xgb.DMatrix(X_test, y_test) param = {'objective': 'binary:logistic', - 'updater': 'grow_gpu', + 'tree_method': 'gpu_exact', 'max_depth': 3, 'eval_metric': 'auc'} res = {} @@ -75,7 +75,7 @@ class TestGPU(unittest.TestCase): dtrain2 = xgb.DMatrix(X2, label=y2) param = {'objective': 'binary:logistic', - 'updater': 'grow_gpu', + 'tree_method': 'gpu_exact', 'max_depth': 2, 'eval_metric': 'auc'} res = {} @@ -134,7 +134,7 @@ class TestGPU(unittest.TestCase): 'objective': 'binary:logistic', 'eval_metric': 'auc'} ag_param2 = {'max_depth': max_depth, - 'updater': 'grow_gpu_hist', + 'tree_method': 'gpu_hist', 'eta': 1, 'silent': 1, 'n_gpus': 1, @@ -142,7 +142,7 @@ class TestGPU(unittest.TestCase): 'max_bin': max_bin, 'eval_metric': 'auc'} ag_param3 = {'max_depth': max_depth, - 'updater': 'grow_gpu_hist', + 'tree_method': 'gpu_hist', 'eta': 1, 'silent': 1, 'n_gpus': n_gpus, @@ -177,7 +177,7 @@ class TestGPU(unittest.TestCase): dtest = xgb.DMatrix(X_test, y_test) param = {'objective': 'binary:logistic', - 'updater': 'grow_gpu_hist', + 'tree_method': 'gpu_hist', 'max_depth': max_depth, 'n_gpus': 1, 'max_bin': max_bin, @@ -189,7 +189,7 @@ class TestGPU(unittest.TestCase): assert self.non_decreasing(res['train']['auc']) #assert self.non_decreasing(res['test']['auc']) param2 = {'objective': 'binary:logistic', - 'updater': 'grow_gpu_hist', + 'tree_method': 
'gpu_hist', 'max_depth': max_depth, 'n_gpus': n_gpus, 'max_bin': max_bin, @@ -211,7 +211,7 @@ class TestGPU(unittest.TestCase): dtrain2 = xgb.DMatrix(X2, label=y2) param = {'objective': 'binary:logistic', - 'updater': 'grow_gpu_hist', + 'tree_method': 'gpu_hist', 'max_depth': max_depth, 'n_gpus': n_gpus, 'max_bin': max_bin, @@ -250,7 +250,7 @@ class TestGPU(unittest.TestCase): ###################################################################### # fail-safe test for max_bin param = {'objective': 'binary:logistic', - 'updater': 'grow_gpu_hist', + 'tree_method': 'gpu_hist', 'max_depth': max_depth, 'n_gpus': n_gpus, 'eval_metric': 'auc', @@ -263,7 +263,7 @@ class TestGPU(unittest.TestCase): ###################################################################### # subsampling param = {'objective': 'binary:logistic', - 'updater': 'grow_gpu_hist', + 'tree_method': 'gpu_hist', 'max_depth': max_depth, 'n_gpus': n_gpus, 'eval_metric': 'auc', @@ -279,7 +279,7 @@ class TestGPU(unittest.TestCase): ###################################################################### # fail-safe test for max_bin=2 param = {'objective': 'binary:logistic', - 'updater': 'grow_gpu_hist', + 'tree_method': 'gpu_hist', 'max_depth': 2, 'n_gpus': n_gpus, 'eval_metric': 'auc', diff --git a/src/learner.cc b/src/learner.cc index 2622ff4fb..d26e0d682 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -4,19 +4,19 @@ * \brief Implementation of learning algorithm. 
* \author Tianqi Chen */ -#include -#include -#include #include +#include +#include +#include #include -#include -#include -#include -#include -#include #include -#include "./common/io.h" +#include +#include +#include +#include +#include #include "./common/common.h" +#include "./common/io.h" #include "./common/random.h" namespace xgboost { @@ -25,17 +25,14 @@ bool Learner::AllowLazyCheckPoint() const { return gbm_->AllowLazyCheckPoint(); } -std::vector -Learner::DumpModel(const FeatureMap& fmap, - bool with_stats, - std::string format) const { +std::vector Learner::DumpModel(const FeatureMap& fmap, + bool with_stats, + std::string format) const { return gbm_->DumpModel(fmap, with_stats, format); } - /*! \brief training parameter for regression */ -struct LearnerModelParam - : public dmlc::Parameter { +struct LearnerModelParam : public dmlc::Parameter { /* \brief global bias */ bst_float base_score; /* \brief number of features */ @@ -55,20 +52,21 @@ struct LearnerModelParam } // declare parameters DMLC_DECLARE_PARAMETER(LearnerModelParam) { - DMLC_DECLARE_FIELD(base_score).set_default(0.5f) + DMLC_DECLARE_FIELD(base_score) + .set_default(0.5f) .describe("Global bias of the model."); - DMLC_DECLARE_FIELD(num_feature).set_default(0) - .describe("Number of features in training data,"\ - " this parameter will be automatically detected by learner."); - DMLC_DECLARE_FIELD(num_class).set_default(0).set_lower_bound(0) - .describe("Number of class option for multi-class classifier. "\ - " By default equals 0 and corresponds to binary classifier."); + DMLC_DECLARE_FIELD(num_feature) + .set_default(0) + .describe( + "Number of features in training data," + " this parameter will be automatically detected by learner."); + DMLC_DECLARE_FIELD(num_class).set_default(0).set_lower_bound(0).describe( + "Number of class option for multi-class classifier. 
" + " By default equals 0 and corresponds to binary classifier."); } }; - -struct LearnerTrainParam - : public dmlc::Parameter { +struct LearnerTrainParam : public dmlc::Parameter { // stored random seed int seed; // whether seed the PRNG each iteration @@ -90,30 +88,40 @@ struct LearnerTrainParam int debug_verbose; // declare parameters DMLC_DECLARE_PARAMETER(LearnerTrainParam) { - DMLC_DECLARE_FIELD(seed).set_default(0) - .describe("Random number seed during training."); - DMLC_DECLARE_FIELD(seed_per_iteration).set_default(false) - .describe("Seed PRNG determnisticly via iterator number, "\ - "this option will be switched on automatically on distributed mode."); - DMLC_DECLARE_FIELD(dsplit).set_default(0) + DMLC_DECLARE_FIELD(seed).set_default(0).describe( + "Random number seed during training."); + DMLC_DECLARE_FIELD(seed_per_iteration) + .set_default(false) + .describe( + "Seed PRNG determnisticly via iterator number, " + "this option will be switched on automatically on distributed " + "mode."); + DMLC_DECLARE_FIELD(dsplit) + .set_default(0) .add_enum("auto", 0) .add_enum("col", 1) .add_enum("row", 2) .describe("Data split mode for distributed training."); - DMLC_DECLARE_FIELD(tree_method).set_default(0) + DMLC_DECLARE_FIELD(tree_method) + .set_default(0) .add_enum("auto", 0) .add_enum("approx", 1) .add_enum("exact", 2) .add_enum("hist", 3) + .add_enum("gpu_exact", 4) + .add_enum("gpu_hist", 5) .describe("Choice of tree construction method."); - DMLC_DECLARE_FIELD(test_flag).set_default("") - .describe("Internal test flag"); - DMLC_DECLARE_FIELD(prob_buffer_row).set_default(1.0f).set_range(0.0f, 1.0f) + DMLC_DECLARE_FIELD(test_flag).set_default("").describe( + "Internal test flag"); + DMLC_DECLARE_FIELD(prob_buffer_row) + .set_default(1.0f) + .set_range(0.0f, 1.0f) .describe("Maximum buffered row portion"); - DMLC_DECLARE_FIELD(max_row_perbatch).set_default(std::numeric_limits::max()) + DMLC_DECLARE_FIELD(max_row_perbatch) + 
.set_default(std::numeric_limits::max()) .describe("maximum row per batch."); - DMLC_DECLARE_FIELD(nthread).set_default(0) - .describe("Number of threads to use."); + DMLC_DECLARE_FIELD(nthread).set_default(0).describe( + "Number of threads to use."); DMLC_DECLARE_FIELD(debug_verbose) .set_lower_bound(0) .set_default(0) @@ -125,8 +133,8 @@ DMLC_REGISTER_PARAMETER(LearnerModelParam); DMLC_REGISTER_PARAMETER(LearnerTrainParam); /*! - * \brief learner that performs gradient boosting for a specific objective function. - * It does training and prediction. + * \brief learner that performs gradient boosting for a specific objective + * function. It does training and prediction. */ class LearnerImpl : public Learner { public: @@ -137,14 +145,41 @@ class LearnerImpl : public Learner { name_gbm_ = "gbtree"; } - void Configure(const std::vector >& args) override { + void ConfigureUpdaters() { + if (tparam.tree_method == 0 || tparam.tree_method == 1 || + tparam.tree_method == 2) { + if (cfg_.count("updater") == 0) { + if (tparam.dsplit == 1) { + cfg_["updater"] = "distcol"; + } else if (tparam.dsplit == 2) { + cfg_["updater"] = "grow_histmaker,prune"; + } + if (tparam.prob_buffer_row != 1.0f) { + cfg_["updater"] = "grow_histmaker,refresh,prune"; + } + } + } else if (tparam.tree_method == 3) { + /* histogram-based algorithm */ + LOG(CONSOLE) << "Tree method is selected to be \'hist\', which uses a " + "single updater " + << "grow_fast_histmaker."; + cfg_["updater"] = "grow_fast_histmaker"; + } else if (tparam.tree_method == 4) { + cfg_["updater"] = "grow_gpu,prune"; + } else if (tparam.tree_method == 5) { + cfg_["updater"] = "grow_gpu_hist"; + } + } + + void Configure( + const std::vector >& args) override { // add to configurations tparam.InitAllowUnknown(args); cfg_.clear(); for (const auto& kv : args) { if (kv.first == "eval_metric") { // check duplication - auto dup_check = [&kv](const std::unique_ptr&m) { + auto dup_check = [&kv](const std::unique_ptr& m) { return 
m->Name() != kv.second; }; if (std::all_of(metrics_.begin(), metrics_.end(), dup_check)) { @@ -172,27 +207,13 @@ class LearnerImpl : public Learner { } } - if (cfg_.count("max_delta_step") == 0 && - cfg_.count("objective") != 0 && + if (cfg_.count("max_delta_step") == 0 && cfg_.count("objective") != 0 && cfg_["objective"] == "count:poisson") { cfg_["max_delta_step"] = "0.7"; } - if (tparam.tree_method == 3) { - /* histogram-based algorithm */ - LOG(CONSOLE) << "Tree method is selected to be \'hist\', which uses a single updater " - << "grow_fast_histmaker."; - cfg_["updater"] = "grow_fast_histmaker"; - } else if (cfg_.count("updater") == 0) { - if (tparam.dsplit == 1) { - cfg_["updater"] = "distcol"; - } else if (tparam.dsplit == 2) { - cfg_["updater"] = "grow_histmaker,prune"; - } - if (tparam.prob_buffer_row != 1.0f) { - cfg_["updater"] = "grow_histmaker,refresh,prune"; - } - } + ConfigureUpdaters(); + if (cfg_.count("objective") == 0) { cfg_["objective"] = "reg:linear"; } @@ -220,9 +241,7 @@ class LearnerImpl : public Learner { } } - void InitModel() override { - this->LazyInitModel(); - } + void InitModel() override { this->LazyInitModel(); } void Load(dmlc::Stream* fi) override { // TODO(tqchen) mark deprecation of old format. 
@@ -256,11 +275,10 @@ class LearnerImpl : public Learner { if (len != 0) { name_obj_.resize(len); CHECK_EQ(fi->Read(&name_obj_[0], len), len) - <<"BoostLearner: wrong model format"; + << "BoostLearner: wrong model format"; } } - CHECK(fi->Read(&name_gbm_)) - << "BoostLearner: wrong model format"; + CHECK(fi->Read(&name_gbm_)) << "BoostLearner: wrong model format"; // duplicated code with LazyInitModel obj_.reset(ObjFunction::Create(name_obj_)); gbm_.reset(GradientBooster::Create(name_gbm_, cache_, mparam.base_score)); @@ -268,13 +286,13 @@ class LearnerImpl : public Learner { if (mparam.contain_extra_attrs != 0) { std::vector > attr; fi->Read(&attr); - attributes_ = std::map( - attr.begin(), attr.end()); + attributes_ = + std::map(attr.begin(), attr.end()); } if (name_obj_ == "count:poisson") { - std::string max_delta_step; - fi->Read(&max_delta_step); - cfg_["max_delta_step"] = max_delta_step; + std::string max_delta_step; + fi->Read(&max_delta_step); + cfg_["max_delta_step"] = max_delta_step; } if (mparam.contain_eval_metrics != 0) { std::vector metr; @@ -289,7 +307,7 @@ class LearnerImpl : public Learner { } // rabit save model to rabit checkpoint - void Save(dmlc::Stream *fo) const override { + void Save(dmlc::Stream* fo) const override { fo->Write(&mparam, sizeof(LearnerModelParam)); fo->Write(name_obj_); fo->Write(name_gbm_); @@ -300,9 +318,9 @@ class LearnerImpl : public Learner { fo->Write(attr); } if (name_obj_ == "count:poisson") { - std::map::const_iterator it = cfg_.find("max_delta_step"); - if (it != cfg_.end()) - fo->Write(it->second); + std::map::const_iterator it = + cfg_.find("max_delta_step"); + if (it != cfg_.end()) fo->Write(it->second); } if (mparam.contain_eval_metrics != 0) { std::vector metr; @@ -325,8 +343,7 @@ class LearnerImpl : public Learner { gbm_->DoBoost(train, &gpair_, obj_.get()); } - void BoostOneIter(int iter, - DMatrix* train, + void BoostOneIter(int iter, DMatrix* train, std::vector* in_gpair) override { if 
(tparam.seed_per_iteration || rabit::IsDistributed()) { common::GlobalRandom().seed(tparam.seed * kRandSeedMagic + iter); @@ -335,13 +352,11 @@ class LearnerImpl : public Learner { gbm_->DoBoost(train, in_gpair); } - std::string EvalOneIter(int iter, - const std::vector& data_sets, + std::string EvalOneIter(int iter, const std::vector& data_sets, const std::vector& data_names) override { double tstart = dmlc::GetTime(); std::ostringstream os; - os << '[' << iter << ']' - << std::setiosflags(std::ios::fixed); + os << '[' << iter << ']' << std::setiosflags(std::ios::fixed); if (metrics_.size() == 0) { metrics_.emplace_back(Metric::Create(obj_->DefaultEvalMetric())); } @@ -388,20 +403,19 @@ class LearnerImpl : public Learner { return out; } - std::pair Evaluate(DMatrix* data, std::string metric) { + std::pair Evaluate(DMatrix* data, + std::string metric) { if (metric == "auto") metric = obj_->DefaultEvalMetric(); std::unique_ptr ev(Metric::Create(metric.c_str())); this->PredictRaw(data, &preds_); obj_->EvalTransform(&preds_); - return std::make_pair(metric, ev->Eval(preds_, data->info(), tparam.dsplit == 2)); + return std::make_pair(metric, + ev->Eval(preds_, data->info(), tparam.dsplit == 2)); } - void Predict(DMatrix* data, - bool output_margin, - std::vector *out_preds, - unsigned ntree_limit, - bool pred_leaf, - bool pred_contribs) const override { + void Predict(DMatrix* data, bool output_margin, + std::vector* out_preds, unsigned ntree_limit, + bool pred_leaf, bool pred_contribs) const override { if (pred_contribs) { gbm_->PredictContribution(data, out_preds, ntree_limit); } else if (pred_leaf) { @@ -418,7 +432,12 @@ class LearnerImpl : public Learner { // check if p_train is ready to used by training. // if not, initialize the column access. 
inline void LazyInitDMatrix(DMatrix* p_train) { - if (tparam.tree_method != 3 && !p_train->HaveColAccess()) { + if (tparam.tree_method == 3 || tparam.tree_method == 4 || + tparam.tree_method == 5) { + return; + } + + if (!p_train->HaveColAccess()) { int ncol = static_cast(p_train->info().num_col); std::vector enabled(ncol, true); // set max row per batch to limited value @@ -426,12 +445,12 @@ class LearnerImpl : public Learner { size_t max_row_perbatch = tparam.max_row_perbatch; const size_t safe_max_row = static_cast(32UL << 10UL); - if (tparam.tree_method == 0 && - p_train->info().num_row >= (4UL << 20UL)) { - LOG(CONSOLE) << "Tree method is automatically selected to be \'approx\'" - << " for faster speed." - << " to use old behavior(exact greedy algorithm on single machine)," - << " set tree_method to \'exact\'"; + if (tparam.tree_method == 0 && p_train->info().num_row >= (4UL << 20UL)) { + LOG(CONSOLE) + << "Tree method is automatically selected to be \'approx\'" + << " for faster speed." + << " to use old behavior(exact greedy algorithm on single machine)," + << " set tree_method to \'exact\'"; max_row_perbatch = std::min(max_row_perbatch, safe_max_row); } @@ -444,15 +463,14 @@ class LearnerImpl : public Learner { max_row_perbatch = std::min(max_row_perbatch, safe_max_row); } // initialize column access - p_train->InitColAccess(enabled, - tparam.prob_buffer_row, - max_row_perbatch); + p_train->InitColAccess(enabled, tparam.prob_buffer_row, max_row_perbatch); } if (!p_train->SingleColBlock() && cfg_.count("updater") == 0) { if (tparam.tree_method == 2) { LOG(CONSOLE) << "tree method is set to be 'exact'," - << " but currently we are only able to proceed with approximate algorithm"; + << " but currently we are only able to proceed with " + "approximate algorithm"; } cfg_["updater"] = "grow_histmaker,prune"; if (gbm_.get() != nullptr) { @@ -462,9 +480,7 @@ class LearnerImpl : public Learner { } // return whether model is already initialized. 
- inline bool ModelInitialized() const { - return gbm_.get() != nullptr; - } + inline bool ModelInitialized() const { return gbm_.get() != nullptr; } // lazily initialize the model if it haven't yet been initialized. inline void LazyInitModel() { if (this->ModelInitialized()) return; @@ -497,14 +513,11 @@ class LearnerImpl : public Learner { * \param ntree_limit limit number of trees used for boosted tree * predictor, when it equals 0, this means we are using all the trees */ - inline void PredictRaw(DMatrix* data, - std::vector* out_preds, + inline void PredictRaw(DMatrix* data, std::vector* out_preds, unsigned ntree_limit = 0) const { CHECK(gbm_.get() != nullptr) << "Predict must happen after Load or InitModel"; - gbm_->Predict(data, - out_preds, - ntree_limit); + gbm_->Predict(data, out_preds, ntree_limit); } // model parameter LearnerModelParam mparam; @@ -530,7 +543,8 @@ class LearnerImpl : public Learner { std::vector > cache_; }; -Learner* Learner::Create(const std::vector >& cache_data) { +Learner* Learner::Create( + const std::vector >& cache_data) { return new LearnerImpl(cache_data); } } // namespace xgboost